npm - heyi - Versions diffs - 2.0.0 → 3.0.0 - Mend

heyi 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED Viewed

@@ -24,6 +24,7 @@ heyi preset [file] [options]
 - `-m, --model <model>` - AI model to use (default: `openai/gpt-4o-mini`)
 - `-f, --format <format>` - Output format: `string`, `number`, `object`, `array` (default: `string`)
 - `-s, --schema <schema>` - Zod schema for object/array format (required when format is `object` or `array`)
+- `-c, --crawler <crawler>` - Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`)
 - `--file <path>` - Read content from file and include as context (can be used multiple times)
 - `--url <url>` - Fetch content from URL and include as context (can be used multiple times)
 - `--var <key=value>` - Define variables for replacement in prompt using `{{key}}` syntax (can be used multiple times)
@@ -32,8 +33,9 @@ heyi preset [file] [options]
 #### Environment Variables
-- `API_KEY` - OpenRouter API key (required, can be set via environment or `.env` file)
-- `MODEL` - Default AI model to use (optional, can be overridden with `--model` flag)
+- `HEYI_API_KEY` - OpenRouter API key (required, can be set via environment or `.env` file)
+- `HEYI_MODEL` - Default AI model to use (optional, can be overridden with `--model` flag)
+- `HEYI_CRAWLER` - Default crawler to use for fetching URLs (optional, can be overridden with `--crawler` flag)
 ### Examples
@@ -64,10 +66,10 @@ heyi prompt "Preset in {{input}} and output in {{output}}" --var input="German"
 echo "Translate to {{language}}" | heyi prompt --var language="Spanish"
 # Set default model via environment variable
-MODEL=perplexity/sonar heyi prompt "Explain AI"
+HEYI_MODEL=perplexity/sonar heyi prompt "Explain AI"
 # Set API key via environment variable
-API_KEY=your-key heyi prompt "Hello, AI!"
+HEYI_API_KEY=your-key heyi prompt "Hello, AI!"
 # Input from file as context
 heyi prompt "Summarize this content" --file input.txt
@@ -82,6 +84,10 @@ heyi prompt "Summarize this article" --url https://example.com/article.html
 # Input from multiple URLs as context
 heyi prompt "Compare these articles" --url https://example.com/article1.html --url https://example.com/article2.html
+# Use Chrome crawler for JavaScript-heavy pages
+heyi prompt "Summarize this SPA" --url https://example.com/spa --crawler chrome
+HEYI_CRAWLER=chrome heyi prompt "Get content from dynamic page" --url https://example.com/dynamic
 # Mix files and URLs as context
 heyi prompt "Compare local and remote content" --file local.txt --url https://example.com/remote.txt
@@ -106,6 +112,7 @@ Preset files allow you to define reusable configurations with prompts, models, f
   "model": "openai/gpt-4o-mini",
   "format": "array",
   "schema": "z.string()",
+  "crawler": "fetch",
   "files": ["path/to/file1.txt", "path/to/file2.txt"],
   "urls": ["https://example.com/page.html"]
 }
@@ -117,6 +124,7 @@ Preset files allow you to define reusable configurations with prompts, models, f
 - **model** (optional): AI model to use (e.g., `openai/gpt-4o-mini`, `google/gemini-2.0-flash-exp`).
 - **format** (optional): Output format: `string`, `number`, `object`, `array` (default: `string`).
 - **schema** (optional): Zod schema for object/array format (required when format is `object` or `array`).
+- **crawler** (optional): Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`).
 - **files** (optional): Array of file paths to include as context.
 - **urls** (optional): Array of URLs to fetch and include as context.
@@ -169,6 +177,7 @@ heyi preset languages.json
 - **Model override**: Using `--model` flag overrides the model specified in the preset file.
 - **Format override**: Using `--format` flag overrides the format specified in the preset file.
 - **Schema override**: Using `--schema` flag overrides the schema specified in the preset file.
+- **Crawler override**: Using `--crawler` flag overrides the crawler specified in the preset file.
 - **Files and URLs append**: Using `--file` or `--url` flags adds additional context to the preset's files and URLs.
 - **Variables**: Use `--var` to replace variables in the preset's prompt.
@@ -179,6 +188,9 @@ heyi preset file.json --model openai/gpt-4o
 # Override format from preset
 heyi preset file.json --format object --schema "z.object({name:z.string()})"
+# Override crawler from preset
+heyi preset file.json --crawler chrome
 # Add additional files to preset's files
 heyi preset file.json --file extra.txt
@@ -202,6 +214,35 @@ The tool uses Zod schemas to ensure the AI model returns data in the requested f
 - Object array: `--format array --schema "z.object({name:z.string(),age:z.number()})"`
 - Single object: `--format object --schema "z.object({total:z.number(),items:z.array(z.string())})"`
+## Crawlers
+The tool supports two crawlers for fetching content from URLs:
+- **fetch** (default): Uses the native `fetch` API to retrieve HTML content. Fast and lightweight, but may not work well with JavaScript-heavy or dynamically rendered pages.
+- **chrome**: Uses Puppeteer to launch a headless Chrome browser and retrieve content after the page has fully loaded. Ideal for single-page applications (SPAs) and JavaScript-heavy websites, but slower and requires more resources.
+### When to Use Chrome Crawler
+Use the `chrome` crawler when:
+- The target website relies heavily on JavaScript for rendering content
+- Content is loaded dynamically after the initial page load
+- You need to interact with a single-page application (SPA)
+- The `fetch` crawler returns incomplete or missing content
+### Crawler Examples
+```sh
+# Use default fetch crawler
+heyi prompt "Summarize this page" --url https://example.com
+# Use Chrome crawler for JS-heavy page
+heyi prompt "Extract data from SPA" --url https://app.example.com --crawler chrome
+# Set Chrome as default crawler via environment
+HEYI_CRAWLER=chrome heyi prompt "Get content" --url https://dynamic-site.com
+```
 ## Development
 ```sh

package/bin/index.js CHANGED Viewed

@@ -11,13 +11,19 @@ import { buildPrompt } from '../src/utils/prompt.js'
 import { replaceVariables } from '../src/utils/variables.js'
 const DEFAULT_MODEL = 'openai/gpt-4o-mini'
+const DEFAULT_CRAWLER = 'fetch'
-const modelFlag = ['-m, --model <model>', 'AI model to use', process.env.MODEL ?? DEFAULT_MODEL]
+const modelFlag = ['-m, --model <model>', 'AI model to use', process.env.HEYI_MODEL ?? DEFAULT_MODEL]
 const formatFlag = ['-f, --format <format>', 'Output format: string, number, object, array', 'string']
 const schemaFlag = [
   '-s, --schema <schema>',
   'Zod schema for object/array format (required when format is object or array)',
 ]
+const crawlerFlag = [
+  '-c, --crawler <crawler>',
+  'Crawler to use for fetching URLs: fetch, chrome',
+  process.env.HEYI_CRAWLER ?? DEFAULT_CRAWLER,
+]
 const fileFlag = [
   '--file <path>',
   'Read content from file and include as context (can be used multiple times)',
@@ -50,6 +56,7 @@ const varFlag = [
 const hasModelFlag = hasFlag(['--model', '-m'])
 const hasFormatFlag = hasFlag(['--format', '-f'])
 const hasSchemaFlag = hasFlag(['--schema', '-s'])
+const hasCrawlerFlag = hasFlag(['--crawler', '-c'])
 const program = new Command()
@@ -80,8 +87,8 @@ Examples:
   $ heyi prompt "Preset in {{language}}" --var language="German"
   # Environment variables
-  $ MODEL=perplexity/sonar heyi prompt "Explain AI"
-  $ API_KEY=your-key heyi prompt "Hello, AI!"
+  $ HEYI_MODEL=perplexity/sonar heyi prompt "Explain AI"
+  $ HEYI_API_KEY=your-key heyi prompt "Hello, AI!"
   # Attach context
   $ heyi prompt "Summarize this content" --file input.txt
@@ -114,6 +121,7 @@ const optionsSchema = z
     model: z.string(),
     format: z.enum(['string', 'number', 'object', 'array']),
     schema: z.string().optional(),
+    crawler: z.enum(['fetch', 'chrome']),
     files: z.array(z.string()).default([]),
     urls: z.array(z.string()).default([]),
     vars: z.record(z.string(), z.string()).default({}),
@@ -128,6 +136,7 @@ const flagsToOptions = (flags) => {
     model: flags.model,
     format: flags.format,
     schema: flags.schema,
+    crawler: flags.crawler,
     files: flags.file,
     urls: flags.url,
     vars: flags.var,
@@ -136,10 +145,11 @@ const flagsToOptions = (flags) => {
 const mergeOptionsWithPreset = (options, presetContent) => {
   return optionsSchema.parse({
-    // Overwrite model, format, schema only if not provided via flags
+    // Overwrite model, format, schema, crawler only if not provided via flags
     model: hasModelFlag ? options.model : (presetContent.model ?? options.model),
     format: hasFormatFlag ? options.format : (presetContent.format ?? options.format),
     schema: hasSchemaFlag ? options.schema : (presetContent.schema ?? options.schema),
+    crawler: hasCrawlerFlag ? options.crawler : (presetContent.crawler ?? options.crawler),
     // Merge files
     files: [...presetContent.files, ...options.files],
     // Merge URLs
@@ -167,7 +177,7 @@ const executePromptAction = async (prompt, flags) => {
     // Build the prompt and prefer the argument over stdin
     const userPrompt = replaceVariables(prompt ?? stdinContent, options.vars)
-    const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
+    const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)
     const result = await executePrompt(finalPrompt, {
       model: options.model,
@@ -199,7 +209,7 @@ const executePresetAction = async (preset, flags) => {
     // Build the prompt
     const userPrompt = replaceVariables(prompt, options.vars)
-    const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
+    const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)
     const result = await executePrompt(finalPrompt, {
       model: options.model,
@@ -223,6 +233,7 @@ program
   .option(...modelFlag)
   .option(...formatFlag)
   .option(...schemaFlag)
+  .option(...crawlerFlag)
   .option(...fileFlag)
   .option(...urlFlag)
   .option(...varFlag)
@@ -235,6 +246,7 @@ program
   .option(...modelFlag)
   .option(...formatFlag)
   .option(...schemaFlag)
+  .option(...crawlerFlag)
   .option(...fileFlag)
   .option(...urlFlag)
   .option(...varFlag)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "heyi",
-  "version": "2.0.0",
+  "version": "3.0.0",
   "description": "CLI tool to execute AI prompts with flexible output formatting",
   "keywords": [
     "ai",
@@ -34,6 +34,7 @@
     "ai": "^5.0.121",
     "commander": "^14.0.2",
     "dotenv": "^16.6.1",
+    "puppeteer": "^24.35.0",
     "sanitize-html": "^2.17.0",
     "zod": "^4.3.5"
   },

package/src/index.js CHANGED Viewed

@@ -19,9 +19,9 @@ config()
 export const executePrompt = async (prompt, options = {}) => {
   const { model, format = 'string', schema } = options
-  const apiKey = process.env.API_KEY
+  const apiKey = process.env.HEYI_API_KEY
   if (!apiKey) {
-    throw new Error('API_KEY environment variable is required. Set it via environment or .env file.')
+    throw new Error('HEYI_API_KEY environment variable is required. Set it via environment or .env file.')
   }
   const openrouter = createOpenRouter({

package/src/utils/input.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { readFile } from 'node:fs/promises'
 import { createInterface } from 'node:readline'
+import { launch } from 'puppeteer'
 import sanitizeHtml from 'sanitize-html'
 /**
@@ -57,18 +58,84 @@ export const hasStdinData = () => {
 }
 /**
- * Fetch content from a URL.
+ * Validate that a URL uses http or https protocol.
+ *
+ * @param {string} url - URL to validate
+ * @throws {Error} If URL is invalid or uses a dangerous protocol
+ */
+const validateUrl = (url) => {
+  try {
+    const parsedUrl = new URL(url)
+    if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
+      throw new Error(`Invalid protocol '${parsedUrl.protocol}'. Only http and https are supported.`)
+    }
+  } catch (error) {
+    if (error instanceof TypeError) {
+      throw new Error(`Invalid URL format: ${url}`)
+    }
+    throw error
+  }
+}
+/**
+ * Fetch content from a URL using fetch API.
+ *
+ * @param {string} url - URL to fetch content from
+ * @returns {Promise<string>} The URL content
+ */
+const fetchUrlContentWithFetch = async (url) => {
+  validateUrl(url)
+  const response = await fetch(url)
+  if (!response.ok) {
+    throw new Error(`HTTP ${response.status}: ${response.statusText}`)
+  }
+  const html = await response.text()
+  // Sanitize HTML to extract only text content and avoid large data
+  const cleanText = sanitizeHtml(html, {
+    allowedTags: [],
+    allowedAttributes: {},
+    allowedSchemes: [],
+    allowedSchemesAppliedToAttributes: [],
+  })
+  return cleanText.trim()
+}
+/**
+ * Fetch content from a URL using Chrome/Puppeteer.
  *
  * @param {string} url - URL to fetch content from
  * @returns {Promise<string>} The URL content
  */
-export const fetchUrlContent = async (url) => {
+const fetchUrlContentWithChrome = async (url) => {
+  validateUrl(url)
+  const browser = await launch({
+    headless: true,
+    // These args are required for running in containerized environments (e.g., Docker, CI/CD)
+    args: ['--no-sandbox', '--disable-setuid-sandbox'],
+  })
   try {
-    const response = await fetch(url)
-    if (!response.ok) {
-      throw new Error(`HTTP ${response.status}: ${response.statusText}`)
+    const page = await browser.newPage()
+    // Wait for network to be idle, with a 10-second timeout to prevent indefinite waiting.
+    // If timeout occurs, continue with whatever content is available.
+    // Wait for navigation first in case there are redirects.
+    try {
+      await Promise.all([
+        page.waitForNavigation({ timeout: 10000 }),
+        page.goto(url, { waitUntil: 'networkidle0', timeout: 10000 }),
+      ])
+    } catch (error) {
+      // If it's a timeout error, continue with the content that's already loaded
+      // For other errors (e.g., network errors), rethrow
+      if (!error.message.includes('timeout') && !error.message.includes('Navigation timeout')) {
+        throw error
+      }
     }
-    const html = await response.text()
+    const html = await page.content()
     // Sanitize HTML to extract only text content and avoid large data
     const cleanText = sanitizeHtml(html, {
       allowedTags: [],
@@ -77,6 +144,21 @@ export const fetchUrlContent = async (url) => {
       allowedSchemesAppliedToAttributes: [],
     })
     return cleanText.trim()
+  } finally {
+    await browser.close()
+  }
+}
+/**
+ * Fetch content from a URL.
+ *
+ * @param {string} url - URL to fetch content from
+ * @param {string} crawler - Crawler to use: 'fetch' or 'chrome' (default: 'fetch')
+ * @returns {Promise<string>} The URL content
+ */
+export const fetchUrlContent = async (url, crawler = 'fetch') => {
+  try {
+    return crawler === 'chrome' ? await fetchUrlContentWithChrome(url) : await fetchUrlContentWithFetch(url)
   } catch (error) {
     throw new Error(`Failed to fetch URL '${url}'`, { cause: error })
   }

package/src/utils/preset.js CHANGED Viewed

@@ -6,6 +6,7 @@ const presetSchema = z.object({
   model: z.string().optional(),
   format: z.enum(['string', 'number', 'object', 'array']).optional(),
   schema: z.string().optional(),
+  crawler: z.enum(['fetch', 'chrome']).optional(),
   files: z.array(z.string()).default([]),
   urls: z.array(z.string()).default([]),
 })

package/src/utils/prompt.js CHANGED Viewed

@@ -6,9 +6,10 @@ import { fetchUrlContent, readFileContent } from './input.js'
  * @param {string} prompt - The prompt
  * @param {string[]} filePaths - Array of file paths to include as context
  * @param {string[]} urls - Array of URLs to include as context
+ * @param {string} crawler - Crawler to use for fetching URLs: 'fetch' or 'chrome' (default: 'fetch')
  * @returns {Promise<string>} The final prompt with all contexts combined
  */
-export const buildPrompt = async (prompt, filePaths = [], urls = []) => {
+export const buildPrompt = async (prompt, filePaths = [], urls = [], crawler = 'fetch') => {
   // Handle file content as context
   const fileContents = []
   for (const filePath of filePaths) {
@@ -19,7 +20,7 @@ export const buildPrompt = async (prompt, filePaths = [], urls = []) => {
   // Handle URL content as context
   const urlContents = []
   for (const url of urls) {
-    const content = await fetchUrlContent(url)
+    const content = await fetchUrlContent(url, crawler)
     urlContents.push({ path: url, content })
   }