heyi 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,6 +24,7 @@ heyi preset [file] [options]
24
24
  - `-m, --model <model>` - AI model to use (default: `openai/gpt-4o-mini`)
25
25
  - `-f, --format <format>` - Output format: `string`, `number`, `object`, `array` (default: `string`)
26
26
  - `-s, --schema <schema>` - Zod schema for object/array format (required when format is `object` or `array`)
27
+ - `-c, --crawler <crawler>` - Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`)
27
28
  - `--file <path>` - Read content from file and include as context (can be used multiple times)
28
29
  - `--url <url>` - Fetch content from URL and include as context (can be used multiple times)
29
30
  - `--var <key=value>` - Define variables for replacement in prompt using `{{key}}` syntax (can be used multiple times)
@@ -32,8 +33,9 @@ heyi preset [file] [options]
32
33
 
33
34
  #### Environment Variables
34
35
 
35
- - `API_KEY` - OpenRouter API key (required, can be set via environment or `.env` file)
36
- - `MODEL` - Default AI model to use (optional, can be overridden with `--model` flag)
36
+ - `HEYI_API_KEY` - OpenRouter API key (required, can be set via environment or `.env` file)
37
+ - `HEYI_MODEL` - Default AI model to use (optional, can be overridden with `--model` flag)
38
+ - `HEYI_CRAWLER` - Default crawler to use for fetching URLs (optional, can be overridden with `--crawler` flag)
37
39
 
38
40
  ### Examples
39
41
 
@@ -64,10 +66,10 @@ heyi prompt "Preset in {{input}} and output in {{output}}" --var input="German"
64
66
  echo "Translate to {{language}}" | heyi prompt --var language="Spanish"
65
67
 
66
68
  # Set default model via environment variable
67
- MODEL=perplexity/sonar heyi prompt "Explain AI"
69
+ HEYI_MODEL=perplexity/sonar heyi prompt "Explain AI"
68
70
 
69
71
  # Set API key via environment variable
70
- API_KEY=your-key heyi prompt "Hello, AI!"
72
+ HEYI_API_KEY=your-key heyi prompt "Hello, AI!"
71
73
 
72
74
  # Input from file as context
73
75
  heyi prompt "Summarize this content" --file input.txt
@@ -82,6 +84,10 @@ heyi prompt "Summarize this article" --url https://example.com/article.html
82
84
  # Input from multiple URLs as context
83
85
  heyi prompt "Compare these articles" --url https://example.com/article1.html --url https://example.com/article2.html
84
86
 
87
+ # Use Chrome crawler for JavaScript-heavy pages
88
+ heyi prompt "Summarize this SPA" --url https://example.com/spa --crawler chrome
89
+ HEYI_CRAWLER=chrome heyi prompt "Get content from dynamic page" --url https://example.com/dynamic
90
+
85
91
  # Mix files and URLs as context
86
92
  heyi prompt "Compare local and remote content" --file local.txt --url https://example.com/remote.txt
87
93
 
@@ -106,6 +112,7 @@ Preset files allow you to define reusable configurations with prompts, models, f
106
112
  "model": "openai/gpt-4o-mini",
107
113
  "format": "array",
108
114
  "schema": "z.string()",
115
+ "crawler": "fetch",
109
116
  "files": ["path/to/file1.txt", "path/to/file2.txt"],
110
117
  "urls": ["https://example.com/page.html"]
111
118
  }
@@ -117,6 +124,7 @@ Preset files allow you to define reusable configurations with prompts, models, f
117
124
  - **model** (optional): AI model to use (e.g., `openai/gpt-4o-mini`, `google/gemini-2.0-flash-exp`).
118
125
  - **format** (optional): Output format: `string`, `number`, `object`, `array` (default: `string`).
119
126
  - **schema** (optional): Zod schema for object/array format (required when format is `object` or `array`).
127
+ - **crawler** (optional): Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`).
120
128
  - **files** (optional): Array of file paths to include as context.
121
129
  - **urls** (optional): Array of URLs to fetch and include as context.
122
130
 
@@ -169,6 +177,7 @@ heyi preset languages.json
169
177
  - **Model override**: Using `--model` flag overrides the model specified in the preset file.
170
178
  - **Format override**: Using `--format` flag overrides the format specified in the preset file.
171
179
  - **Schema override**: Using `--schema` flag overrides the schema specified in the preset file.
180
+ - **Crawler override**: Using `--crawler` flag overrides the crawler specified in the preset file.
172
181
  - **Files and URLs append**: Using `--file` or `--url` flags adds additional context to the preset's files and URLs.
173
182
  - **Variables**: Use `--var` to replace variables in the preset's prompt.
174
183
 
@@ -179,6 +188,9 @@ heyi preset file.json --model openai/gpt-4o
179
188
  # Override format from preset
180
189
  heyi preset file.json --format object --schema "z.object({name:z.string()})"
181
190
 
191
+ # Override crawler from preset
192
+ heyi preset file.json --crawler chrome
193
+
182
194
  # Add additional files to preset's files
183
195
  heyi preset file.json --file extra.txt
184
196
 
@@ -202,6 +214,35 @@ The tool uses Zod schemas to ensure the AI model returns data in the requested f
202
214
  - Object array: `--format array --schema "z.object({name:z.string(),age:z.number()})"`
203
215
  - Single object: `--format object --schema "z.object({total:z.number(),items:z.array(z.string())})"`
204
216
 
217
+ ## Crawlers
218
+
219
+ The tool supports two crawlers for fetching content from URLs:
220
+
221
+ - **fetch** (default): Uses the native `fetch` API to retrieve HTML content. Fast and lightweight, but may not work well with JavaScript-heavy or dynamically rendered pages.
222
+ - **chrome**: Uses Puppeteer to launch a headless Chrome browser and retrieve content once the page has loaded (waiting up to 10 seconds for network activity to settle). Ideal for single-page applications (SPAs) and JavaScript-heavy websites, but slower and more resource-intensive.
223
+
224
+ ### When to Use Chrome Crawler
225
+
226
+ Use the `chrome` crawler when:
227
+
228
+ - The target website relies heavily on JavaScript for rendering content
229
+ - Content is loaded dynamically after the initial page load
230
+ - You need to read rendered content from a single-page application (SPA)
231
+ - The `fetch` crawler returns incomplete or missing content
232
+
233
+ ### Crawler Examples
234
+
235
+ ```sh
236
+ # Use default fetch crawler
237
+ heyi prompt "Summarize this page" --url https://example.com
238
+
239
+ # Use Chrome crawler for JS-heavy page
240
+ heyi prompt "Extract data from SPA" --url https://app.example.com --crawler chrome
241
+
242
+ # Set Chrome as default crawler via environment
243
+ HEYI_CRAWLER=chrome heyi prompt "Get content" --url https://dynamic-site.com
244
+ ```
245
+
205
246
  ## Development
206
247
 
207
248
  ```sh
package/bin/index.js CHANGED
@@ -11,13 +11,19 @@ import { buildPrompt } from '../src/utils/prompt.js'
11
11
  import { replaceVariables } from '../src/utils/variables.js'
12
12
 
13
13
  const DEFAULT_MODEL = 'openai/gpt-4o-mini'
14
+ const DEFAULT_CRAWLER = 'fetch'
14
15
 
15
- const modelFlag = ['-m, --model <model>', 'AI model to use', process.env.MODEL ?? DEFAULT_MODEL]
16
+ const modelFlag = ['-m, --model <model>', 'AI model to use', process.env.HEYI_MODEL ?? DEFAULT_MODEL]
16
17
  const formatFlag = ['-f, --format <format>', 'Output format: string, number, object, array', 'string']
17
18
  const schemaFlag = [
18
19
  '-s, --schema <schema>',
19
20
  'Zod schema for object/array format (required when format is object or array)',
20
21
  ]
22
+ const crawlerFlag = [
23
+ '-c, --crawler <crawler>',
24
+ 'Crawler to use for fetching URLs: fetch, chrome',
25
+ process.env.HEYI_CRAWLER ?? DEFAULT_CRAWLER,
26
+ ]
21
27
  const fileFlag = [
22
28
  '--file <path>',
23
29
  'Read content from file and include as context (can be used multiple times)',
@@ -50,6 +56,7 @@ const varFlag = [
50
56
  const hasModelFlag = hasFlag(['--model', '-m'])
51
57
  const hasFormatFlag = hasFlag(['--format', '-f'])
52
58
  const hasSchemaFlag = hasFlag(['--schema', '-s'])
59
+ const hasCrawlerFlag = hasFlag(['--crawler', '-c'])
53
60
 
54
61
  const program = new Command()
55
62
 
@@ -80,8 +87,8 @@ Examples:
80
87
  $ heyi prompt "Preset in {{language}}" --var language="German"
81
88
 
82
89
  # Environment variables
83
- $ MODEL=perplexity/sonar heyi prompt "Explain AI"
84
- $ API_KEY=your-key heyi prompt "Hello, AI!"
90
+ $ HEYI_MODEL=perplexity/sonar heyi prompt "Explain AI"
91
+ $ HEYI_API_KEY=your-key heyi prompt "Hello, AI!"
85
92
 
86
93
  # Attach context
87
94
  $ heyi prompt "Summarize this content" --file input.txt
@@ -114,6 +121,7 @@ const optionsSchema = z
114
121
  model: z.string(),
115
122
  format: z.enum(['string', 'number', 'object', 'array']),
116
123
  schema: z.string().optional(),
124
+ crawler: z.enum(['fetch', 'chrome']),
117
125
  files: z.array(z.string()).default([]),
118
126
  urls: z.array(z.string()).default([]),
119
127
  vars: z.record(z.string(), z.string()).default({}),
@@ -128,6 +136,7 @@ const flagsToOptions = (flags) => {
128
136
  model: flags.model,
129
137
  format: flags.format,
130
138
  schema: flags.schema,
139
+ crawler: flags.crawler,
131
140
  files: flags.file,
132
141
  urls: flags.url,
133
142
  vars: flags.var,
@@ -136,10 +145,11 @@ const flagsToOptions = (flags) => {
136
145
 
137
146
  const mergeOptionsWithPreset = (options, presetContent) => {
138
147
  return optionsSchema.parse({
139
- // Overwrite model, format, schema only if not provided via flags
148
+ // Overwrite model, format, schema, crawler only if not provided via flags
140
149
  model: hasModelFlag ? options.model : (presetContent.model ?? options.model),
141
150
  format: hasFormatFlag ? options.format : (presetContent.format ?? options.format),
142
151
  schema: hasSchemaFlag ? options.schema : (presetContent.schema ?? options.schema),
152
+ crawler: hasCrawlerFlag ? options.crawler : (presetContent.crawler ?? options.crawler),
143
153
  // Merge files
144
154
  files: [...presetContent.files, ...options.files],
145
155
  // Merge URLs
@@ -167,7 +177,7 @@ const executePromptAction = async (prompt, flags) => {
167
177
 
168
178
  // Build the prompt and prefer the argument over stdin
169
179
  const userPrompt = replaceVariables(prompt ?? stdinContent, options.vars)
170
- const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
180
+ const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)
171
181
 
172
182
  const result = await executePrompt(finalPrompt, {
173
183
  model: options.model,
@@ -199,7 +209,7 @@ const executePresetAction = async (preset, flags) => {
199
209
 
200
210
  // Build the prompt
201
211
  const userPrompt = replaceVariables(prompt, options.vars)
202
- const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
212
+ const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)
203
213
 
204
214
  const result = await executePrompt(finalPrompt, {
205
215
  model: options.model,
@@ -223,6 +233,7 @@ program
223
233
  .option(...modelFlag)
224
234
  .option(...formatFlag)
225
235
  .option(...schemaFlag)
236
+ .option(...crawlerFlag)
226
237
  .option(...fileFlag)
227
238
  .option(...urlFlag)
228
239
  .option(...varFlag)
@@ -235,6 +246,7 @@ program
235
246
  .option(...modelFlag)
236
247
  .option(...formatFlag)
237
248
  .option(...schemaFlag)
249
+ .option(...crawlerFlag)
238
250
  .option(...fileFlag)
239
251
  .option(...urlFlag)
240
252
  .option(...varFlag)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "heyi",
3
- "version": "2.0.0",
3
+ "version": "3.0.0",
4
4
  "description": "CLI tool to execute AI prompts with flexible output formatting",
5
5
  "keywords": [
6
6
  "ai",
@@ -34,6 +34,7 @@
34
34
  "ai": "^5.0.121",
35
35
  "commander": "^14.0.2",
36
36
  "dotenv": "^16.6.1",
37
+ "puppeteer": "^24.35.0",
37
38
  "sanitize-html": "^2.17.0",
38
39
  "zod": "^4.3.5"
39
40
  },
package/src/index.js CHANGED
@@ -19,9 +19,9 @@ config()
19
19
  export const executePrompt = async (prompt, options = {}) => {
20
20
  const { model, format = 'string', schema } = options
21
21
 
22
- const apiKey = process.env.API_KEY
22
+ const apiKey = process.env.HEYI_API_KEY
23
23
  if (!apiKey) {
24
- throw new Error('API_KEY environment variable is required. Set it via environment or .env file.')
24
+ throw new Error('HEYI_API_KEY environment variable is required. Set it via environment or .env file.')
25
25
  }
26
26
 
27
27
  const openrouter = createOpenRouter({
@@ -1,5 +1,6 @@
1
1
  import { readFile } from 'node:fs/promises'
2
2
  import { createInterface } from 'node:readline'
3
+ import { launch } from 'puppeteer'
3
4
  import sanitizeHtml from 'sanitize-html'
4
5
 
5
6
  /**
@@ -57,18 +58,84 @@ export const hasStdinData = () => {
57
58
  }
58
59
 
59
60
  /**
60
- * Fetch content from a URL.
61
+ * Validate that a URL uses http or https protocol.
62
+ *
63
+ * @param {string} url - URL to validate
64
+ * @throws {Error} If the URL is malformed or uses a protocol other than http or https
65
+ */
66
+ const validateUrl = (url) => {
67
+ try {
68
+ const parsedUrl = new URL(url)
69
+ if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
70
+ throw new Error(`Invalid protocol '${parsedUrl.protocol}'. Only http and https are supported.`)
71
+ }
72
+ } catch (error) {
73
+ if (error instanceof TypeError) {
74
+ throw new Error(`Invalid URL format: ${url}`)
75
+ }
76
+ throw error
77
+ }
78
+ }
79
+
80
+ /**
81
+ * Fetch content from a URL using fetch API.
82
+ *
83
+ * @param {string} url - URL to fetch content from
84
+ * @returns {Promise<string>} The URL content
85
+ */
86
+ const fetchUrlContentWithFetch = async (url) => {
87
+ validateUrl(url)
88
+ const response = await fetch(url)
89
+ if (!response.ok) {
90
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`)
91
+ }
92
+ const html = await response.text()
93
+ // Sanitize HTML to extract only text content and avoid large data
94
+ const cleanText = sanitizeHtml(html, {
95
+ allowedTags: [],
96
+ allowedAttributes: {},
97
+ allowedSchemes: [],
98
+ allowedSchemesAppliedToAttributes: [],
99
+ })
100
+ return cleanText.trim()
101
+ }
102
+
103
+ /**
104
+ * Fetch content from a URL using Chrome/Puppeteer.
61
105
  *
62
106
  * @param {string} url - URL to fetch content from
63
107
  * @returns {Promise<string>} The URL content
64
108
  */
65
- export const fetchUrlContent = async (url) => {
109
+ const fetchUrlContentWithChrome = async (url) => {
110
+ validateUrl(url)
111
+
112
+ const browser = await launch({
113
+ headless: true,
114
+ // These args are required for running in containerized environments (e.g., Docker, CI/CD)
115
+ args: ['--no-sandbox', '--disable-setuid-sandbox'],
116
+ })
117
+
66
118
  try {
67
- const response = await fetch(url)
68
- if (!response.ok) {
69
- throw new Error(`HTTP ${response.status}: ${response.statusText}`)
119
+ const page = await browser.newPage()
120
+
121
+ // Wait for network to be idle, with a 10-second timeout to prevent indefinite waiting.
122
+ // If timeout occurs, continue with whatever content is available.
123
+ // Also wait for the navigation event concurrently, in case there are redirects.
124
+ try {
125
+ await Promise.all([
126
+ page.waitForNavigation({ timeout: 10000 }),
127
+ page.goto(url, { waitUntil: 'networkidle0', timeout: 10000 }),
128
+ ])
129
+ } catch (error) {
130
+ // If it's a timeout error, continue with the content that's already loaded
131
+ // For other errors (e.g., network errors), rethrow
132
+ if (!error.message.includes('timeout') && !error.message.includes('Navigation timeout')) {
133
+ throw error
134
+ }
70
135
  }
71
- const html = await response.text()
136
+
137
+ const html = await page.content()
138
+
72
139
  // Sanitize HTML to extract only text content and avoid large data
73
140
  const cleanText = sanitizeHtml(html, {
74
141
  allowedTags: [],
@@ -77,6 +144,21 @@ export const fetchUrlContent = async (url) => {
77
144
  allowedSchemesAppliedToAttributes: [],
78
145
  })
79
146
  return cleanText.trim()
147
+ } finally {
148
+ await browser.close()
149
+ }
150
+ }
151
+
152
+ /**
153
+ * Fetch content from a URL.
154
+ *
155
+ * @param {string} url - URL to fetch content from
156
+ * @param {string} crawler - Crawler to use: 'fetch' or 'chrome' (default: 'fetch')
157
+ * @returns {Promise<string>} The URL content
158
+ */
159
+ export const fetchUrlContent = async (url, crawler = 'fetch') => {
160
+ try {
161
+ return crawler === 'chrome' ? await fetchUrlContentWithChrome(url) : await fetchUrlContentWithFetch(url)
80
162
  } catch (error) {
81
163
  throw new Error(`Failed to fetch URL '${url}'`, { cause: error })
82
164
  }
@@ -6,6 +6,7 @@ const presetSchema = z.object({
6
6
  model: z.string().optional(),
7
7
  format: z.enum(['string', 'number', 'object', 'array']).optional(),
8
8
  schema: z.string().optional(),
9
+ crawler: z.enum(['fetch', 'chrome']).optional(),
9
10
  files: z.array(z.string()).default([]),
10
11
  urls: z.array(z.string()).default([]),
11
12
  })
@@ -6,9 +6,10 @@ import { fetchUrlContent, readFileContent } from './input.js'
6
6
  * @param {string} prompt - The prompt
7
7
  * @param {string[]} filePaths - Array of file paths to include as context
8
8
  * @param {string[]} urls - Array of URLs to include as context
9
+ * @param {string} crawler - Crawler to use for fetching URLs: 'fetch' or 'chrome' (default: 'fetch')
9
10
  * @returns {Promise<string>} The final prompt with all contexts combined
10
11
  */
11
- export const buildPrompt = async (prompt, filePaths = [], urls = []) => {
12
+ export const buildPrompt = async (prompt, filePaths = [], urls = [], crawler = 'fetch') => {
12
13
  // Handle file content as context
13
14
  const fileContents = []
14
15
  for (const filePath of filePaths) {
@@ -19,7 +20,7 @@ export const buildPrompt = async (prompt, filePaths = [], urls = []) => {
19
20
  // Handle URL content as context
20
21
  const urlContents = []
21
22
  for (const url of urls) {
22
- const content = await fetchUrlContent(url)
23
+ const content = await fetchUrlContent(url, crawler)
23
24
  urlContents.push({ path: url, content })
24
25
  }
25
26