felo-ai 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,232 +1,316 @@
1
- #!/usr/bin/env node
2
-
3
- const DEFAULT_API_BASE = 'https://openapi.felo.ai';
4
- const DEFAULT_FORMAT = 'markdown';
5
- const DEFAULT_TIMEOUT_MS = 60_000;
6
- const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
7
- const SPINNER_INTERVAL_MS = 80;
8
- const STATUS_PAD = 56;
9
-
10
- function startSpinner(message) {
11
- const start = Date.now();
12
- let i = 0;
13
- const id = setInterval(() => {
14
- const elapsed = Math.floor((Date.now() - start) / 1000);
15
- const line = `${message} ${SPINNER_FRAMES[i % SPINNER_FRAMES.length]} ${elapsed}s`;
16
- process.stderr.write(`\r${line.padEnd(STATUS_PAD, ' ')}`);
17
- i += 1;
18
- }, SPINNER_INTERVAL_MS);
19
- return id;
20
- }
21
-
22
- function stopSpinner(id) {
23
- if (id != null) clearInterval(id);
24
- process.stderr.write(`\r${' '.repeat(STATUS_PAD)}\r`);
25
- }
26
-
27
- function usage() {
28
- console.error(
29
- [
30
- 'Usage:',
31
- ' node felo-web-fetch/scripts/run_web_fetch.mjs --url <url> [options]',
32
- '',
33
- 'Options:',
34
- ' --url <url> Page URL to fetch (required)',
35
- ' --format <format> Output format: html, text, markdown (default: markdown)',
36
- ' --target-selector <s> CSS selector for target element only',
37
- ' --wait-for-selector <s> Wait for selector before fetch',
38
- ' --readability Enable readability (main content only)',
39
- ' --crawl-mode <mode> fast or fine (default: fast)',
40
- ' --timeout <ms> Request timeout in ms (default: 60000)',
41
- ' --json Print full API response as JSON',
42
- ' --help Show this help',
43
- ].join('\n')
44
- );
45
- }
46
-
47
- function parseArgs(argv) {
48
- const out = {
49
- url: '',
50
- format: DEFAULT_FORMAT,
51
- targetSelector: '',
52
- waitForSelector: '',
53
- readability: false,
54
- crawlMode: 'fast',
55
- timeoutMs: DEFAULT_TIMEOUT_MS,
56
- json: false,
57
- };
58
-
59
- for (let i = 0; i < argv.length; i += 1) {
60
- const a = argv[i];
61
- if (a === '--help' || a === '-h') {
62
- out.help = true;
63
- } else if (a === '--json') {
64
- out.json = true;
65
- } else if (a === '--readability') {
66
- out.readability = true;
67
- } else if (a === '--url') {
68
- const next = argv[i + 1];
69
- if (next === undefined || next === null || String(next).trim() === '' || String(next).startsWith('-')) {
70
- out.url = '';
71
- } else {
72
- out.url = String(next).trim();
73
- }
74
- i += 1;
75
- } else if (a === '--format') {
76
- const f = (argv[i + 1] ?? '').toLowerCase();
77
- out.format = ['html', 'text', 'markdown'].includes(f) ? f : DEFAULT_FORMAT;
78
- i += 1;
79
- } else if (a === '--target-selector') {
80
- out.targetSelector = (argv[i + 1] ?? '').trim();
81
- i += 1;
82
- } else if (a === '--wait-for-selector') {
83
- out.waitForSelector = (argv[i + 1] ?? '').trim();
84
- i += 1;
85
- } else if (a === '--crawl-mode') {
86
- const m = (argv[i + 1] ?? '').toLowerCase();
87
- out.crawlMode = ['fast', 'fine'].includes(m) ? m : 'fast';
88
- i += 1;
89
- } else if (a === '--timeout') {
90
- const n = Number.parseInt(argv[i + 1] ?? '', 10);
91
- if (Number.isFinite(n) && n > 0) out.timeoutMs = n;
92
- i += 1;
93
- }
94
- }
95
-
96
- return out;
97
- }
98
-
99
- function getMessage(payload) {
100
- return (
101
- payload?.message ||
102
- payload?.error ||
103
- payload?.msg ||
104
- payload?.code ||
105
- 'Unknown error'
106
- );
107
- }
108
-
109
- async function fetchJson(url, init, timeoutMs) {
110
- const controller = new AbortController();
111
- const timer = setTimeout(() => controller.abort(), timeoutMs);
112
- try {
113
- const res = await fetch(url, { ...init, signal: controller.signal });
114
- let body = {};
115
- try {
116
- body = await res.json();
117
- } catch {
118
- body = {};
119
- }
120
-
121
- if (!res.ok) {
122
- throw new Error(`HTTP ${res.status}: ${getMessage(body)}`);
123
- }
124
- const code = body.code;
125
- const hasData = body?.data != null;
126
- const successCodes = [0, 200];
127
- const hasSuccessCode =
128
- successCodes.includes(Number(code)) ||
129
- code === undefined ||
130
- code === null ||
131
- (hasData && res.ok);
132
- if (!hasSuccessCode) {
133
- throw new Error(getMessage(body));
134
- }
135
- return body;
136
- } finally {
137
- clearTimeout(timer);
138
- }
139
- }
140
-
141
- function stringifyContent(content) {
142
- if (content == null) return '';
143
- if (typeof content === 'string') return content;
144
- if (typeof content === 'object') {
145
- if (content.markdown) return content.markdown;
146
- if (content.text) return content.text;
147
- if (content.html) return content.html;
148
- return JSON.stringify(content, null, 2);
149
- }
150
- return String(content);
151
- }
152
-
153
- async function main() {
154
- const args = parseArgs(process.argv.slice(2));
155
- if (args.help) {
156
- usage();
157
- process.exit(0);
158
- }
159
- if (!args.url) {
160
- usage();
161
- process.exit(1);
162
- }
163
-
164
- const apiKey = process.env.FELO_API_KEY?.trim();
165
- if (!apiKey) {
166
- console.error('ERROR: FELO_API_KEY not set');
167
- process.exit(1);
168
- }
169
-
170
- const apiBase = (process.env.FELO_API_BASE?.trim() || DEFAULT_API_BASE).replace(/\/$/, '');
171
-
172
- const shortUrl = args.url.length > 45 ? args.url.slice(0, 42) + '...' : args.url;
173
- const spinnerId = startSpinner(`Fetching ${shortUrl}`);
174
-
175
- try {
176
- const body = {
177
- url: args.url,
178
- output_format: args.format,
179
- crawl_mode: args.crawlMode,
180
- with_readability: args.readability,
181
- timeout: args.timeoutMs,
182
- };
183
- if (args.targetSelector) body.target_selector = args.targetSelector;
184
- if (args.waitForSelector) body.wait_for_selector = args.waitForSelector;
185
-
186
- const payload = await fetchJson(
187
- `${apiBase}/v2/web/extract`,
188
- {
189
- method: 'POST',
190
- headers: {
191
- Accept: 'application/json',
192
- Authorization: `Bearer ${apiKey}`,
193
- 'Content-Type': 'application/json',
194
- },
195
- body: JSON.stringify(body),
196
- },
197
- args.timeoutMs
198
- );
199
-
200
- const data = payload?.data ?? {};
201
- const content = data?.content;
202
-
203
- if (args.json) {
204
- console.log(JSON.stringify(payload, null, 2));
205
- return;
206
- }
207
-
208
- const out = stringifyContent(content);
209
- const isEmpty = out == null || String(out).trim() === '';
210
- if (isEmpty) {
211
- stopSpinner(spinnerId);
212
- process.stderr.write(
213
- `No content fetched from ${args.url}. The page may be empty, blocked, or the selector did not match.\n`
214
- );
215
- process.exit(1);
216
- }
217
- console.log(out);
218
- } finally {
219
- stopSpinner(spinnerId);
220
- }
221
- }
222
-
223
- main().catch((err) => {
224
- let url = '';
225
- const argv = process.argv.slice(2);
226
- const i = argv.findIndex((a) => a === '--url' || a === '-u');
227
- if (i >= 0 && argv[i + 1]) url = argv[i + 1];
228
- process.stderr.write(
229
- `Web fetch failed${url ? ` for ${url}` : ''}: ${err?.message || err}\n`
230
- );
231
- process.exit(1);
232
- });
1
+ #!/usr/bin/env node
2
+
3
/** Base URL of the Felo Open API; used when FELO_API_BASE is unset or blank. */
const DEFAULT_API_BASE = 'https://openapi.felo.ai';

/** Client-side request timeout in seconds, applied when --timeout is absent or invalid. */
const DEFAULT_TIMEOUT_SEC = 60;

/** Animation frames the progress spinner cycles through, one per tick. */
const SPINNER_FRAMES = [...'⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'];

/** Delay between two spinner frames, in milliseconds. */
const SPINNER_INTERVAL_MS = 80;

/** Width, in columns, that the spinner status line is padded to and cleared with. */
const STATUS_PAD = 56;
8
+
9
/**
 * Start an animated "<message> <frame> <elapsed>s" status line on stderr.
 *
 * The line is redrawn in place (carriage return, no newline) every
 * SPINNER_INTERVAL_MS milliseconds until the returned handle is passed to
 * stopSpinner.
 *
 * @param {string} message - Text shown in front of the spinner frame.
 * @returns {ReturnType<typeof setInterval> | null} Interval handle, or null
 *   when stderr is not a terminal and no animation was started.
 */
function startSpinner(message) {
  // In-place redraws only make sense on a terminal; on a redirected stderr
  // they would just fill the log with one frame every 80 ms.
  if (!process.stderr.isTTY) return null;

  const startedAt = Date.now();
  let tick = 0;
  return setInterval(() => {
    const elapsed = Math.floor((Date.now() - startedAt) / 1000);
    const suffix = ` ${SPINNER_FRAMES[tick % SPINNER_FRAMES.length]} ${elapsed}s`;

    // Keep the whole line within STATUS_PAD columns: that is the width the
    // line is later cleared with, so anything wider would leave residue on
    // screen. The message gives way, never the frame/elapsed suffix.
    const room = Math.max(STATUS_PAD - suffix.length, 0);
    const label =
      message.length > room ? `${message.slice(0, Math.max(room - 3, 0))}...` : message;

    process.stderr.write(`\r${`${label}${suffix}`.padEnd(STATUS_PAD, ' ')}`);
    tick += 1;
  }, SPINNER_INTERVAL_MS);
}
20
+
21
/**
 * Stop a spinner started by startSpinner and erase its status line.
 *
 * @param {ReturnType<typeof setInterval> | null | undefined} id - Handle
 *   returned by startSpinner; null/undefined means no timer needs clearing.
 * @returns {void}
 */
function stopSpinner(id) {
  if (id != null) clearInterval(id);

  const stream = process.stderr;
  // Nothing was drawn in place on a redirected stderr, and writing padding
  // there would only put blank noise into the log.
  if (!stream.isTTY) return;

  // Erase the entire current line instead of a fixed number of columns, so a
  // status line wider than STATUS_PAD does not leave its tail on screen.
  if (typeof stream.cursorTo === 'function' && typeof stream.clearLine === 'function') {
    stream.cursorTo(0);
    stream.clearLine(0);
    return;
  }

  // Fallback for terminal-like streams without the tty helpers.
  stream.write(`\r${' '.repeat(STATUS_PAD)}\r`);
}
25
+
26
+ function usage() {
27
+ console.error(
28
+ [
29
+ 'Usage:',
30
+ ' node felo-web-fetch/scripts/run_web_fetch.mjs --url <url> [options]',
31
+ '',
32
+ 'Required:',
33
+ ' --url <url> Target page URL',
34
+ '',
35
+ 'Options:',
36
+ ' --output-format <format> html | markdown | text',
37
+ ' --crawl-mode <mode> fast | fine',
38
+ ' --target-selector <selector> CSS selector for target extraction',
39
+ ' --wait-for-selector <selector> Wait until selector appears',
40
+ ' --cookie <cookie> Add cookie entry (repeatable)',
41
+ ' --set-cookies-json <json> JSON array for set_cookies',
42
+ ' --user-agent <ua> Custom user-agent',
43
+ ' --timeout <seconds> Request timeout in seconds (default 60)',
44
+ ' --request-timeout-ms <ms> API timeout parameter in milliseconds',
45
+ ' --with-readability <bool> true | false',
46
+ ' --with-links-summary <bool> true | false',
47
+ ' --with-images-summary <bool> true | false',
48
+ ' --with-images-readability <bool> true | false',
49
+ ' --with-images <bool> true | false',
50
+ ' --with-links <bool> true | false',
51
+ ' --ignore-empty-text-image <bool> true | false',
52
+ ' --with-cache <bool> true | false',
53
+ ' --with-stypes <bool> true | false',
54
+ ' --json Print full JSON response',
55
+ ' --help Show this help',
56
+ ].join('\n')
57
+ );
58
+ }
59
+
60
/**
 * Convert a command-line token to a boolean.
 *
 * Surrounding whitespace and letter case are ignored; only the words
 * "true" and "false" are accepted.
 *
 * @param {unknown} v - Raw token that followed the flag.
 * @param {string} name - Flag name, used in error messages.
 * @returns {boolean}
 * @throws {Error} When v is not a string (no value given) or is neither
 *   "true" nor "false".
 */
function parseBool(v, name) {
  if (typeof v !== 'string') throw new Error(`Missing value for ${name}`);

  switch (v.trim().toLowerCase()) {
    case 'true':
      return true;
    case 'false':
      return false;
    default:
      throw new Error(`Invalid boolean for ${name}: ${v}. Use true or false.`);
  }
}
69
+
70
/**
 * Parse the command-line arguments into an options object.
 *
 * Behavior:
 * - `--help` / `-h` and `--json` are value-less switches.
 * - Every other recognized option takes the following token as its value.
 *   A following token that is itself an option (`--something` or `-h`) is
 *   NOT consumed: the option is treated as having no value, and the token is
 *   parsed as its own option. So `--url --json` gives an empty url with
 *   json enabled, rather than the url "--json".
 * - `--cookie` may be repeated; empty values are skipped.
 * - `--output-format` / `--crawl-mode` are trimmed and lower-cased; `--url`
 *   is trimmed; the remaining text options are stored as given.
 * - A missing, non-numeric or non-positive `--timeout` falls back to
 *   DEFAULT_TIMEOUT_SEC; an invalid `--request-timeout-ms` becomes null.
 * - Unrecognized tokens are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {object} Parsed options; boolean `with*` fields stay null unless
 *   the corresponding flag was given.
 * @throws {Error} From parseBool, when a boolean option has no value or a
 *   value other than true/false.
 */
function parseArgs(argv) {
  const out = {
    url: '',
    outputFormat: '',
    crawlMode: '',
    targetSelector: '',
    waitForSelector: '',
    cookies: [],
    cookiesJson: '',
    userAgent: '',
    timeoutSec: DEFAULT_TIMEOUT_SEC,
    requestTimeoutMs: null,
    withReadability: null,
    withLinksSummary: null,
    withImagesSummary: null,
    withImagesReadability: null,
    withImages: null,
    withLinks: null,
    ignoreEmptyTextImage: null,
    withCache: null,
    withStypes: null,
    json: false,
    help: false,
  };

  // Converters from the raw token (undefined when missing) to the stored value.
  const text = (raw) => raw ?? '';
  const trimmed = (raw) => (raw ?? '').trim();
  const lowerCased = (raw) => (raw ?? '').trim().toLowerCase();
  const integer = (raw) => Number.parseInt(raw ?? '', 10);

  // flag -> [field on `out`, converter]
  const valueOptions = new Map([
    ['--url', ['url', trimmed]],
    ['--output-format', ['outputFormat', lowerCased]],
    ['--crawl-mode', ['crawlMode', lowerCased]],
    ['--target-selector', ['targetSelector', text]],
    ['--wait-for-selector', ['waitForSelector', text]],
    ['--set-cookies-json', ['cookiesJson', text]],
    ['--user-agent', ['userAgent', text]],
    ['--timeout', ['timeoutSec', integer]],
    ['--request-timeout-ms', ['requestTimeoutMs', integer]],
  ]);

  // flag -> field on `out`; values go through parseBool.
  const booleanOptions = new Map([
    ['--with-readability', 'withReadability'],
    ['--with-links-summary', 'withLinksSummary'],
    ['--with-images-summary', 'withImagesSummary'],
    ['--with-images-readability', 'withImagesReadability'],
    ['--with-images', 'withImages'],
    ['--with-links', 'withLinks'],
    ['--ignore-empty-text-image', 'ignoreEmptyTextImage'],
    ['--with-cache', 'withCache'],
    ['--with-stypes', 'withStypes'],
  ]);

  const looksLikeOption = (token) => token.startsWith('--') || token === '-h';

  for (let i = 0; i < argv.length; i += 1) {
    const flag = argv[i];

    if (flag === '--help' || flag === '-h') {
      out.help = true;
      continue;
    }
    if (flag === '--json') {
      out.json = true;
      continue;
    }

    const isBoolean = booleanOptions.has(flag);
    const takesValue = isBoolean || flag === '--cookie' || valueOptions.has(flag);
    if (!takesValue) continue;

    // Only consume the next token when it can actually be a value; otherwise
    // leave it in place so it is still parsed as the option it is.
    const next = argv[i + 1];
    const hasValue = typeof next === 'string' && !looksLikeOption(next);
    const raw = hasValue ? next : undefined;
    if (hasValue) i += 1;

    if (isBoolean) {
      out[booleanOptions.get(flag)] = parseBool(raw, flag);
    } else if (flag === '--cookie') {
      if (raw) out.cookies.push(raw);
    } else {
      const [field, convert] = valueOptions.get(flag);
      out[field] = convert(raw);
    }
  }

  if (!Number.isFinite(out.timeoutSec) || out.timeoutSec <= 0) {
    out.timeoutSec = DEFAULT_TIMEOUT_SEC;
  }
  if (
    out.requestTimeoutMs !== null &&
    (!Number.isFinite(out.requestTimeoutMs) || out.requestTimeoutMs <= 0)
  ) {
    out.requestTimeoutMs = null;
  }
  return out;
}
170
+
171
/**
 * Validate that an optional value is one of the allowed choices.
 *
 * A falsy value (option not provided) is always accepted.
 *
 * @param {string} value - Value to check.
 * @param {string[]} allowed - Accepted values.
 * @param {string} fieldName - Option name used in the error message.
 * @returns {void}
 * @throws {Error} When value is set but not contained in allowed.
 */
function ensureInSet(value, allowed, fieldName) {
  const isAcceptable = !value || allowed.includes(value);
  if (isAcceptable) return;

  const choices = allowed.join(', ');
  throw new Error(`Invalid ${fieldName}: ${value}. Allowed values: ${choices}`);
}
177
+
178
/**
 * Decide whether a parsed response body reports a failure.
 *
 * A numeric `code` is authoritative: any value other than 0 is an error.
 * Without a numeric code, a string `status` equal to "error" (in any letter
 * case) is an error. Everything else is treated as success.
 *
 * @param {any} payload - Parsed response body; may be null or undefined.
 * @returns {boolean} True when the payload signals an error.
 */
function isApiError(payload) {
  const code = payload?.code;
  if (typeof code === 'number') return code !== 0;

  const status = payload?.status;
  return typeof status === 'string' && status.toLowerCase() === 'error';
}
187
+
188
/**
 * Extract a human-readable error message from a response body.
 *
 * Looks at `message`, `error` and `msg` in that order and uses the first
 * truthy one. An object-valued field is unwrapped via its own
 * `message`/`msg`, or serialized as JSON, instead of being rendered as
 * "[object Object]". When none of the fields is set, a present `code` is
 * included so the failure stays identifiable.
 *
 * @param {any} payload - Parsed response body; may be null or undefined.
 * @returns {string} The message, or "Unknown error" when nothing usable exists.
 */
function getMessage(payload) {
  for (const candidate of [payload?.message, payload?.error, payload?.msg]) {
    if (!candidate) continue;
    if (typeof candidate !== 'object') return String(candidate);

    const nested = candidate.message || candidate.msg;
    if (nested && typeof nested !== 'object') return String(nested);

    try {
      return JSON.stringify(candidate);
    } catch {
      // Not serializable (e.g. circular); fall back to the generic rendering.
      return String(candidate);
    }
  }

  const code = payload?.code;
  if (code != null && code !== '') return `Unknown error (code ${code})`;
  return 'Unknown error';
}
191
+
192
/**
 * Perform an HTTP request with a client-side timeout and return the parsed
 * JSON body.
 *
 * @param {string} url - Request URL.
 * @param {object} init - Options forwarded to fetch(); `signal` is overridden
 *   with the timeout's abort signal.
 * @param {number} timeoutMs - Abort the request after this many milliseconds.
 * @returns {Promise<any>} The parsed body, or `{}` when the body is not valid
 *   JSON.
 * @throws {Error} "Request timed out after N ms" when the timeout fires
 *   (while waiting for the response or while reading its body);
 *   "HTTP <status>: <message>" for non-2xx responses; the API's own message
 *   when isApiError flags the body; any network error from fetch().
 */
async function fetchJson(url, init, timeoutMs) {
  const controller = new AbortController();
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, timeoutMs);

  try {
    const res = await fetch(url, { ...init, signal: controller.signal });

    let body = {};
    try {
      body = await res.json();
    } catch (err) {
      // An unparseable body is tolerated (error pages are often not JSON),
      // but an abort while the body was still downloading is a timeout and
      // must not be reported as an empty successful response.
      if (timedOut) throw err;
      body = {};
    }

    if (!res.ok) {
      throw new Error(`HTTP ${res.status}: ${getMessage(body)}`);
    }
    if (isApiError(body)) {
      throw new Error(getMessage(body));
    }
    return body;
  } catch (err) {
    // Replace the opaque abort error with one that names the actual cause.
    if (timedOut) {
      throw new Error(`Request timed out after ${timeoutMs} ms`, { cause: err });
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
215
+
216
/**
 * Build the JSON request body from the parsed command-line options.
 *
 * `url` is always present. Text options are added only when non-empty,
 * `timeout` only when a request timeout was given, and each boolean switch
 * only when it is not null, so unset options are left out of the body.
 * `set_cookies` comes from the repeated --cookie values; when
 * --set-cookies-json is also given, its parsed array replaces them.
 *
 * @param {object} args - Options as produced by parseArgs.
 * @returns {object} Request body with snake_case keys.
 * @throws {Error} "Invalid --set-cookies-json: ..." when the JSON cannot be
 *   parsed or is not an array.
 */
function buildPayload(args) {
  const payload = { url: args.url };

  const textFields = [
    ['output_format', args.outputFormat],
    ['crawl_mode', args.crawlMode],
    ['target_selector', args.targetSelector],
    ['wait_for_selector', args.waitForSelector],
    ['user_agent', args.userAgent],
  ];
  for (const [key, value] of textFields) {
    if (value) payload[key] = value;
  }

  if (args.requestTimeoutMs !== null) payload.timeout = args.requestTimeoutMs;

  if (args.cookies.length) payload.set_cookies = args.cookies;
  if (args.cookiesJson) {
    let cookieList;
    try {
      cookieList = JSON.parse(args.cookiesJson);
      if (!Array.isArray(cookieList)) {
        throw new Error('set_cookies JSON must be an array');
      }
    } catch (err) {
      throw new Error(`Invalid --set-cookies-json: ${String(err.message || err)}`);
    }
    payload.set_cookies = cookieList;
  }

  const switches = [
    ['with_readability', args.withReadability],
    ['with_links_summary', args.withLinksSummary],
    ['with_images_summary', args.withImagesSummary],
    ['with_images_readability', args.withImagesReadability],
    ['with_images', args.withImages],
    ['with_links', args.withLinks],
    ['ignore_empty_text_image', args.ignoreEmptyTextImage],
    ['with_cache', args.withCache],
    ['with_stypes', args.withStypes],
  ];
  for (const [key, value] of switches) {
    if (value !== null) payload[key] = value;
  }

  return payload;
}
253
+
254
/**
 * CLI entry point: parse arguments, call the web-extract endpoint and print
 * the result.
 *
 * Output: with --json the whole response is printed as pretty JSON;
 * otherwise `data.content` is printed as-is when it is a string, or as
 * pretty JSON (falling back to `data`, then the whole response) when not.
 *
 * Exit status: 0 on success and for --help; 1 when --url or FELO_API_KEY is
 * missing, or when validation or the request fails. Failures are reported
 * on stderr as a single "ERROR: ..." line; this function does not reject.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let spinnerId = null;
  let spinnerRunning = false;
  // Stops the spinner at most once, and only if it was started.
  const clearSpinner = () => {
    if (!spinnerRunning) return;
    spinnerRunning = false;
    stopSpinner(spinnerId);
  };

  // Argument parsing and validation live inside the try as well, so that an
  // invalid flag value is reported like any other error instead of escaping
  // as an unhandled rejection.
  try {
    const args = parseArgs(process.argv.slice(2));
    if (args.help) {
      usage();
      process.exit(0);
    }

    if (!args.url) {
      usage();
      process.exit(1);
    }

    ensureInSet(args.outputFormat, ['html', 'markdown', 'text'], 'output-format');
    ensureInSet(args.crawlMode, ['fast', 'fine'], 'crawl-mode');

    const apiKey = process.env.FELO_API_KEY?.trim();
    if (!apiKey) {
      console.error('ERROR: FELO_API_KEY not set');
      process.exit(1);
    }

    const apiBase = (process.env.FELO_API_BASE?.trim() || DEFAULT_API_BASE).replace(/\/$/, '');
    const payload = buildPayload(args);

    const shortUrl = args.url.length > 45 ? `${args.url.slice(0, 42)}...` : args.url;
    spinnerId = startSpinner(`Fetching ${shortUrl}`);
    spinnerRunning = true;

    const response = await fetchJson(
      `${apiBase}/v2/web/extract`,
      {
        method: 'POST',
        headers: {
          Accept: 'application/json',
          Authorization: `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(payload),
      },
      args.timeoutSec * 1000
    );

    // Remove the status line before anything is printed.
    clearSpinner();

    if (args.json) {
      console.log(JSON.stringify(response, null, 2));
      return;
    }

    const content = response?.data?.content;
    if (typeof content === 'string') {
      console.log(content);
      return;
    }

    console.log(JSON.stringify(content ?? response?.data ?? response, null, 2));
  } catch (err) {
    // Clear the status line first so the message starts on a clean line.
    clearSpinner();
    console.error(`ERROR: ${String(err?.message || err || 'Unknown error')}`);
    // Set the exit code instead of calling process.exit(): exit() would skip
    // the finally block and may cut off output that is still being flushed.
    process.exitCode = 1;
  } finally {
    clearSpinner();
  }
}
315
+
316
// Run the CLI. Anything main() lets escape (for example an error thrown while
// validating arguments) is reported as a single error line with exit status 1
// rather than surfacing as an unhandled promise rejection with a stack trace.
main().catch((err) => {
  console.error(`ERROR: ${String(err?.message || err || 'Unknown error')}`);
  process.exitCode = 1;
});