@pi-unipi/web-api 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -15
- package/package.json +9 -2
- package/skills/web/SKILL.md +54 -11
- package/src/engine/constants.ts +36 -0
- package/src/engine/dependencies.ts +145 -0
- package/src/engine/dom.ts +266 -0
- package/src/engine/extract.ts +642 -0
- package/src/engine/format.ts +306 -0
- package/src/engine/profiles.ts +102 -0
- package/src/engine/types.ts +169 -0
- package/src/index.ts +9 -2
- package/src/providers/base.ts +9 -1
- package/src/settings.ts +70 -4
- package/src/tools.ts +281 -24
- package/src/tui/progress.ts +168 -0
- package/src/tui/result.ts +173 -0
- package/src/tui/settings-dialog.ts +168 -0
package/src/settings.ts
CHANGED
|
@@ -27,12 +27,42 @@ export interface CacheSettings {
|
|
|
27
27
|
ttlMs: number;
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
/**
 * Settings that act as defaults for smart-fetch requests.
 */
export interface SmartFetchSettings {
  /** Browser profile used for the TLS fingerprint. */
  browser: string;
  /** Operating system used for the fingerprint. */
  os: string;
  /** Upper bound on the number of content characters. */
  maxChars: number;
  /** Request timeout, in milliseconds. */
  timeoutMs: number;
  /** Concurrency used when fetching a batch. */
  batchConcurrency: number;
  /** When true, image references are stripped. */
  removeImages: boolean;
  /** Whether replies/comments are included; the literal "extractors" is also accepted. */
  includeReplies: boolean | "extractors";
}
|
|
47
|
+
|
|
30
48
|
/**
 * Shape of the stored web-api configuration.
 */
export interface WebApiConfig {
  /** Provider settings, keyed by string. */
  providers: Record<string, ProviderSettings>;
  /** Cache settings. */
  cache: CacheSettings;
  /** Optional smart-fetch overrides; any subset of the settings may be present. */
  smartFetch?: Partial<SmartFetchSettings>;
}
|
|
35
54
|
|
|
55
|
+
/**
 * Built-in smart-fetch settings, used for every field that the saved
 * configuration does not override.
 */
const DEFAULT_SMART_FETCH_SETTINGS: SmartFetchSettings = {
  // Fingerprint profile.
  browser: "chrome_145",
  os: "windows",
  // Output size limit, in characters.
  maxChars: 50_000,
  // Per-request timeout, in milliseconds.
  timeoutMs: 15_000,
  // Concurrency for batch mode.
  batchConcurrency: 8,
  // Content shaping.
  removeImages: false,
  includeReplies: "extractors",
};
|
|
65
|
+
|
|
36
66
|
/** Default configuration */
|
|
37
67
|
const DEFAULT_CONFIG: WebApiConfig = {
|
|
38
68
|
providers: {
|
|
@@ -49,6 +79,7 @@ const DEFAULT_CONFIG: WebApiConfig = {
|
|
|
49
79
|
enabled: true,
|
|
50
80
|
ttlMs: 3600000, // 1 hour
|
|
51
81
|
},
|
|
82
|
+
smartFetch: {},
|
|
52
83
|
};
|
|
53
84
|
|
|
54
85
|
/**
|
|
@@ -94,8 +125,8 @@ export function loadAuth(): WebApiAuth {
|
|
|
94
125
|
const content = fs.readFileSync(authPath, "utf-8");
|
|
95
126
|
return JSON.parse(content);
|
|
96
127
|
}
|
|
97
|
-
} catch
|
|
98
|
-
|
|
128
|
+
} catch {
|
|
129
|
+
// Silently ignore — auth load failure returns empty.
|
|
99
130
|
}
|
|
100
131
|
return {};
|
|
101
132
|
}
|
|
@@ -133,8 +164,8 @@ export function loadConfig(): WebApiConfig {
|
|
|
133
164
|
},
|
|
134
165
|
};
|
|
135
166
|
}
|
|
136
|
-
} catch
|
|
137
|
-
|
|
167
|
+
} catch {
|
|
168
|
+
// Silently ignore — config load failure falls back to defaults.
|
|
138
169
|
}
|
|
139
170
|
return DEFAULT_CONFIG;
|
|
140
171
|
}
|
|
@@ -261,3 +292,38 @@ export function validateApiKeyFormat(providerId: string, apiKey: string): boolea
|
|
|
261
292
|
return apiKey.length >= 8;
|
|
262
293
|
}
|
|
263
294
|
}
|
|
295
|
+
|
|
296
|
+
/**
 * Load smart-fetch settings.
 *
 * Starts from the built-in defaults and overlays the values stored under
 * `smartFetch` in the loaded config. Stored entries whose value is `null` or
 * `undefined` are ignored, so a blanked-out field falls back to its default
 * instead of leaking into a result that is typed as fully populated.
 *
 * @returns Smart-fetch settings with every field present
 */
export function loadSmartFetchSettings(): SmartFetchSettings {
  const saved = loadConfig().smartFetch ?? {};

  // A plain spread would let `"maxChars": null` from the config file replace
  // the default, so drop nullish entries before merging.
  const overrides = Object.fromEntries(
    Object.entries(saved).filter(([, value]) => value !== undefined && value !== null)
  ) as Partial<SmartFetchSettings>;

  return {
    ...DEFAULT_SMART_FETCH_SETTINGS,
    ...overrides,
  };
}
|
|
308
|
+
|
|
309
|
+
/**
 * Save smart-fetch settings.
 *
 * Merges the given values over the currently stored `smartFetch` overrides and
 * writes the resulting config with `saveConfig`.
 *
 * @param settings - Partial settings to save; keys not given keep their stored value
 */
export function saveSmartFetchSettings(settings: Partial<SmartFetchSettings>): void {
  const config = loadConfig();

  // loadConfig() can hand back the shared DEFAULT_CONFIG object when loading
  // falls back to defaults. Build a new config instead of assigning onto the
  // loaded one, so the in-memory defaults are never modified.
  const updated: WebApiConfig = {
    ...config,
    smartFetch: {
      ...config.smartFetch,
      ...settings,
    },
  };

  saveConfig(updated);
}
|
|
321
|
+
|
|
322
|
+
/**
 * Reset smart-fetch settings to defaults.
 *
 * Stores an empty `smartFetch` override object, so that every field resolves
 * to its built-in default on the next load.
 */
export function resetSmartFetchSettings(): void {
  const config = loadConfig();

  // Write a copy rather than assigning onto the loaded object, which may be
  // the shared DEFAULT_CONFIG instance returned on fallback.
  saveConfig({ ...config, smartFetch: {} });
}
|
package/src/tools.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @unipi/web-api — Agent tools registration
|
|
3
3
|
*
|
|
4
|
-
* Registers web-search, web-read, and web-llm-summarize tools.
|
|
4
|
+
* Registers web-search, multi-web-content-read, and web-llm-summarize tools.
|
|
5
5
|
* Implements smart provider selection based on ranking.
|
|
6
6
|
*/
|
|
7
7
|
|
|
@@ -19,12 +19,20 @@ import {
|
|
|
19
19
|
getApiKey,
|
|
20
20
|
isProviderEnabled,
|
|
21
21
|
loadConfig,
|
|
22
|
+
loadSmartFetchSettings,
|
|
22
23
|
} from "./settings.js";
|
|
24
|
+
import { webCache } from "./cache.js";
|
|
25
|
+
import {
|
|
26
|
+
defuddleFetch,
|
|
27
|
+
defuddleFetchMultiple,
|
|
28
|
+
} from "./engine/extract.js";
|
|
29
|
+
import type { FetchOptions, FetchResult, BatchFetchResult } from "./engine/types.js";
|
|
30
|
+
import { formatSingleResult, formatBatchResult, formatErrorResult } from "./engine/format.js";
|
|
23
31
|
|
|
24
32
|
/**
 * Identifiers of the web tools. Declared `as const` so each value keeps its
 * literal string type.
 */
export const WEB_TOOLS = {
  // Web search tool.
  SEARCH: "web_search",
  // Content reader; takes a single URL or a batch of URLs.
  READ: "multi_web_content_read",
  // LLM summarization tool.
  SUMMARIZE: "web_llm_summarize",
} as const;
|
|
30
38
|
|
|
@@ -113,9 +121,9 @@ async function executeSearch(
|
|
|
113
121
|
}
|
|
114
122
|
|
|
115
123
|
/**
|
|
116
|
-
* Execute web read.
|
|
124
|
+
* Execute web read via provider.
|
|
117
125
|
*/
|
|
118
|
-
async function
|
|
126
|
+
async function executeProviderRead(
|
|
119
127
|
url: string,
|
|
120
128
|
sourceRank?: number
|
|
121
129
|
): Promise<ReadResult> {
|
|
@@ -151,6 +159,85 @@ async function executeSummarize(
|
|
|
151
159
|
return provider.summarize(url, prompt, config);
|
|
152
160
|
}
|
|
153
161
|
|
|
162
|
+
/**
 * Generate cache key for smart-fetch results.
 *
 * The key is the URL followed by the options that select or shape the fetched
 * content, joined with ":". Besides browser, format and maxChars it includes
 * `os`, `removeImages` and `includeReplies`, so that a result cached for one
 * combination is not returned for a request that asks for different output.
 *
 * @param url - URL being fetched
 * @param options - Fetch options; missing values contribute an empty segment
 * @returns Cache key string
 */
function generateSmartFetchKey(
  url: string,
  options: Partial<FetchOptions>
): string {
  const parts = [
    url,
    options.browser || "",
    options.format || "",
    String(options.maxChars || ""),
    options.os || "",
    // `??` keeps an explicit `false` distinguishable from "not specified".
    String(options.removeImages ?? ""),
    String(options.includeReplies ?? ""),
  ];
  // NOTE(review): `proxy` and `headers` are not part of the key — confirm
  // whether they can change the returned content.
  return parts.join(":");
}
|
|
177
|
+
|
|
178
|
+
/**
 * Execute smart-fetch read (single URL).
 *
 * Resolves the effective options (caller value, otherwise the loaded
 * smart-fetch setting), looks the request up in the web cache and, on a miss,
 * fetches with `defuddleFetch` and stores the result.
 *
 * @param url - URL to read
 * @param options - Per-call overrides for the fetch options
 * @returns The cached result if present, otherwise the fresh fetch result
 */
async function executeSmartFetchRead(
  url: string,
  options: Partial<FetchOptions> = {}
): Promise<FetchResult> {
  // Resolve options before touching the cache.
  const defaults = loadSmartFetchSettings();
  const fetchOptions: FetchOptions = {
    browser: options.browser || defaults.browser,
    os: options.os || defaults.os,
    format: options.format || "markdown",
    maxChars: options.maxChars || defaults.maxChars,
    timeoutMs: options.timeoutMs || defaults.timeoutMs,
    // `??` so that an explicit `false` from the caller is respected.
    removeImages: options.removeImages ?? defaults.removeImages,
    includeReplies: options.includeReplies ?? defaults.includeReplies,
    proxy: options.proxy,
    headers: options.headers,
  };

  // Key the cache on the resolved options rather than the raw caller input:
  // otherwise a request that omits an option keeps hitting an entry produced
  // under older saved defaults after those defaults change.
  const cacheKey = generateSmartFetchKey(url, fetchOptions);
  const cached = webCache.get(cacheKey, "smart-fetch");
  if (cached) {
    return cached as FetchResult;
  }

  const result = await defuddleFetch(url, fetchOptions);
  webCache.set(cacheKey, "smart-fetch", result);
  return result;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Execute smart-fetch batch read.
 *
 * Resolves the effective options (caller value, otherwise the loaded
 * smart-fetch setting) and passes all URLs to `defuddleFetchMultiple`.
 * Unlike the single-URL path, this function does not consult the web cache.
 *
 * @param urls - URLs to read
 * @param options - Per-call overrides, including the batch concurrency
 * @returns The batch result from `defuddleFetchMultiple`
 */
async function executeSmartFetchBatch(
  urls: string[],
  options: Partial<FetchOptions> & { batchConcurrency?: number } = {}
): Promise<BatchFetchResult> {
  const defaults = loadSmartFetchSettings();

  // The concurrency comes from tool input or the config file; normalise it to
  // a whole number of at least 1 so a negative or fractional value cannot
  // reach the fetch engine.
  const requested = options.batchConcurrency || defaults.batchConcurrency;
  const batchConcurrency =
    Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 1;

  const fetchOptions: FetchOptions & { batchConcurrency?: number } = {
    browser: options.browser || defaults.browser,
    os: options.os || defaults.os,
    format: options.format || "markdown",
    maxChars: options.maxChars || defaults.maxChars,
    timeoutMs: options.timeoutMs || defaults.timeoutMs,
    // `??` so that an explicit `false` from the caller is respected.
    removeImages: options.removeImages ?? defaults.removeImages,
    includeReplies: options.includeReplies ?? defaults.includeReplies,
    proxy: options.proxy,
    headers: options.headers,
    batchConcurrency,
  };

  return defuddleFetchMultiple(urls, fetchOptions);
}
|
|
240
|
+
|
|
154
241
|
/**
|
|
155
242
|
* Register web tools with pi.
|
|
156
243
|
*/
|
|
@@ -221,43 +308,213 @@ export function registerWebTools(pi: ExtensionAPI): void {
|
|
|
221
308
|
},
|
|
222
309
|
});
|
|
223
310
|
|
|
224
|
-
// ---
|
|
311
|
+
// --- multi_web_content_read tool ---
|
|
225
312
|
pi.registerTool({
|
|
226
313
|
name: WEB_TOOLS.READ,
|
|
227
|
-
label: "Web Read",
|
|
314
|
+
label: "Multi Web Content Read",
|
|
228
315
|
description:
|
|
229
|
-
"Read and extract content from
|
|
230
|
-
"
|
|
231
|
-
|
|
316
|
+
"Read and extract content from URLs using the smart-fetch engine (default) or provider fallbacks. " +
|
|
317
|
+
"Supports single URL or batch URLs. " +
|
|
318
|
+
"Returns clean markdown with metadata (title, author, site, word count).",
|
|
319
|
+
promptSnippet: "Read content from one or more URLs.",
|
|
232
320
|
promptGuidelines: [
|
|
233
|
-
"Use
|
|
234
|
-
"
|
|
235
|
-
"
|
|
236
|
-
"
|
|
321
|
+
"Use multi_web_content_read to extract content from web pages.",
|
|
322
|
+
"Pass a single URL string or an array of URLs for batch reading.",
|
|
323
|
+
"Default source (0 or omitted) uses the local smart-fetch engine — free, no API key.",
|
|
324
|
+
"source 1-3 uses provider fallbacks: Jina Reader, Firecrawl, Perplexity.",
|
|
325
|
+
"Batch mode: pass an array of URLs, returns results for each.",
|
|
237
326
|
],
|
|
238
327
|
parameters: Type.Object({
|
|
239
|
-
url: Type.
|
|
328
|
+
url: Type.Union([
|
|
329
|
+
Type.String({ description: "Single URL to read" }),
|
|
330
|
+
Type.Array(Type.String(), { description: "Array of URLs to read in batch" }),
|
|
331
|
+
], { description: "URL or array of URLs to read" }),
|
|
240
332
|
source: Type.Optional(
|
|
241
333
|
Type.Number({
|
|
242
334
|
description:
|
|
243
|
-
"Provider selection (1=Jina Reader, 2=Firecrawl, 3=Perplexity). " +
|
|
244
|
-
"
|
|
245
|
-
minimum:
|
|
335
|
+
"Provider selection (0=smart-fetch engine, 1=Jina Reader, 2=Firecrawl, 3=Perplexity). " +
|
|
336
|
+
"Default is 0 (smart-fetch).",
|
|
337
|
+
minimum: 0,
|
|
246
338
|
maximum: 3,
|
|
247
339
|
})
|
|
248
340
|
),
|
|
341
|
+
browser: Type.Optional(
|
|
342
|
+
Type.String({
|
|
343
|
+
description: "TLS fingerprint browser profile (e.g., chrome_145). Default: chrome_145.",
|
|
344
|
+
})
|
|
345
|
+
),
|
|
346
|
+
os: Type.Optional(
|
|
347
|
+
Type.String({
|
|
348
|
+
description: "OS fingerprint (windows, macos, linux). Default: windows.",
|
|
349
|
+
})
|
|
350
|
+
),
|
|
351
|
+
format: Type.Optional(
|
|
352
|
+
Type.Union([
|
|
353
|
+
Type.Literal("markdown"),
|
|
354
|
+
Type.Literal("html"),
|
|
355
|
+
Type.Literal("text"),
|
|
356
|
+
Type.Literal("json"),
|
|
357
|
+
], { description: "Output format. Default: markdown." })
|
|
358
|
+
),
|
|
359
|
+
maxChars: Type.Optional(
|
|
360
|
+
Type.Number({
|
|
361
|
+
description: "Maximum characters in output. Default: 50000.",
|
|
362
|
+
})
|
|
363
|
+
),
|
|
364
|
+
timeoutMs: Type.Optional(
|
|
365
|
+
Type.Number({
|
|
366
|
+
description: "Request timeout in milliseconds. Default: 15000.",
|
|
367
|
+
})
|
|
368
|
+
),
|
|
369
|
+
removeImages: Type.Optional(
|
|
370
|
+
Type.Boolean({
|
|
371
|
+
description: "Strip image references from content. Default: false.",
|
|
372
|
+
})
|
|
373
|
+
),
|
|
374
|
+
includeReplies: Type.Optional(
|
|
375
|
+
Type.Union([
|
|
376
|
+
Type.Boolean(),
|
|
377
|
+
Type.Literal("extractors"),
|
|
378
|
+
], { description: "Include replies/comments. Default: extractors." })
|
|
379
|
+
),
|
|
380
|
+
proxy: Type.Optional(
|
|
381
|
+
Type.String({
|
|
382
|
+
description: "Proxy URL for requests.",
|
|
383
|
+
})
|
|
384
|
+
),
|
|
385
|
+
batchConcurrency: Type.Optional(
|
|
386
|
+
Type.Number({
|
|
387
|
+
description: "Concurrent requests for batch mode. Default: 8.",
|
|
388
|
+
})
|
|
389
|
+
),
|
|
390
|
+
verbose: Type.Optional(
|
|
391
|
+
Type.Boolean({
|
|
392
|
+
description: "Include metadata header in output. Default: true.",
|
|
393
|
+
})
|
|
394
|
+
),
|
|
249
395
|
}),
|
|
250
396
|
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
|
|
397
|
+
const source = params.source ?? 0;
|
|
398
|
+
const verbose = params.verbose ?? true;
|
|
399
|
+
|
|
251
400
|
try {
|
|
252
|
-
|
|
401
|
+
// Single URL
|
|
402
|
+
if (typeof params.url === "string") {
|
|
403
|
+
// Provider fallback
|
|
404
|
+
if (source >= 1) {
|
|
405
|
+
const result = await executeProviderRead(params.url, source);
|
|
406
|
+
return {
|
|
407
|
+
content: [
|
|
408
|
+
{
|
|
409
|
+
type: "text",
|
|
410
|
+
text: `Content from ${result.url}:\n\n${result.content}`,
|
|
411
|
+
},
|
|
412
|
+
],
|
|
413
|
+
details: {},
|
|
414
|
+
};
|
|
415
|
+
}
|
|
253
416
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
417
|
+
// Smart-fetch engine
|
|
418
|
+
const result = await executeSmartFetchRead(params.url, {
|
|
419
|
+
browser: params.browser,
|
|
420
|
+
os: params.os,
|
|
421
|
+
format: params.format as FetchOptions["format"],
|
|
422
|
+
maxChars: params.maxChars,
|
|
423
|
+
timeoutMs: params.timeoutMs,
|
|
424
|
+
removeImages: params.removeImages,
|
|
425
|
+
includeReplies: params.includeReplies as FetchOptions["includeReplies"],
|
|
426
|
+
proxy: params.proxy,
|
|
427
|
+
});
|
|
428
|
+
|
|
429
|
+
return {
|
|
430
|
+
content: [
|
|
431
|
+
{
|
|
432
|
+
type: "text",
|
|
433
|
+
text: formatSingleResult(result, verbose),
|
|
434
|
+
},
|
|
435
|
+
],
|
|
436
|
+
details: {
|
|
437
|
+
url: result.url,
|
|
438
|
+
finalUrl: result.finalUrl,
|
|
439
|
+
title: result.title,
|
|
440
|
+
wordCount: result.wordCount,
|
|
259
441
|
},
|
|
260
|
-
|
|
442
|
+
};
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
// Batch URLs
|
|
446
|
+
if (Array.isArray(params.url)) {
|
|
447
|
+
if (params.url.length === 0) {
|
|
448
|
+
return {
|
|
449
|
+
content: [{ type: "text", text: "No URLs provided." }],
|
|
450
|
+
details: {},
|
|
451
|
+
};
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// Provider fallback for batch (fetch each individually)
|
|
455
|
+
if (source >= 1) {
|
|
456
|
+
const results = await Promise.all(
|
|
457
|
+
params.url.map(async (url) => {
|
|
458
|
+
try {
|
|
459
|
+
const result = await executeProviderRead(url, source);
|
|
460
|
+
return { url, status: "done", content: result.content };
|
|
461
|
+
} catch (error) {
|
|
462
|
+
return {
|
|
463
|
+
url,
|
|
464
|
+
status: "error",
|
|
465
|
+
error: error instanceof Error ? error.message : String(error),
|
|
466
|
+
};
|
|
467
|
+
}
|
|
468
|
+
})
|
|
469
|
+
);
|
|
470
|
+
|
|
471
|
+
const text = results
|
|
472
|
+
.map((r, i) => {
|
|
473
|
+
if (r.status === "done") {
|
|
474
|
+
return `[${i + 1}] ${r.url}\n${r.content}`;
|
|
475
|
+
}
|
|
476
|
+
return `[${i + 1}] ${r.url}\nError: ${r.error}`;
|
|
477
|
+
})
|
|
478
|
+
.join("\n\n---\n\n");
|
|
479
|
+
|
|
480
|
+
return {
|
|
481
|
+
content: [{ type: "text", text }],
|
|
482
|
+
details: { total: results.length },
|
|
483
|
+
};
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
// Smart-fetch batch
|
|
487
|
+
const result = await executeSmartFetchBatch(params.url, {
|
|
488
|
+
browser: params.browser,
|
|
489
|
+
os: params.os,
|
|
490
|
+
format: params.format as FetchOptions["format"],
|
|
491
|
+
maxChars: params.maxChars,
|
|
492
|
+
timeoutMs: params.timeoutMs,
|
|
493
|
+
removeImages: params.removeImages,
|
|
494
|
+
includeReplies: params.includeReplies as FetchOptions["includeReplies"],
|
|
495
|
+
proxy: params.proxy,
|
|
496
|
+
batchConcurrency: params.batchConcurrency,
|
|
497
|
+
});
|
|
498
|
+
|
|
499
|
+
return {
|
|
500
|
+
content: [
|
|
501
|
+
{
|
|
502
|
+
type: "text",
|
|
503
|
+
text: formatBatchResult(result),
|
|
504
|
+
},
|
|
505
|
+
],
|
|
506
|
+
details: {
|
|
507
|
+
total: result.total,
|
|
508
|
+
succeeded: result.succeeded,
|
|
509
|
+
failed: result.failed,
|
|
510
|
+
},
|
|
511
|
+
};
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// Should never reach here
|
|
515
|
+
return {
|
|
516
|
+
content: [{ type: "text", text: "Invalid url parameter." }],
|
|
517
|
+
isError: true,
|
|
261
518
|
details: {},
|
|
262
519
|
};
|
|
263
520
|
} catch (error) {
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @unipi/web-api — TUI Progress Renderer
|
|
3
|
+
*
|
|
4
|
+
* Renders batch fetch progress for TUI display.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { FetchProgress, FetchProgressStatus } from "../engine/types.js";
|
|
8
|
+
|
|
9
|
+
/** Spinner frames for animation, in display order. */
const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] as const;

/**
 * Static glyph per status. The in-flight statuses map to the first spinner
 * frame here.
 */
const STATUS_GLYPHS: Record<FetchProgressStatus, string> = {
  queued: "○",
  connecting: SPINNER_FRAMES[0],
  waiting: SPINNER_FRAMES[0],
  loading: SPINNER_FRAMES[0],
  processing: SPINNER_FRAMES[0],
  done: "✓",
  error: "✗",
};

/**
 * Get a spinner frame for the given index.
 * Cycles through the spinner frames for animation.
 *
 * The index is truncated to an integer and wrapped into range, so negative
 * and fractional values still yield a frame; a non-finite index yields the
 * first frame.
 *
 * @param index - Animation frame index
 * @returns Spinner character
 */
export function getSpinnerFrame(index: number): string {
  const count = SPINNER_FRAMES.length;
  const step = Number.isFinite(index) ? Math.trunc(index) : 0;
  // `%` keeps the sign of the dividend in JavaScript; add `count` and reduce
  // again so that negative steps wrap around instead of indexing out of range.
  return SPINNER_FRAMES[((step % count) + count) % count];
}
|
|
33
|
+
|
|
34
|
+
/**
 * Render a progress bar.
 *
 * The percentage is clamped to 0-100 (NaN counts as 0) and the width is
 * floored to a non-negative integer. Without this, an out-of-range value
 * produces a negative repeat count and `String.prototype.repeat` throws a
 * RangeError.
 *
 * @param percent - Progress percentage (0-100)
 * @param width - Bar width in characters
 * @returns Progress bar string of exactly `width` cells
 */
export function renderProgressBar(percent: number, width: number = 10): string {
  const cells = Number.isFinite(width) ? Math.max(0, Math.floor(width)) : 0;
  const clamped = Number.isNaN(percent) ? 0 : Math.min(100, Math.max(0, percent));
  const filled = Math.round((clamped / 100) * cells);
  return "█".repeat(filled) + "░".repeat(cells - filled);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Truncate a URL for display.
 *
 * A URL that already fits is returned unchanged. Otherwise the scheme is
 * dropped and as much of `host + path + query` as fits is kept, ending in "…".
 * If the host alone leaves no room, or the input cannot be parsed as a URL,
 * the raw string is cut instead.
 *
 * @param url - URL to truncate
 * @param maxLength - Maximum length; values below 1 produce an empty string
 * @returns Truncated URL
 */
function truncateUrl(url: string, maxLength: number): string {
  if (url.length <= maxLength) {
    return url;
  }

  // With no room at all, `slice(0, maxLength - 1)` would receive a negative
  // end and return almost the entire URL; return nothing instead.
  // Written as a negated `>=` so that NaN is rejected as well.
  if (!(maxLength >= 1)) {
    return "";
  }

  const cutRaw = (): string => url.slice(0, maxLength - 1) + "…";

  // Try to keep the domain
  try {
    const parsed = new URL(url);
    const domain = parsed.host;
    const path = parsed.pathname + parsed.search;

    if (domain.length + 3 >= maxLength) {
      return cutRaw();
    }

    const remaining = maxLength - domain.length - 3;
    if (path.length <= remaining) {
      return domain + path;
    }

    return domain + path.slice(0, remaining - 1) + "…";
  } catch {
    // Not parseable as a URL: fall back to cutting the raw string.
    return cutRaw();
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Render a single progress item line.
 *
 * Layout: glyph, URL column, status column padded to 12 characters, and an
 * 8-cell progress bar. In-flight statuses show the animated spinner frame;
 * every other status shows its static glyph.
 *
 * @param progress - Progress object
 * @param width - Available width
 * @param spinnerIndex - Animation frame index
 * @returns Formatted line
 */
export function renderProgressLine(
  progress: FetchProgress,
  width: number = 80,
  spinnerIndex: number = 0
): string {
  // Status glyph
  const inFlight = ["connecting", "waiting", "loading", "processing"].includes(
    progress.status
  );
  const glyph = inFlight ? getSpinnerFrame(spinnerIndex) : STATUS_GLYPHS[progress.status];

  // URL column: at most 40 characters, leaving 30 for the rest of the line.
  // Keep it at 1 or more — for widths below 31 the raw value is zero or
  // negative, which makes the truncation return nearly the full URL.
  const urlMax = Math.max(1, Math.min(40, width - 30));
  const url = truncateUrl(progress.url, urlMax);

  // Progress bar
  const bar = renderProgressBar(progress.percent, 8);

  // Status text: the phase when one is set, otherwise the status itself.
  const statusText = progress.phase || progress.status;

  return `${glyph} ${url.padEnd(urlMax)} ${statusText.padEnd(12)} [${bar}]`;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Render batch progress header.
 *
 * Reports how many items have finished (succeeded or failed) out of the total,
 * followed by the success count, the error count and the concurrency.
 *
 * "done" and "error" are separate statuses, so the success count is simply the
 * number of "done" items and the finished count is the sum of the two.
 *
 * @param progress - All progress items
 * @param concurrency - Current concurrency
 * @returns Header line
 */
export function renderBatchProgressHeader(
  progress: FetchProgress[],
  concurrency: number
): string {
  let succeeded = 0;
  let failed = 0;
  for (const item of progress) {
    if (item.status === "done") {
      succeeded += 1;
    } else if (item.status === "error") {
      failed += 1;
    }
  }
  const finished = succeeded + failed;

  return `batch_web_content_read ${finished}/${progress.length} done · ok ${succeeded} · err ${failed} · concurrency ${concurrency}`;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Render full batch progress display.
 *
 * Output is the header line, a blank line, one line per item for the first
 * `maxItems` items, and a " ... and N more" line when items were left out.
 *
 * @param progress - All progress items
 * @param concurrency - Current concurrency
 * @param width - Available width
 * @param spinnerIndex - Animation frame index
 * @param maxItems - Maximum number of item lines to render (default 10)
 * @returns Formatted string
 */
export function renderBatchProgress(
  progress: FetchProgress[],
  concurrency: number = 8,
  width: number = 80,
  spinnerIndex: number = 0,
  maxItems: number = 10
): string {
  // Normalise the limit to a non-negative whole number; NaN falls back to 10.
  const limit = Number.isNaN(maxItems) ? 10 : Math.max(0, Math.floor(maxItems));

  const lines: string[] = [renderBatchProgressHeader(progress, concurrency), ""];

  for (const item of progress.slice(0, limit)) {
    lines.push(renderProgressLine(item, width, spinnerIndex));
  }

  const hidden = progress.length - limit;
  if (hidden > 0) {
    lines.push(`  ... and ${hidden} more`);
  }

  return lines.join("\n");
}
|