glance-cli 0.13.0 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/cli.js +136 -1065
- package/package.json +3 -2
- package/src/cli/commands.ts +832 -0
- package/src/cli/config.ts +24 -0
- package/src/cli/display.ts +269 -0
- package/src/cli/errors.ts +31 -0
- package/src/cli/index.ts +237 -0
- package/src/cli/logger.ts +43 -0
- package/src/cli/types.ts +114 -0
- package/src/cli/utils.ts +239 -0
- package/src/cli/validators.ts +176 -0
- package/src/cli.ts +17 -0
- package/src/core/compat.ts +96 -0
- package/src/core/extractor.ts +532 -0
- package/src/core/fetcher.ts +592 -0
- package/src/core/formatter.ts +742 -0
- package/src/core/language-detector.ts +382 -0
- package/src/core/screenshot.ts +444 -0
- package/src/core/service-detector.ts +411 -0
- package/src/core/summarizer.ts +656 -0
- package/src/core/text-cleaner.ts +150 -0
- package/src/core/voice.ts +708 -0
|
@@ -0,0 +1,742 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Production-Grade Output Formatter
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* - Multiple output formats (terminal, markdown, JSON, HTML, plain text)
|
|
6
|
+
* - Input validation and sanitization
|
|
7
|
+
* - Configurable styling and themes
|
|
8
|
+
* - Metadata enrichment
|
|
9
|
+
* - Error handling
|
|
10
|
+
* - Extensible architecture
|
|
11
|
+
* - Performance metrics inclusion
|
|
12
|
+
* - Export-ready formats
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import chalk from "chalk";
|
|
16
|
+
import { hasBinaryArtifacts, nuclearCleanText } from "./text-cleaner";
|
|
17
|
+
|
|
18
|
+
// === Types ===
|
|
19
|
+
|
|
20
|
+
/**
 * Metadata describing a page.
 *
 * All named fields are optional. The index signature allows arbitrary
 * additional keys to be carried alongside the named ones.
 */
export interface PageMetadata {
  /** Title of the page */
  title?: string;
  /** Description of the page */
  description?: string;
  /** Author of the page */
  author?: string;
  /** Publish date, as a string */
  publishDate?: string;
  /** Keyword list */
  keywords?: string[];
  /** Language of the page */
  language?: string;
  /** Name of the site */
  siteName?: string;
  /** Type of the page */
  type?: string;
  /** Any further, untyped metadata entries */
  [key: string]: any;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Options accepted by the output formatters.
 *
 * Only `url` is required. `markdown` and `json` are legacy boolean switches;
 * `format` is the explicit selector.
 */
export interface FormatOptions {
  /** Which output format to produce */
  format?: "terminal" | "markdown" | "json" | "html" | "plain";
  /** Legacy switch: request markdown output */
  markdown?: boolean;
  /** Legacy switch: request JSON output */
  json?: boolean;
  /** Metadata of the page */
  metadata?: PageMetadata;
  /** URL the content came from */
  url: string;
  /** The custom question that was asked, if any */
  customQuestion?: string;
  /** Whether metadata should be included in the output */
  includeMetadata?: boolean;
  /** Whether a timestamp should be included */
  includeTimestamp?: boolean;
  /** Name of the model used for generation */
  model?: string;
  /** Processing time, in milliseconds */
  processingTime?: number;
  /** Whether the result came from cache */
  fromCache?: boolean;
  /** Compact mode (less whitespace) */
  compact?: boolean;
  /** Title that overrides the default one */
  customTitle?: string;
  /** Marks the content as full content rather than a summary */
  isFullContent?: boolean;
}
|
|
62
|
+
|
|
63
|
+
/**
 * A formatted result together with basic measurements of it.
 */
export interface FormattedOutput {
  /** The formatted content */
  content: string;
  /** Name of the output format that was used */
  format: string;
  /** Number of characters in `content` */
  length: number;
  /** Number of lines in `content` */
  lines: number;
}
|
|
73
|
+
|
|
74
|
+
// === Constants ===
|
|
75
|
+
|
|
76
|
+
/**
 * Emoji labels used when rendering output.
 *
 * Written as code-point escapes rather than literal glyphs so the values
 * survive any re-encoding of this file (the literals had degraded into
 * mojibake, and `date` had picked up a raw line break).
 */
const EMOJI = {
  // NOTE(review): the trailing bytes of `source` and `title` were lost to the
  // encoding damage; link and page glyphs are a best guess — confirm.
  source: "\u{1F517}", // link
  title: "\u{1F4C4}", // page facing up
  summary: "\u2728", // sparkles
  answer: "\u{1F4A1}", // light bulb
  question: "\u2753", // question mark
  metadata: "\u2139\uFE0F", // information
  time: "\u23F1\uFE0F", // stopwatch
  model: "\u{1F916}", // robot
  cache: "\u26A1", // high voltage
  date: "\u{1F4C5}", // calendar
} as const;

// Not referenced anywhere in this file; kept in case other code relies on it.
const _DEFAULT_TITLE = "Unknown Title";

/** Maximum number of characters kept for a sanitized page title. */
const MAX_TITLE_LENGTH = 100;
|
|
91
|
+
|
|
92
|
+
// === Validation & Sanitization ===
|
|
93
|
+
|
|
94
|
+
/**
 * Normalize a URL string.
 *
 * @param url - The URL text to normalize
 * @returns The `href` of the parsed URL, or `url` unchanged when the `URL`
 *          constructor rejects it (for example a relative or malformed URL)
 */
function sanitizeURL(url: string): string {
  let normalized = url;
  try {
    normalized = new URL(url).href;
  } catch {
    // Parsing failed: fall through and hand back the input untouched.
  }
  return normalized;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Mojibake sequences (UTF-8 text that was decoded as Windows-1252/Latin-1)
 * and the characters they stand for. Patterns use escapes so this table
 * cannot itself be damaged by a re-encoding of the source file.
 */
const MOJIBAKE_REPAIRS: ReadonlyArray<readonly [RegExp, string]> = [
  [/\u00E2\u20AC\u2122/g, "'"], // smart apostrophe
  [/\u00E2\u20AC\u0153/g, '"'], // smart quote open
  [/\u00E2\u20AC\u009D/g, '"'], // smart quote close
  [/\u00E2\u20AC"/g, "\u2014"], // em dash
  [/\u00E2\u20AC\u0093/g, "\u2013"], // en dash
  [/\u00C2 /g, " "], // stray marker in front of a space
  [/\u00E2\u00A2/g, "\u2022"], // bullet point
  [/\u00C3\u00A9/g, "\u00E9"], // e with acute
  [/\u00C3\u00A1/g, "\u00E1"], // a with acute
  [/\u00C3\u00AD/g, "\u00ED"], // i with acute
  [/\u00C3\u00B3/g, "\u00F3"], // o with acute
  [/\u00C3\u00BA/g, "\u00FA"], // u with acute
  [/\u00C3\u00B1/g, "\u00F1"], // n with tilde
  [/\u00C3\u0087/g, "\u00C7"], // C with cedilla
];

/**
 * Sanitize text for safe output.
 *
 * Steps, in order:
 *  1. repair known mojibake sequences;
 *  2. remove NUL, C0/C1 control characters (tab, LF and CR are kept) and DEL;
 *  3. remove U+FFFD, the BOM and zero-width characters;
 *  4. trim surrounding whitespace;
 *  5. truncate to `maxLength`, ending in "..." when there is room for it.
 *
 * @param text - Text to clean; any non-string or empty value yields ""
 * @param maxLength - Maximum length of the result in UTF-16 code units;
 *                    omitted, zero or negative means no limit
 * @returns The cleaned, possibly truncated text
 */
function sanitizeText(text: string, maxLength?: number): string {
  // Metadata fields reach this function untyped, so check at runtime too.
  if (typeof text !== "string" || text.length === 0) return "";

  let sanitized = text;

  // Repairs must run BEFORE control characters are stripped: several of the
  // sequences contain C1 controls (U+0087, U+0093, U+009D) and could never
  // match once those had been removed.
  for (const [pattern, replacement] of MOJIBAKE_REPAIRS) {
    sanitized = sanitized.replace(pattern, replacement);
  }

  sanitized = sanitized
    // Control characters that can cause terminal issues
    .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, "")
    // Replacement character, BOM and zero-width characters
    .replace(/[\uFFFD\uFEFF\u200B-\u200D\u2060]/g, "")
    // Trim last so whitespace exposed by the removals above is dropped as well
    .trim();

  if (maxLength !== undefined && maxLength > 0 && sanitized.length > maxLength) {
    // Only append the ellipsis when it fits inside the limit.
    const suffix = maxLength > 3 ? "..." : "";
    let cut = maxLength - suffix.length;
    // Do not cut between the two halves of a surrogate pair.
    const last = sanitized.charCodeAt(cut - 1);
    if (last >= 0xd800 && last <= 0xdbff) cut -= 1;
    sanitized = `${sanitized.slice(0, cut)}${suffix}`;
  }

  return sanitized;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Build a sanitized `PageMetadata` from an untrusted value.
 *
 * Only the named fields below are copied; any other keys on the input are
 * dropped. String and finite-number field values are cleaned with
 * `sanitizeText`; every other value (missing, null, object, …) becomes ""
 * instead of raising inside `sanitizeText`.
 *
 * @param metadata - Raw metadata of unknown shape
 * @returns `{}` when `metadata` is not an object; otherwise the sanitized
 *          fields, with `keywords` limited to 10 entries (or `undefined`
 *          when the input has no keyword array)
 */
function sanitizeMetadata(metadata: unknown): PageMetadata {
  if (!metadata || typeof metadata !== "object") {
    return {};
  }

  const raw = metadata as Record<string, unknown>;

  // Clean one scalar field, tolerating values that are not strings.
  const field = (key: string, maxLength: number): string => {
    const value = raw[key];
    if (typeof value === "string") return sanitizeText(value, maxLength);
    if (typeof value === "number" && Number.isFinite(value)) {
      return sanitizeText(String(value), maxLength);
    }
    return "";
  };

  const rawKeywords = raw.keywords;
  const keywords = Array.isArray(rawKeywords)
    ? rawKeywords
        // String(null) / String({}) would yield "null" / "[object Object]".
        .filter(
          (k): k is string | number =>
            typeof k === "string" || typeof k === "number",
        )
        .slice(0, 10)
        .map((k) => sanitizeText(String(k), 50))
        .filter((k) => k.length > 0)
    : undefined;

  return {
    title: field("title", MAX_TITLE_LENGTH),
    description: field("description", 500),
    author: field("author", 100),
    publishDate: field("publishDate", 50),
    keywords,
    language: field("language", 10),
    siteName: field("siteName", 100),
    type: field("type", 50),
  };
}
|
|
174
|
+
|
|
175
|
+
// === Format Implementations ===
|
|
176
|
+
|
|
177
|
+
/**
 * Format for terminal output with chalk colors and emoji labels.
 *
 * Layout: optional question, source URL, optional title, optional extra
 * metadata, optional processing line, a blank line, the content heading and
 * body, and an optional timestamp. Extra metadata, the processing line and
 * the timestamp are all suppressed in compact mode.
 *
 * @param summary - Content text to display
 * @param options - Fully populated formatting options
 * @returns The lines joined with "\n"
 */
function formatTerminal(
  summary: string,
  options: Required<FormatOptions>,
): string {
  const parts: string[] = [];
  const meta = options.metadata;

  // Question (if a custom question was asked)
  if (options.customQuestion) {
    parts.push(
      chalk.bold.yellow(`${EMOJI.question} Question: `) +
        chalk.white(options.customQuestion),
      "",
    );
  }

  // Source URL
  parts.push(
    chalk.bold.cyan(`${EMOJI.source} Source: `) + chalk.underline(options.url),
  );

  // Page title
  if (meta.title) {
    parts.push(
      chalk.bold.green(`${EMOJI.title} Title: `) + chalk.white(meta.title),
    );
  }

  // Additional metadata (only if includeMetadata is true)
  if (options.includeMetadata && !options.compact) {
    if (meta.author) parts.push(chalk.gray(` Author: ${meta.author}`));
    if (meta.publishDate) {
      parts.push(chalk.gray(` Published: ${meta.publishDate}`));
    }
    if (meta.siteName) parts.push(chalk.gray(` Site: ${meta.siteName}`));
  }

  // Processing info (if available)
  if (!options.compact) {
    const processingParts: string[] = [];

    if (options.model) {
      processingParts.push(`${EMOJI.model} ${options.model}`);
    }
    // formatOutput fills a missing processingTime with 0, so a check against
    // `undefined` is always true here and printed "0.0s" on every run.
    // Only show a timing that was actually measured.
    if (options.processingTime > 0) {
      processingParts.push(
        `${EMOJI.time} ${(options.processingTime / 1000).toFixed(1)}s`,
      );
    }
    if (options.fromCache) {
      processingParts.push(`${EMOJI.cache} cached`);
    }

    if (processingParts.length > 0) {
      parts.push(chalk.dim(processingParts.join(" | ")));
    }
  }

  parts.push(""); // Blank line

  // Main content heading: answer > full content > custom title > summary
  let contentTitle: string;
  let contentEmoji: string;
  if (options.customQuestion) {
    contentTitle = "Answer";
    contentEmoji = EMOJI.answer;
  } else if (options.isFullContent) {
    contentTitle = "Full Content";
    // NOTE(review): the original glyph was lost to encoding damage; a page
    // emoji is assumed here — confirm.
    contentEmoji = "\u{1F4C4}";
  } else {
    contentTitle = options.customTitle || "Summary";
    contentEmoji = EMOJI.summary;
  }
  parts.push(
    chalk.bold.magenta(`${contentEmoji} ${contentTitle}:`),
    "",
    chalk.white(summary),
  );

  // Timestamp (if enabled)
  if (options.includeTimestamp && !options.compact) {
    parts.push(
      "",
      chalk.dim(`${EMOJI.date} Generated: ${new Date().toLocaleString()}`),
    );
  }

  return parts.join("\n");
}
|
|
269
|
+
|
|
270
|
+
/**
 * Format as Markdown.
 *
 * Layout: `#` heading, optional question, source link, optional metadata
 * lines, optional italic processing line, a `---` rule, the content, and an
 * optional ISO timestamp footer.
 *
 * The heading is "Answer" when a custom question was asked; otherwise the
 * custom title, falling back to "Full Content" for full content (matching
 * the terminal and JSON formatters) and to "Summary" in all other cases.
 *
 * @param summary - Content text, inserted verbatim
 * @param options - Fully populated formatting options
 * @returns The Markdown document, lines joined with "\n"
 */
function formatMarkdown(
  summary: string,
  options: Required<FormatOptions>,
): string {
  const meta = options.metadata;
  const heading = options.customQuestion
    ? "Answer"
    : options.customTitle ||
      (options.isFullContent ? "Full Content" : "Summary");

  const parts: string[] = [`# ${heading}`, ""];

  // Question (if asked)
  if (options.customQuestion) {
    parts.push(`**Question:** ${options.customQuestion}`, "");
  }

  // Source
  parts.push(`**Source:** [${options.url}](${options.url})`);

  // Metadata
  if (meta.title) {
    parts.push(`**Title:** ${meta.title}`);
  }
  if (options.includeMetadata) {
    if (meta.author) parts.push(`**Author:** ${meta.author}`);
    if (meta.publishDate) parts.push(`**Published:** ${meta.publishDate}`);
    if (meta.description) parts.push(`**Description:** ${meta.description}`);
  }

  parts.push("");

  // Processing info. formatOutput fills a missing processingTime with 0, so
  // testing it against `undefined` always passed and emitted "Time: 0.0s";
  // only a measured (positive) time is reported.
  const processingParts: string[] = [];
  if (options.model) {
    processingParts.push(`Model: ${options.model}`);
  }
  if (options.processingTime > 0) {
    processingParts.push(
      `Time: ${(options.processingTime / 1000).toFixed(1)}s`,
    );
  }
  if (options.fromCache) {
    processingParts.push("Cached: Yes");
  }
  if (processingParts.length > 0) {
    parts.push(`*${processingParts.join(" | ")}*`, "");
  }

  // Main content
  parts.push("---", "", summary);

  // Timestamp
  if (options.includeTimestamp) {
    parts.push("", "---", `*Generated: ${new Date().toISOString()}*`);
  }

  return parts.join("\n");
}
|
|
349
|
+
|
|
350
|
+
/**
 * Format as JSON.
 *
 * The object always carries `url`, `content`, `type` ("answer",
 * "full_content" or "summary"), `processingTimeMs`, `fromCache` and a
 * `metadata` object with `title`, `description`, `author` and `publishDate`
 * (each `null` when empty). `question`, `model` and `generatedAt` are added
 * when applicable.
 *
 * With `includeMetadata`, the remaining metadata fields are added as well.
 * Empty strings are reported as `null` for those too; previously the
 * sanitized metadata was spread over the defaults, turning the `null`s of
 * the four core fields back into "".
 *
 * @param summary - Content text
 * @param options - Fully populated formatting options
 * @returns JSON text: a single line in compact mode, else 2-space indented
 */
function formatJSON(summary: string, options: Required<FormatOptions>): string {
  const meta = options.metadata;

  const metadata: Record<string, unknown> = {
    title: meta.title || null,
    description: meta.description || null,
    author: meta.author || null,
    publishDate: meta.publishDate || null,
  };

  if (options.includeMetadata) {
    for (const [key, value] of Object.entries(meta)) {
      if (key in metadata) continue; // core fields are already normalized
      metadata[key] = value === "" ? null : value;
    }
  }

  const output: Record<string, unknown> = {
    url: options.url,
    content: summary,
    metadata,
  };

  if (options.customQuestion) {
    output.question = options.customQuestion;
    output.type = "answer";
  } else {
    output.type = options.isFullContent ? "full_content" : "summary";
  }

  if (options.model) {
    output.model = options.model;
  }

  if (options.processingTime !== undefined) {
    output.processingTimeMs = options.processingTime;
  }

  if (options.fromCache !== undefined) {
    output.fromCache = options.fromCache;
  }

  if (options.includeTimestamp) {
    output.generatedAt = new Date().toISOString();
  }

  return JSON.stringify(output, null, options.compact ? 0 : 2);
}
|
|
399
|
+
|
|
400
|
+
/** Characters that must be entity-encoded in HTML text and attribute values. */
const HTML_ESCAPES: Readonly<Record<string, string>> = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;",
};

/** Entity-encode the five HTML-significant characters in `str`. */
function escapeHTML(str: string): string {
  return str.replace(/[&<>"']/g, (char) => HTML_ESCAPES[char] ?? char);
}

/**
 * Format as a standalone HTML document.
 *
 * All interpolated text (titles, question, URL, metadata, model, content) is
 * entity-encoded. The source URL becomes a link only when it starts with
 * http:// or https://; any other value (for example a `javascript:` URL) is
 * shown as plain text so it can never end up in an `href`. Links opened with
 * `target="_blank"` carry `rel="noopener noreferrer"`.
 *
 * Content is split into paragraphs on blank lines; single line breaks inside
 * a paragraph become `<br>`.
 *
 * The heading follows the same rule as the other formatters: "Answer" for a
 * custom question, else the custom title, else "Full Content" or "Summary".
 *
 * @param summary - Content text
 * @param options - Fully populated formatting options
 * @returns The HTML document, lines joined with "\n"
 */
function formatHTML(summary: string, options: Required<FormatOptions>): string {
  const meta = options.metadata;
  const title = options.customQuestion
    ? "Answer"
    : options.customTitle ||
      (options.isFullContent ? "Full Content" : "Summary");

  const safeURL = escapeHTML(options.url);
  const sourceHTML = /^https?:\/\//i.test(options.url)
    ? `<a href="${safeURL}" target="_blank" rel="noopener noreferrer">${safeURL}</a>`
    : safeURL;

  const parts: string[] = [
    "<!DOCTYPE html>",
    '<html lang="en">',
    "<head>",
    '  <meta charset="UTF-8">',
    '  <meta name="viewport" content="width=device-width, initial-scale=1.0">',
    `  <title>${escapeHTML(meta.title || title)}</title>`,
    "  <style>",
    "    body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 40px auto; padding: 20px; line-height: 1.6; color: #333; }",
    "    h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }",
    "    .metadata { background: #f8f9fa; padding: 15px; border-radius: 5px; margin: 20px 0; }",
    "    .metadata p { margin: 5px 0; color: #666; }",
    "    .content { margin: 30px 0; }",
    "    .question { background: #fff3cd; padding: 15px; border-left: 4px solid #ffc107; margin: 20px 0; }",
    "    .footer { margin-top: 40px; padding-top: 20px; border-top: 1px solid #ddd; font-size: 0.9em; color: #666; }",
    "    a { color: #3498db; text-decoration: none; }",
    "    a:hover { text-decoration: underline; }",
    "  </style>",
    "</head>",
    "<body>",
    // Title
    `  <h1>${escapeHTML(title)}</h1>`,
  ];

  // Question
  if (options.customQuestion) {
    parts.push(
      '  <div class="question">',
      `    <strong>Question:</strong> ${escapeHTML(options.customQuestion)}`,
      "  </div>",
    );
  }

  // Metadata
  parts.push(
    '  <div class="metadata">',
    `    <p><strong>Source:</strong> ${sourceHTML}</p>`,
  );
  if (meta.title) {
    parts.push(`    <p><strong>Title:</strong> ${escapeHTML(meta.title)}</p>`);
  }
  if (options.includeMetadata) {
    if (meta.author) {
      parts.push(
        `    <p><strong>Author:</strong> ${escapeHTML(meta.author)}</p>`,
      );
    }
    if (meta.publishDate) {
      parts.push(
        `    <p><strong>Published:</strong> ${escapeHTML(meta.publishDate)}</p>`,
      );
    }
  }
  if (options.model) {
    parts.push(
      `    <p><strong>Model:</strong> ${escapeHTML(options.model)}</p>`,
    );
  }
  parts.push("  </div>");

  // Content: blank lines separate paragraphs, single breaks become <br>
  parts.push('  <div class="content">');
  for (const block of summary.split(/\n{2,}/)) {
    const para = block.trim();
    if (para) {
      parts.push(`    <p>${escapeHTML(para).replace(/\n/g, "<br>")}</p>`);
    }
  }
  parts.push("  </div>");

  // Footer
  if (options.includeTimestamp) {
    parts.push(
      '  <div class="footer">',
      `    <p>Generated: ${escapeHTML(new Date().toLocaleString())}</p>`,
      "  </div>",
    );
  }

  parts.push("</body>", "</html>");

  return parts.join("\n");
}
|
|
519
|
+
|
|
520
|
+
/**
 * Format as plain text (no colors, minimal formatting).
 *
 * Layout: upper-cased heading underlined with "=", optional question, source
 * URL, optional metadata lines, a 60-character "-" rule, the content, and an
 * optional ISO timestamp footer.
 *
 * The heading is "ANSWER" for a custom question; otherwise the upper-cased
 * custom title, falling back to "FULL CONTENT" for full content (matching
 * the terminal and JSON formatters) and to "SUMMARY" in all other cases.
 *
 * @param summary - Content text, inserted verbatim
 * @param options - Fully populated formatting options
 * @returns The text document, lines joined with "\n"
 */
function formatPlainText(
  summary: string,
  options: Required<FormatOptions>,
): string {
  const RULE = "-".repeat(60);
  const meta = options.metadata;

  const title = options.customQuestion
    ? "ANSWER"
    : (
        options.customTitle ||
        (options.isFullContent ? "Full Content" : "Summary")
      ).toUpperCase();

  const parts: string[] = [title, "=".repeat(title.length), ""];

  // Question
  if (options.customQuestion) {
    parts.push(`Question: ${options.customQuestion}`, "");
  }

  // Source
  parts.push(`Source: ${options.url}`);

  // Metadata
  if (meta.title) {
    parts.push(`Title: ${meta.title}`);
  }
  if (options.includeMetadata) {
    if (meta.author) parts.push(`Author: ${meta.author}`);
    if (meta.publishDate) parts.push(`Published: ${meta.publishDate}`);
  }

  // Content
  parts.push("", RULE, "", summary);

  // Footer
  if (options.includeTimestamp) {
    parts.push("", RULE, `Generated: ${new Date().toISOString()}`);
  }

  return parts.join("\n");
}
|
|
576
|
+
|
|
577
|
+
// === Main Export Function ===
|
|
578
|
+
|
|
579
|
+
/**
 * Format content for output.
 *
 * The content is trimmed and cleaned, the URL and metadata are sanitized, the
 * target format is resolved (priority: `format` > `json` > `markdown` >
 * terminal), every option is given a default, and the matching formatter is
 * invoked.
 *
 * Cleaning: when `hasBinaryArtifacts` reports a problem the text goes through
 * `nuclearCleanText` (and a warning is written to stderr). Otherwise only
 * unambiguous code residue is removed — `console.log`/`warn`/`error`-style
 * references, a few identifier names, and hex literals — and tabs and runs of
 * spaces are collapsed. Earlier versions also deleted ordinary words such as
 * "error", "log", "cache" and "hits" case-insensitively, which silently
 * corrupted legitimate prose; those are no longer touched.
 *
 * @param summary - The content to format
 * @param options - Formatting options
 * @returns Formatted content string
 * @throws Error when `summary` is not a non-empty string, `options` is not an
 *         object, or `options.url` is missing
 *
 * @example
 * // Terminal output (default)
 * const output = formatOutput(summary, { url: "https://example.com" });
 *
 * @example
 * // Markdown format
 * const md = formatOutput(summary, {
 *   url: "https://example.com",
 *   format: "markdown",
 *   includeMetadata: true
 * });
 *
 * @example
 * // JSON format
 * const json = formatOutput(summary, {
 *   url: "https://example.com",
 *   json: true,
 *   model: "gpt-4o-mini",
 *   processingTime: 2500
 * });
 */
export function formatOutput(summary: string, options: FormatOptions): string {
  // Input validation
  if (!summary || typeof summary !== "string") {
    throw new Error("Summary must be a non-empty string");
  }

  if (!options || typeof options !== "object") {
    throw new Error("Options must be an object");
  }

  if (!options.url) {
    throw new Error("URL is required in options");
  }

  // Smart cleaning: preserve formatting while removing artifacts
  let cleanSummary = summary.trim();

  if (hasBinaryArtifacts(cleanSummary)) {
    // Only apply nuclear cleaning when actual binary artifacts are detected
    console.error(
      "\u26A0\uFE0F Binary artifacts detected in summary, applying nuclear cleaning...",
    );
    cleanSummary = nuclearCleanText(cleanSummary);
  } else {
    cleanSummary = cleanSummary
      // Leaked logging calls such as "console.warn"
      .replace(/\bconsole\.(?:log|warn|error|info|debug)\b/g, "")
      // Identifier-shaped residue only; plain English words are left alone
      .replace(/\b(?:TextDecoder|ArrayBuffer|lastAccessed|accessCount)\b/g, "")
      // NOTE(review): this also removes hex literals that are genuine
      // content (e.g. "0xFF" in a technical article) — kept as before.
      .replace(/\b0x[0-9A-Fa-f]+/g, "")
      // Tabs first, so a tab next to a space cannot leave a double space
      .replace(/\t/g, " ")
      // Collapse runs of spaces within lines; newlines are preserved
      .replace(/ {2,}/g, " ")
      .trim();
  }

  const cleanURL = sanitizeURL(options.url);
  const cleanMetadata = sanitizeMetadata(options.metadata);

  // Determine format (priority: format > json > markdown > default)
  let format: "terminal" | "markdown" | "json" | "html" | "plain" = "terminal";
  if (options.format) {
    format = options.format;
  } else if (options.json) {
    format = "json";
  } else if (options.markdown) {
    format = "markdown";
  }

  // Build complete options with defaults
  const completeOptions: Required<FormatOptions> = {
    format,
    markdown: options.markdown ?? false,
    json: options.json ?? false,
    metadata: cleanMetadata,
    url: cleanURL,
    customQuestion: options.customQuestion || "",
    includeMetadata: options.includeMetadata ?? false,
    includeTimestamp: options.includeTimestamp ?? false,
    model: options.model || "",
    processingTime: options.processingTime || 0,
    fromCache: options.fromCache || false,
    compact: options.compact ?? false,
    customTitle: options.customTitle || "",
    isFullContent: options.isFullContent ?? false,
  };

  // Dispatch; anything unrecognized falls back to the terminal formatter
  switch (format) {
    case "markdown":
      return formatMarkdown(cleanSummary, completeOptions);
    case "json":
      return formatJSON(cleanSummary, completeOptions);
    case "html":
      return formatHTML(cleanSummary, completeOptions);
    case "plain":
      return formatPlainText(cleanSummary, completeOptions);
    default:
      return formatTerminal(cleanSummary, completeOptions);
  }
}
|
|
700
|
+
|
|
701
|
+
/**
 * Format content and report basic measurements of the result.
 *
 * @param summary - The content to format
 * @param options - Formatting options, passed straight to `formatOutput`
 * @returns The formatted content, the name of the selected format
 *          (`format`, else "json", else "markdown", else "terminal"), the
 *          content's character count and its number of "\n"-separated lines
 */
export function formatOutputWithMetadata(
  summary: string,
  options: FormatOptions,
): FormattedOutput {
  const rendered = formatOutput(summary, options);

  // Same precedence formatOutput applies when choosing a formatter.
  let chosenFormat: string;
  if (options.format) {
    chosenFormat = options.format;
  } else if (options.json) {
    chosenFormat = "json";
  } else if (options.markdown) {
    chosenFormat = "markdown";
  } else {
    chosenFormat = "terminal";
  }

  const lineCount = rendered.split("\n").length;

  return {
    content: rendered,
    format: chosenFormat,
    length: rendered.length,
    lines: lineCount,
  };
}
|
|
719
|
+
|
|
720
|
+
/**
 * Auto-detect the best format from a file path's extension.
 *
 * The extension is whatever follows the last "." of the final path segment,
 * compared case-insensitively. A segment without any "." has no extension;
 * previously a bare file named "json", "txt" or "html" was mistaken for that
 * extension because the whole name was taken as the extension.
 *
 * @param filePath - Target file path ("/" and "\" both separate segments)
 * @returns "markdown" for .md/.markdown, "json" for .json, "html" for
 *          .html/.htm, "plain" for .txt, and "markdown" for anything else
 */
export function autoDetectFormat(
  filePath: string,
): "markdown" | "json" | "html" | "plain" {
  const lastSeparator = Math.max(
    filePath.lastIndexOf("/"),
    filePath.lastIndexOf("\\"),
  );
  const baseName = filePath.slice(lastSeparator + 1);
  const dot = baseName.lastIndexOf(".");
  const ext = dot === -1 ? "" : baseName.slice(dot + 1).toLowerCase();

  switch (ext) {
    case "md":
    case "markdown":
      return "markdown";
    case "json":
      return "json";
    case "html":
    case "htm":
      return "html";
    case "txt":
      return "plain";
    default:
      return "markdown"; // Default for unknown or missing extensions
  }
}
|