glance-cli 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +9 -0
- package/dist/cli.js +198 -1064
- package/package.json +4 -2
- package/src/cli/commands.ts +854 -0
- package/src/cli/config.ts +24 -0
- package/src/cli/display.ts +270 -0
- package/src/cli/errors.ts +31 -0
- package/src/cli/index.ts +239 -0
- package/src/cli/logger.ts +43 -0
- package/src/cli/types.ts +114 -0
- package/src/cli/utils.ts +239 -0
- package/src/cli/validators.ts +176 -0
- package/src/cli.ts +17 -0
- package/src/core/compat.ts +96 -0
- package/src/core/extractor.ts +532 -0
- package/src/core/fetcher.ts +592 -0
- package/src/core/formatter.ts +742 -0
- package/src/core/language-detector.ts +382 -0
- package/src/core/screenshot.ts +444 -0
- package/src/core/service-detector.ts +411 -0
- package/src/core/summarizer.ts +656 -0
- package/src/core/text-cleaner.ts +150 -0
- package/src/core/voice.ts +708 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Production-grade text cleaner to eliminate ALL binary artifacts and corrupted data
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Ordered [pattern, replacement] passes applied to every line by
 * nuclearCleanText. Order matters: encoding repairs must run before any pass
 * that deletes the characters those repairs look for.
 */
const NUCLEAR_LINE_PASSES: ReadonlyArray<readonly [RegExp, string]> = [
  // Step 1: Repair encoding artifacts FIRST. These are UTF-8 punctuation bytes
  // that were mis-decoded as Windows-1252/Latin-1. They contain U+00E2, U+00C2
  // and C1 controls, so they have to be handled before the control-character
  // pass below removes part of each sequence.
  [/\u00E2\u20AC\u2122/g, "'"], // mis-decoded right single quote
  [/\u00E2\u20AC\u0153/g, '"'], // mis-decoded left double quote
  [/\u00E2\u20AC\u009D/g, '"'], // mis-decoded right double quote
  [/\u00E2\u20AC[\u201D"]/g, "-"], // mis-decoded em dash
  [/\u00E2\u20AC[\u201C\u0093]/g, "-"], // mis-decoded en dash
  [/\u00C2[ \u00A0]/g, " "], // stray U+00C2 in front of a (non-breaking) space

  // Correctly decoded typographic punctuation is mapped to ASCII as well,
  // because the final whitelist does not allow U+2013..U+201D and would
  // otherwise delete it (gluing the neighbouring words together).
  [/[\u2018\u2019]/g, "'"],
  [/[\u201C\u201D]/g, '"'],
  [/[\u2013\u2014]/g, "-"],

  // Step 2: Remove control characters (C0 except tab/LF/CR, DEL, and C1).
  [/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, ""],

  // Step 3: Remove Unicode replacement / zero-width characters.
  [/[\uFFFD\uFEFF\u200B-\u200D\u2060]/g, ""],

  // Step 4: Remove specific JavaScript/system artifacts.
  [/\bconsole\.log\b/g, ""],
  [/\bconsole\.warn\b/g, ""],
  [/\bTextDecoder\b/g, ""],
  [/\bDecompression failed\b/g, ""],
  [/\baccessCount:\s*\d+/g, ""],
  [/\blastAccessedAt:\s*[\d-]+/g, ""],
  [/\b_Cache get error\b/g, ""],
  [/\berror\.message\b/g, ""],

  // Step 5: Remove memory-address-like patterns. The 0x form goes first so a
  // long address is removed whole instead of leaving a dangling "0x".
  [/0x[0-9A-Fa-f]+/g, ""],
  [/[0-9A-Fa-f]{8,}/g, ""],
  [/@@[A-Z@]+@@/g, ""],

  // Step 6: Remove runs of 3+ characters that are neither ASCII word
  // characters nor whitespace, then squeeze any character repeated 6+ times.
  // \w is ASCII-only here, so this is also what removes runs of non-ASCII
  // characters (for example binary data decoded as Latin-1).
  [/[^\w\s]{3,}/g, ""],
  [/(.)\1{5,}/g, "$1"],

  // Step 7: Remove mixed letter/digit and digit/symbol patterns.
  // NOTE(review): these heuristics also hit ordinary content such as
  // "2024-01-15" or "H2O2"; kept unchanged on purpose.
  [/[A-Za-z]\d+[A-Za-z]\d+/g, ""],
  [/\d+[^\w\s]\d+[^\w\s]/g, ""],

  // Step 8: Final whitelist - tab, printable ASCII, and U+00A0..U+024F.
  [/[^\x09\x20-\x7E\u00A0-\u024F]/g, ""],

  // Step 9: Collapse every whitespace run to a single space.
  [/\s+/g, " "],
];

/** A line is kept only if it still contains an ASCII word character or a Latin letter. */
const NUCLEAR_CONTENT_CHAR = /[A-Za-z0-9_\u00C0-\u024F]/;

/**
 * Aggressive line-by-line text cleaner based on a character whitelist.
 *
 * Each line is passed through NUCLEAR_LINE_PASSES and trimmed; lines left
 * without any letter, digit or underscore are dropped. Because empty lines are
 * dropped, the result never contains blank lines or leading/trailing
 * whitespace.
 *
 * Characters in U+00A0..U+00FF (accented Latin-1 letters) are preserved when
 * they appear in runs shorter than three: JavaScript strings hold code points,
 * not bytes, so those characters are ordinary letters, and the whitelist
 * explicitly allows them.
 *
 * @param text - The text to clean
 * @returns The cleaned text, or "" when `text` is empty or not a string
 */
export function nuclearCleanText(text: string): string {
  if (!text || typeof text !== "string") {
    return "";
  }

  const cleanLines: string[] = [];

  for (const rawLine of text.split("\n")) {
    let line = rawLine;
    for (const [pattern, replacement] of NUCLEAR_LINE_PASSES) {
      line = line.replace(pattern, replacement);
    }
    line = line.trim();

    // Only keep lines with actual content.
    if (NUCLEAR_CONTENT_CHAR.test(line)) {
      cleanLines.push(line);
    }
  }

  // Every kept line is trimmed and non-empty, so no further newline or
  // whitespace cleanup is needed after joining.
  return cleanLines.join("\n");
}
|
|
86
|
+
|
|
87
|
+
/**
 * Sanitize AI response text.
 *
 * Removes a fixed set of system/cache identifiers, "error:" / "failed:"
 * prefixes, hex/pointer-like tokens and runs of 4+ symbol characters, then
 * normalises horizontal whitespace. Newlines are left untouched so paragraph
 * structure is preserved.
 *
 * @param text - The AI response to sanitize
 * @returns The sanitized text, or "" when `text` is empty or not a string
 */
export function sanitizeAIResponse(text: string): string {
  if (!text || typeof text !== "string") {
    return "";
  }

  return (
    text
      // Remove JavaScript/system function references
      .replace(/\bconsole\.(log|warn|error)\b/gi, "")
      .replace(/\b(TextDecoder|Buffer|ArrayBuffer)\b/gi, "")
      // Remove cache-related artifacts
      .replace(/\b(lastAccessed|accessCount|Decompression)\b/gi, "")
      // Remove error message prefixes
      .replace(/\b(failed|error)\s*:/gi, "")
      // Remove memory/pointer patterns. The 0x form must go first: if the bare
      // hex-run pass ran first it would eat the digits of a long address and
      // leave a dangling "0x" that the 0x pattern can no longer match.
      .replace(/\b0x[0-9A-Fa-f]+/g, "")
      .replace(/[0-9A-Fa-f]{8,}/g, "")
      // Remove repeated symbols
      .replace(/[^\w\s]{4,}/g, "")
      // Clean up horizontal whitespace. Tabs are converted before the collapse
      // so that "space + tab" cannot survive as a double space.
      .replace(/\t/g, " ")
      .replace(/ {2,}/g, " ")
      .trim()
  );
}
|
|
116
|
+
|
|
117
|
+
/**
 * Emergency text cleaner - the most aggressive of the cleaners.
 *
 * Keeps only ASCII letters, digits, whitespace and the punctuation
 * `. , ! ? ' " - : ; ( ) [ ]`. Horizontal whitespace is collapsed to single
 * spaces, line breaks are preserved, and runs of blank lines are limited to
 * one (at most two consecutive newlines).
 *
 * @param text - The text to clean
 * @returns The cleaned text, or "" when `text` is empty or not a string
 */
export function emergencyTextClean(text: string): string {
  if (!text || typeof text !== "string") {
    return "";
  }

  return (
    text
      // Normalise CRLF / lone CR so that only "\n" represents a line break.
      .replace(/\r\n?/g, "\n")
      // Only allow: letters, numbers, basic punctuation, whitespace
      .replace(/[^a-zA-Z0-9\s.,!?'"\-:;()[\]]/g, "")
      // Collapse horizontal whitespace only. Collapsing /\s+/ here would also
      // flatten every newline and make the newline limit below unreachable.
      .replace(/[^\S\n]+/g, " ")
      // Drop the single space that may remain on either side of a line break.
      .replace(/ ?\n ?/g, "\n")
      // Max 2 consecutive newlines
      .replace(/\n{3,}/g, "\n\n")
      .trim()
  );
}
|
|
133
|
+
|
|
134
|
+
/**
 * Patterns treated as signs of corrupted or binary-derived text. None of them
 * uses the `g` or `y` flag, so `test()` keeps no state between calls and the
 * array can safely be shared.
 */
const BINARY_ARTIFACT_PATTERNS: readonly RegExp[] = [
  /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/, // Control chars
  /[\x80-\xFF]{3,}/, // Runs of 3+ characters in U+0080..U+00FF
  /[0-9A-Fa-f]{8,}/, // Hex patterns (8+ hex digits in a row)
  /\b(console\.|TextDecoder|_Cache)\b/i, // System artifacts
  /[^\w\s]{5,}/, // Runs of 5+ characters that are neither ASCII word chars nor whitespace
];

/**
 * Detect whether text contains suspicious binary artifacts.
 *
 * @param text - The text to inspect
 * @returns `true` when any pattern in BINARY_ARTIFACT_PATTERNS matches;
 *   `false` otherwise, including when `text` is empty or not a string
 */
export function hasBinaryArtifacts(text: string): boolean {
  // Same runtime guard as the cleaner functions in this file: a non-string
  // value is treated as "nothing to report" instead of being coerced by test().
  if (!text || typeof text !== "string") return false;

  return BINARY_ARTIFACT_PATTERNS.some((pattern) => pattern.test(text));
}
|