glance-cli 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Production-grade text cleaner to eliminate ALL binary artifacts and corrupted data
3
+ */
4
+
5
/**
 * Nuclear-level text cleaner - eliminates ALL suspicious patterns.
 *
 * The input is processed line by line. Each line has known UTF-8 mojibake
 * sequences repaired to ASCII, then has control characters, Latin-1 high-bit
 * characters, zero-width/replacement characters, known log/cache artifacts,
 * hex/memory-address runs and symbol noise removed. A final whitelist keeps
 * only tab, printable ASCII and U+00A0-U+024F. Whitespace inside a line is
 * collapsed to single spaces, and lines left without any word character are
 * dropped, so the result never contains blank lines.
 *
 * NOTE(review): characters U+0080-U+00FF (e.g. "é") are stripped by the
 * high-bit pass even though the final whitelist range would allow them, while
 * U+0100-U+024F (e.g. "Ł") survive. Confirm whether that asymmetry is intended.
 *
 * @param text - The text to clean
 * @returns The cleaned text, or "" when `text` is empty or not a string
 */
export function nuclearCleanText(text: string): string {
  if (!text || typeof text !== "string") {
    return "";
  }

  const cleanLines: string[] = [];

  for (let line of text.split("\n")) {
    // Step 1: Repair encoding artifacts (UTF-8 punctuation mis-decoded as
    // Windows-1252/Latin-1). This MUST run before the control/high-bit
    // stripping below: those passes delete U+00E2, U+00C2, \x93 and \x9D,
    // after which none of these sequences could ever match.
    // Dashes are mapped to ASCII "-" because the final whitelist would delete
    // U+2013/U+2014 anyway.
    line = line
      .replace(/\u00E2\u20AC\u2122/g, "'") // right single quote
      .replace(/\u00E2\u20AC\u0153/g, '"') // left double quote
      .replace(/\u00E2\u20AC\x9D/g, '"') // right double quote
      .replace(/\u00E2\u20AC["\u201D]/g, "-") // em dash
      .replace(/\u00E2\u20AC\x93/g, "-") // en dash
      .replace(/\u00C2[ \u00A0]/g, " "); // non-breaking space

    // Step 2: Remove ALL control characters except tab and newline
    line = line.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, "");

    // Step 3: Remove ALL high-bit characters that could be binary
    line = line.replace(/[\x80-\xFF]+/g, "");

    // Step 4: Remove Unicode replacement/control characters
    line = line.replace(/[\uFFFD\uFEFF\u200B-\u200D\u2060]/g, "");

    // Step 5: Remove patterns that look like JavaScript/system artifacts.
    // Deliberately specific; broad words like 'hits', 'message', 'decode'
    // are not removed.
    line = line
      .replace(/\bconsole\.log\b/g, "")
      .replace(/\bconsole\.warn\b/g, "")
      .replace(/\bTextDecoder\b/g, "")
      .replace(/\bDecompression failed\b/g, "")
      .replace(/\baccessCount:\s*\d+/g, "")
      .replace(/\blastAccessedAt:\s*[\d-]+/g, "")
      .replace(/\b_Cache get error\b/g, "")
      .replace(/\berror\.message\b/g, "");

    // Step 6: Remove memory address patterns. The 0x-prefixed form goes first
    // so the bare hex-run pass cannot leave a dangling "0x" behind.
    line = line
      .replace(/0x[0-9A-Fa-f]+/g, "")
      .replace(/[0-9A-Fa-f]{8,}/g, "")
      .replace(/@@[A-Z@]+@@/g, "");

    // Step 7: Remove runs of 3+ symbols; squeeze 6+ repeats of one character
    line = line.replace(/[^\w\s]{3,}/g, "").replace(/(.)\1{5,}/g, "$1");

    // Step 8: Remove patterns with mixed numbers and symbols
    line = line
      .replace(/[A-Za-z]\d+[A-Za-z]\d+/g, "")
      .replace(/\d+[^\w\s]\d+[^\w\s]/g, "");

    // Step 9: Final whitelist - only allow safe printable characters
    line = line.replace(/[^\x09\x20-\x7E\u00A0-\u024F]/g, "");

    // Step 10: Clean up whitespace
    line = line.replace(/\s+/g, " ").trim();

    // Only keep lines that still contain at least one word character
    if (/\w/.test(line)) {
      cleanLines.push(line);
    }
  }

  // Every kept line is trimmed, non-empty and already filtered, so the joined
  // text needs no further newline squeezing, trimming or high-bit stripping.
  return cleanLines.join("\n");
}
86
+
87
/**
 * Sanitize AI response text specifically.
 *
 * Strips a fixed set of tokens that show up in corrupted responses
 * (console.log/warn/error, TextDecoder/Buffer/ArrayBuffer, cache field names,
 * "failed:" / "error:" prefixes), hex/memory-address runs and runs of four or
 * more symbols. Tabs are converted to spaces and repeated spaces collapsed;
 * newlines are left untouched so paragraph structure is preserved.
 *
 * @param text - The AI response to sanitize
 * @returns The sanitized text, or "" when `text` is empty or not a string
 */
export function sanitizeAIResponse(text: string): string {
  if (!text || typeof text !== "string") {
    return "";
  }

  return (
    text
      // Remove JavaScript/system function references (case-insensitive)
      .replace(/\bconsole\.(log|warn|error)\b/gi, "")
      .replace(/\b(TextDecoder|Buffer|ArrayBuffer)\b/gi, "")
      // Remove cache-related artifacts
      .replace(/\b(lastAccessed|accessCount|Decompression)\b/gi, "")
      // Remove error message prefixes such as "error:" / "failed :"
      .replace(/\b(failed|error)\s*:/gi, "")
      // Remove memory/pointer patterns. The 0x-prefixed form must go first:
      // otherwise the bare hex-run pass eats the digits of "0xDEADBEEF12"
      // and leaves a dangling "0x" that no later pattern matches.
      .replace(/\b0x[0-9A-Fa-f]+/g, "")
      .replace(/[0-9A-Fa-f]{8,}/g, "")
      // Remove repeated symbols
      .replace(/[^\w\s]{4,}/g, "")
      // Clean up - but preserve paragraph structure. Tabs are converted
      // before collapsing so tab runs do not survive as double spaces.
      .replace(/\t/g, " ")
      .replace(/ {2,}/g, " ")
      .trim()
  );
}
116
+
117
/**
 * Emergency text cleaner - most aggressive possible.
 *
 * Keeps only ASCII letters, digits, whitespace and the punctuation
 * `. , ! ? ' " - : ; ( ) [ ]`; every other character is deleted. Horizontal
 * whitespace is collapsed to a single space, spaces around line breaks are
 * removed, and runs of three or more newlines are reduced to two, so
 * paragraph breaks survive.
 *
 * @param text - The text to clean
 * @returns The cleaned text, or "" when `text` is empty or not a string
 */
export function emergencyTextClean(text: string): string {
  if (!text || typeof text !== "string") {
    return "";
  }

  // Only allow: letters, numbers, basic punctuation, spaces, newlines
  return (
    text
      .replace(/[^a-zA-Z0-9\s.,!?'"\-:;()[\]]/g, "")
      // Collapse whitespace *other than* "\n". Collapsing all of \s here
      // would flatten every newline and make the \n{3,} step unreachable.
      .replace(/[^\S\n]+/g, " ")
      // Drop the spaces left hugging a line break (also turns "\r\n" into "\n")
      .replace(/ *\n */g, "\n")
      // Max 2 consecutive newlines
      .replace(/\n{3,}/g, "\n\n")
      .trim()
  );
}
133
+
134
/**
 * Report whether the text matches any known corruption signature.
 *
 * Signatures checked: a control character (other than tab, LF, CR), three or
 * more consecutive U+0080-U+00FF characters, a run of eight or more hex
 * digits, one of the system tokens `console.`, `TextDecoder` or `_Cache`, or
 * five or more consecutive characters that are neither ASCII word characters
 * nor whitespace.
 *
 * @param text - The text to inspect
 * @returns `true` if at least one signature matches; `false` otherwise,
 *          including for empty input
 */
export function hasBinaryArtifacts(text: string): boolean {
  if (!text) {
    return false;
  }

  const controlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/;
  const highBitRun = /[\x80-\xFF]{3,}/;
  const hexRun = /[0-9A-Fa-f]{8,}/;
  const systemTokens = /\b(console\.|TextDecoder|_Cache)\b/i;
  const symbolRun = /[^\w\s]{5,}/;

  // Same evaluation order as listed above; stop at the first hit.
  for (const signature of [controlChars, highBitRun, hexRun, systemTokens, symbolRun]) {
    if (signature.test(text)) {
      return true;
    }
  }
  return false;
}