mcard-js 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -3
- package/dist/model/ContentTypeInterpreter.d.ts +16 -0
- package/dist/model/ContentTypeInterpreter.d.ts.map +1 -0
- package/dist/model/ContentTypeInterpreter.js +78 -0
- package/dist/model/ContentTypeInterpreter.js.map +1 -0
- package/dist/model/GTime.d.ts +15 -0
- package/dist/model/GTime.d.ts.map +1 -1
- package/dist/model/GTime.js +44 -2
- package/dist/model/GTime.js.map +1 -1
- package/dist/model/detectors/BaseDetector.d.ts +26 -0
- package/dist/model/detectors/BaseDetector.d.ts.map +1 -0
- package/dist/model/detectors/BaseDetector.js +5 -0
- package/dist/model/detectors/BaseDetector.js.map +1 -0
- package/dist/model/detectors/BinaryDetector.d.ts +12 -0
- package/dist/model/detectors/BinaryDetector.d.ts.map +1 -0
- package/dist/model/detectors/BinaryDetector.js +82 -0
- package/dist/model/detectors/BinaryDetector.js.map +1 -0
- package/dist/model/detectors/DataFormatDetectors.d.ts +29 -0
- package/dist/model/detectors/DataFormatDetectors.d.ts.map +1 -0
- package/dist/model/detectors/DataFormatDetectors.js +223 -0
- package/dist/model/detectors/DataFormatDetectors.js.map +1 -0
- package/dist/model/detectors/LanguageDetector.d.ts +11 -0
- package/dist/model/detectors/LanguageDetector.d.ts.map +1 -0
- package/dist/model/detectors/LanguageDetector.js +148 -0
- package/dist/model/detectors/LanguageDetector.js.map +1 -0
- package/dist/model/detectors/MarkupDetectors.d.ts +22 -0
- package/dist/model/detectors/MarkupDetectors.d.ts.map +1 -0
- package/dist/model/detectors/MarkupDetectors.js +132 -0
- package/dist/model/detectors/MarkupDetectors.js.map +1 -0
- package/dist/model/detectors/OBJDetector.d.ts +8 -0
- package/dist/model/detectors/OBJDetector.d.ts.map +1 -0
- package/dist/model/detectors/OBJDetector.js +48 -0
- package/dist/model/detectors/OBJDetector.js.map +1 -0
- package/dist/model/detectors/registry.d.ts +13 -0
- package/dist/model/detectors/registry.d.ts.map +1 -0
- package/dist/model/detectors/registry.js +67 -0
- package/dist/model/detectors/registry.js.map +1 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.d.ts +20 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.d.ts.map +1 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.js +42 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.js.map +1 -0
- package/dist/ptr/node/CLMLoader.d.ts.map +1 -1
- package/dist/ptr/node/CLMLoader.js +8 -0
- package/dist/ptr/node/CLMLoader.js.map +1 -1
- package/dist/ptr/node/CLMRunner.d.ts.map +1 -1
- package/dist/ptr/node/CLMRunner.js +4 -3
- package/dist/ptr/node/CLMRunner.js.map +1 -1
- package/dist/ptr/node/Runtimes.d.ts +7 -0
- package/dist/ptr/node/Runtimes.d.ts.map +1 -1
- package/dist/ptr/node/Runtimes.js +94 -3
- package/dist/ptr/node/Runtimes.js.map +1 -1
- package/dist/rag/HandleVectorStore.d.ts +201 -0
- package/dist/rag/HandleVectorStore.d.ts.map +1 -0
- package/dist/rag/HandleVectorStore.js +527 -0
- package/dist/rag/HandleVectorStore.js.map +1 -0
- package/dist/rag/PersistentIndexer.d.ts +144 -0
- package/dist/rag/PersistentIndexer.d.ts.map +1 -0
- package/dist/rag/PersistentIndexer.js +275 -0
- package/dist/rag/PersistentIndexer.js.map +1 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.d.ts +87 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.d.ts.map +1 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.js +164 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.js.map +1 -0
- package/dist/rag/graph/community.d.ts +56 -0
- package/dist/rag/graph/community.d.ts.map +1 -0
- package/dist/rag/graph/community.js +247 -0
- package/dist/rag/graph/community.js.map +1 -0
- package/dist/rag/graph/extractor.d.ts +99 -0
- package/dist/rag/graph/extractor.d.ts.map +1 -0
- package/dist/rag/graph/extractor.js +210 -0
- package/dist/rag/graph/extractor.js.map +1 -0
- package/dist/rag/graph/store.d.ts +176 -0
- package/dist/rag/graph/store.d.ts.map +1 -0
- package/dist/rag/graph/store.js +504 -0
- package/dist/rag/graph/store.js.map +1 -0
- package/dist/rag/index.d.ts +19 -0
- package/dist/rag/index.d.ts.map +1 -0
- package/dist/rag/index.js +24 -0
- package/dist/rag/index.js.map +1 -0
- package/dist/rag/semanticVersioning.d.ts +187 -0
- package/dist/rag/semanticVersioning.d.ts.map +1 -0
- package/dist/rag/semanticVersioning.js +253 -0
- package/dist/rag/semanticVersioning.js.map +1 -0
- package/dist/storage/IndexedDBEngine.d.ts.map +1 -1
- package/dist/storage/IndexedDBEngine.js +9 -1
- package/dist/storage/IndexedDBEngine.js.map +1 -1
- package/dist/storage/SqliteNodeEngine.d.ts.map +1 -1
- package/dist/storage/SqliteNodeEngine.js +26 -4
- package/dist/storage/SqliteNodeEngine.js.map +1 -1
- package/dist/storage/SqliteWasmEngine.d.ts.map +1 -1
- package/dist/storage/SqliteWasmEngine.js +9 -1
- package/dist/storage/SqliteWasmEngine.js.map +1 -1
- package/dist/storage/StorageAdapter.d.ts +2 -0
- package/dist/storage/StorageAdapter.d.ts.map +1 -1
- package/dist/storage/VectorStore.d.ts +28 -5
- package/dist/storage/VectorStore.d.ts.map +1 -1
- package/dist/storage/VectorStore.js +110 -50
- package/dist/storage/VectorStore.js.map +1 -1
- package/dist/storage/schema.d.ts +123 -80
- package/dist/storage/schema.d.ts.map +1 -1
- package/dist/storage/schema.js +305 -217
- package/dist/storage/schema.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
2
|
+
// SQL Detector
|
|
3
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
/**
 * Heuristic detector for SQL text.
 *
 * Confidence comes from two independent signals, and the stronger one wins:
 *   - a `.sql` file extension (0.95), and
 *   - SQL keyword hits in the first ten lines (>= 2 hits: 0.85, exactly 1 hit: 0.6).
 */
export class SQLDetector {
    contentTypeName = "sql";
    // Upper-case keyword fragments. Most carry a trailing space so that they
    // only match when followed by another token on the same line; lines are
    // upper-cased before comparison, which makes the check case-insensitive.
    static KEYWORDS = [
        'SELECT ', 'INSERT ', 'UPDATE ', 'DELETE ', 'CREATE ', 'DROP ', 'ALTER ',
        'FROM ', 'WHERE ', 'JOIN ', 'TABLE ', 'INTO ', 'VALUES ', 'SET ', 'PRIMARY KEY'
    ];
    /**
     * Scores how likely the sample is SQL.
     *
     * @param contentSample Raw sample (string or bytes). Not inspected here: the
     *                      keyword scan works on `lines`, so the sample is no
     *                      longer decoded and upper-cased for nothing.
     * @param lines         The sample split into lines; only the first 10 are scanned.
     *                      A missing value is treated as an empty list.
     * @param firstLine     Unused by this detector (kept for the shared signature).
     * @param fileExtension Optional extension including the dot, e.g. `.sql`.
     * @returns Confidence in the range [0, 0.95].
     */
    detect(contentSample, lines, firstLine, fileExtension) {
        let confidence = 0.0;
        if (fileExtension && fileExtension.toLowerCase() === '.sql') {
            confidence = 0.95;
        }
        // Every (line, keyword) pair that matches counts as one hit, so a single
        // line such as "SELECT a FROM b" already contributes two hits.
        let hits = 0;
        for (const line of (lines ?? []).slice(0, 10)) {
            const upperLine = line.toUpperCase();
            for (const kw of SQLDetector.KEYWORDS) {
                if (upperLine.includes(kw)) {
                    hits++;
                }
            }
        }
        if (hits >= 2) {
            confidence = Math.max(confidence, 0.85);
        }
        else if (hits === 1) {
            confidence = Math.max(confidence, 0.6);
        }
        return confidence;
    }
    /**
     * Maps the detection result to a MIME type.
     *
     * @returns `text/x-sql` when `detect` scores above 0.5, otherwise `text/plain`.
     */
    getMimeType(contentSample, lines, firstLine, fileExtension) {
        return this.detect(contentSample, lines, firstLine, fileExtension) > 0.5 ? 'text/x-sql' : 'text/plain';
    }
}
|
|
42
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
43
|
+
// JSON Detector
|
|
44
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
45
|
+
/**
 * Heuristic detector for JSON documents.
 *
 * With a `.json` extension the content is only validated (0.95 if it parses,
 * 0.6 otherwise). Without one, the text must look like an object or array,
 * must not begin with comment lines, and must parse as JSON (0.9).
 */
export class JSONDetector {
    contentTypeName = "json";
    /**
     * Scores how likely the sample is JSON.
     *
     * @param contentSample Raw sample; bytes are decoded with the default `TextDecoder`.
     * @param lines         The sample split into lines; the first 5 are checked for
     *                      comment prefixes. A missing value is treated as empty.
     * @param firstLine     Unused by this detector (kept for the shared signature).
     * @param fileExtension Optional extension including the dot, e.g. `.json`.
     * @returns 0.95 / 0.6 for `.json` files, otherwise 0.9 for parseable
     *          object/array text and 0.0 for everything else.
     */
    detect(contentSample, lines, firstLine, fileExtension) {
        const text = typeof contentSample === 'string' ? contentSample : new TextDecoder().decode(contentSample);
        // Extension match: trust the extension, but reward content that really parses.
        if (fileExtension && fileExtension.toLowerCase() === '.json') {
            return this.verifyJsonStructure(text) ? 0.95 : 0.6;
        }
        // Cheap shape test first so arbitrary text never reaches the parser.
        const stripped = text.trim();
        const looksLikeObject = stripped.startsWith('{') && stripped.endsWith('}');
        const looksLikeArray = stripped.startsWith('[') && stripped.endsWith(']');
        if (!looksLikeObject && !looksLikeArray) {
            return 0.0;
        }
        // Reject content whose leading lines are comments (simple check).
        for (const line of (lines ?? []).slice(0, 5)) {
            const trimmedLine = line.trim();
            if (trimmedLine.startsWith('//') || trimmedLine.startsWith('/*')) {
                return 0.0;
            }
        }
        // Single source of truth for "does it parse" instead of a second try/catch.
        return this.verifyJsonStructure(text) ? 0.9 : 0.0;
    }
    /**
     * Maps the detection result to a MIME type.
     *
     * @returns `application/json` when `detect` scores above 0.5, otherwise `text/plain`.
     */
    getMimeType(contentSample, lines, firstLine, fileExtension) {
        return this.detect(contentSample, lines, firstLine, fileExtension) > 0.5 ? 'application/json' : 'text/plain';
    }
    /**
     * Reports whether `text` is accepted by `JSON.parse`.
     *
     * @param text Candidate JSON text.
     * @returns `true` if parsing succeeds, `false` if it throws.
     */
    verifyJsonStructure(text) {
        try {
            JSON.parse(text);
            return true;
        }
        catch {
            // A parse failure is the expected "not JSON" signal, not an error.
            return false;
        }
    }
}
|
|
85
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
86
|
+
// YAML Detector
|
|
87
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
88
|
+
/**
 * Heuristic detector for YAML documents.
 *
 * Signals, strongest wins:
 *   - `.yaml` / `.yml` extension: 0.95
 *   - first line is a document marker (`---`) or a `%YAML` directive: 0.9
 *   - first non-empty line is `---` and the sample contains YAML-looking
 *     features: 0.5 / 0.75 / 0.9 depending on how many were found.
 *
 * A sample that does not start with `---` gets no content-based score, but an
 * extension match is never reset: a `.yaml` file without a document marker
 * (an implicit document) still scores 0.95.
 */
export class YAMLDetector {
    contentTypeName = "yaml";
    // Document start marker on its own line, or a %YAML directive.
    static YAML_START_PATTERNS = [/^---\s*$/, /^%YAML/];
    // `key: value` where the value does not start with `=`, `{` or `[`.
    static KEY_VALUE_PATTERN = /^\s*[\w.-]+:\s+(?![=\{\[])/;
    // `- item` where the item starts with a word character or a quote.
    static LIST_ITEM_PATTERN = /^\s*-\s+[\w\'\"]/;
    /**
     * Scores how likely the sample is YAML.
     *
     * @param contentSample Raw sample; bytes are decoded with the default `TextDecoder`.
     * @param lines         The sample split into lines; the first 20 are scanned for
     *                      features. A missing value is treated as empty.
     * @param firstLine     First line of the sample, tested against the start patterns.
     * @param fileExtension Optional extension including the dot, e.g. `.yml`.
     * @returns Confidence in the range [0, 0.95].
     */
    detect(contentSample, lines, firstLine, fileExtension) {
        const text = typeof contentSample === 'string' ? contentSample : new TextDecoder().decode(contentSample);
        const allLines = lines ?? [];
        let confidence = 0.0;
        if (fileExtension && ['.yaml', '.yml'].includes(fileExtension.toLowerCase())) {
            confidence = 0.95;
        }
        if (YAMLDetector.YAML_START_PATTERNS.some(p => p.test(firstLine))) {
            confidence = Math.max(confidence, 0.9);
        }
        // Content features only count when the first non-empty line is `---`;
        // this covers samples that begin with blank lines before the marker.
        const firstNonEmpty = allLines.find(l => l.trim().length > 0) || "";
        if (firstNonEmpty.trim() !== '---') {
            return confidence;
        }
        // A start marker anywhere in the text is worth two features.
        let yamlFeatures = 0;
        if (YAMLDetector.YAML_START_PATTERNS.some(p => new RegExp(p.source, 'm').test(text))) {
            yamlFeatures += 2;
        }
        // Each key/value or list-item line among the first 20 is worth one.
        for (const line of allLines.slice(0, 20)) {
            const stripped = line.trim();
            if (YAMLDetector.KEY_VALUE_PATTERN.test(stripped) || YAMLDetector.LIST_ITEM_PATTERN.test(stripped)) {
                yamlFeatures++;
            }
        }
        if (yamlFeatures > 5) {
            confidence = Math.max(confidence, 0.9);
        }
        else if (yamlFeatures > 3) {
            confidence = Math.max(confidence, 0.75);
        }
        else if (yamlFeatures > 1) {
            confidence = Math.max(confidence, 0.5);
        }
        return confidence;
    }
    /**
     * Maps the detection result to a MIME type.
     *
     * @returns `application/x-yaml` when `detect` scores above 0.5, otherwise `text/plain`.
     */
    getMimeType(contentSample, lines, firstLine, fileExtension) {
        const conf = this.detect(contentSample, lines, firstLine, fileExtension);
        return conf > 0.5 ? 'application/x-yaml' : 'text/plain';
    }
}
|
|
168
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
169
|
+
// CSV Detector
|
|
170
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
171
|
+
/**
 * Heuristic detector for comma-separated values.
 *
 * Scoring is based on how consistent the number of commas is across the first
 * ten non-empty lines; see `analyzeCsvContent` for the tiers.
 */
export class CSVDetector {
    contentTypeName = "csv";
    /**
     * Scores how likely the sample is CSV.
     *
     * @param contentSample Raw sample. Not inspected here: the analysis works on
     *                      `lines`, so the sample is no longer decoded needlessly.
     * @param lines         The sample split into lines.
     * @param firstLine     Unused by this detector (kept for the shared signature).
     * @param fileExtension Optional extension including the dot, e.g. `.csv`.
     * @returns For `.csv` files 0.95 when the structure verifies and 0.6 otherwise;
     *          without the extension, the result of `analyzeCsvContent`.
     */
    detect(contentSample, lines, firstLine, fileExtension) {
        if (fileExtension && fileExtension.toLowerCase() === '.csv') {
            return this.verifyCsvStructure(lines) ? 0.95 : 0.6;
        }
        return this.analyzeCsvContent(lines);
    }
    /**
     * Maps the detection result to a MIME type.
     *
     * @returns `text/csv` when `detect` scores above 0.5, otherwise `text/plain`.
     */
    getMimeType(contentSample, lines, firstLine, fileExtension) {
        return this.detect(contentSample, lines, firstLine, fileExtension) > 0.5 ? 'text/csv' : 'text/plain';
    }
    /**
     * Reports whether the sample has a consistent CSV layout: either every row
     * has the same number of commas, or every row after the header does.
     *
     * Delegates to `analyzeCsvContent` (those two cases are its 0.9 and 0.8
     * tiers), which also makes a missing `lines` value return `false` instead
     * of throwing.
     *
     * @param lines The sample split into lines.
     */
    verifyCsvStructure(lines) {
        return this.analyzeCsvContent(lines) >= 0.8;
    }
    /**
     * Scores the first ten non-empty lines by comma consistency.
     *
     * @param lines The sample split into lines; may be missing or empty.
     * @returns 0.9 when every row has the same comma count,
     *          0.8 when only the first row (header) differs,
     *          0.5 when every row has a comma but the counts are ragged,
     *          0.0 when there are no rows or any row lacks a comma.
     */
    analyzeCsvContent(lines) {
        if (!lines || lines.length === 0) {
            return 0.0;
        }
        const sampleLines = lines.slice(0, 10).filter(l => l.trim().length > 0);
        if (sampleLines.length === 0 || !sampleLines.every(l => l.includes(','))) {
            return 0.0;
        }
        // Every sampled row contains a comma, so all counts below are >= 1.
        const counts = sampleLines.map(l => l.split(',').length - 1);
        if (new Set(counts).size === 1) {
            return 0.9;
        }
        // Counts differ, so there are at least two rows; tolerate a header row
        // whose column count is different from the data rows.
        if (new Set(counts.slice(1)).size === 1) {
            return 0.8;
        }
        return 0.5;
    }
}
|
|
223
|
+
//# sourceMappingURL=DataFormatDetectors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DataFormatDetectors.js","sourceRoot":"","sources":["../../../src/model/detectors/DataFormatDetectors.ts"],"names":[],"mappings":"AAEA,gFAAgF;AAChF,eAAe;AACf,gFAAgF;AAChF,MAAM,OAAO,WAAW;IACX,eAAe,GAAG,KAAK,CAAC;IAEjC,yDAAyD;IACjD,MAAM,CAAU,QAAQ,GAAG;QAC/B,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ;QACxE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa;KAClF,CAAC;IAEF,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QACzG,IAAI,UAAU,GAAG,GAAG,CAAC;QAErB,IAAI,aAAa,IAAI,aAAa,CAAC,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;YAC1D,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACrC,0CAA0C;QAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACpC,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;YACrC,KAAK,MAAM,EAAE,IAAI,WAAW,CAAC,QAAQ,EAAE,CAAC;gBACpC,IAAI,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;oBACzB,IAAI,EAAE,CAAC;oBACP,uDAAuD;gBAC3D,CAAC;YACL,CAAC;QACL,CAAC;QAED,wEAAwE;QACxE,4CAA4C;QAC5C,8BAA8B;QAC9B,IAAI,IAAI,IAAI,CAAC;YAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;aAClD,IAAI,IAAI,KAAK,CAAC;YAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAE5D,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACrC,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,OAAO,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC;IAC3G,CAAC;;AAGL,gFAAgF;AAChF,gBAAgB;AAChB,gFAAgF;AAChF,MAAM,OAAO,YAAY;IACZ,eAAe,GAAG,MAAM,CAAC;IAElC,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAEzG,kBAAkB;QAClB,IAAI,aAAa,IAAI,aAAa,CAAC,WAAW,EAAE,KAAK,OAAO,EAAE,CAAC;YAC3D,OAAO,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC
,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;QACvD,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YACtD,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YACxD,OAAO,GAAG,CAAC;QACf,CAAC;QAED,8CAA8C;QAC9C,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YACtB,IAAI,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC;gBAAE,OAAO,GAAG,CAAC;QAC7D,CAAC;QAED,IAAI,CAAC;YACD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACjB,OAAO,GAAG,CAAC;QACf,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACT,OAAO,GAAG,CAAC;QACf,CAAC;IACL,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,OAAO,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,YAAY,CAAC;IACjH,CAAC;IAEO,mBAAmB,CAAC,IAAY;QACpC,IAAI,CAAC;YACD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACjB,OAAO,IAAI,CAAC;QAChB,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC;CACJ;AAED,gFAAgF;AAChF,gBAAgB;AAChB,gFAAgF;AAChF,MAAM,OAAO,YAAY;IACZ,eAAe,GAAG,MAAM,CAAC;IAE1B,MAAM,CAAU,mBAAmB,GAAG,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IAC7D,MAAM,CAAU,iBAAiB,GAAG,4BAA4B,CAAC;IACjE,MAAM,CAAU,iBAAiB,GAAG,kBAAkB,CAAC;IAE/D,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QACzG,IAAI,UAAU,GAAG,GAAG,CAAC;QAErB,IAAI,aAAa,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC3E,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,YAAY,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;YAChE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,YAAY,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,CAAC,CA
AC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YACnF,YAAY,IAAI,CAAC,CAAC;QACtB,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7B,IAAI,YAAY,CAAC,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC;gBAAE,YAAY,EAAE,CAAC;iBAC7D,IAAI,YAAY,CAAC,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC;gBAAE,YAAY,EAAE,CAAC;QAC3E,CAAC;QAED,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACjE,IAAI,aAAa,CAAC,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC;YACjC,IAAI,YAAY,GAAG,CAAC;gBAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;YAC7D,IAAI,YAAY,GAAG,CAAC;gBAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;YAC9D,IAAI,YAAY,GAAG,CAAC;gBAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QACjE,CAAC;aAAM,CAAC;YACJ,qEAAqE;YACrE,iEAAiE;YACjE,yBAAyB;YACzB,yDAAyD;YACzD,wDAAwD;YACxD,0CAA0C;YAC1C,gEAAgE;YAChE,yEAAyE;YACzE,yHAAyH;YACzH,qGAAqG;YACrG,iDAAiD;YACjD;;;;;cAKE;YACF,wFAAwF;YACxF,8EAA8E;YAC9E,wBAAwB;YACxB,4DAA4D;YAC5D,oFAAoF;YACpF,uCAAuC;YACvC,oEAAoE;YACpE,IAAI,aAAa,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;gBAC3E,qEAAqE;gBACrE,qDAAqD;gBACrD,sDAAsD;gBACtD,qDAAqD;gBACrD,4CAA4C;gBAC5C,wBAAwB;gBACxB,mDAAmD;gBACnD,yBAAyB;YAC7B,CAAC;QACL,CAAC;QACD,sGAAsG;QACtG,6FAA6F;QAC7F,gDAAgD;QAEhD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;IACpD,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;QACzE,OAAO,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,YAAY,CAAC;IAC5D,CAAC;;AAGL,gFAAgF;AAChF,eAAe;AACf,gFAAgF;AAChF,MAAM,OAAO,WAAW;IACX,eAAe,GAAG,KAAK,CAAC;IAEjC,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAEzG,IAAI,aAAa,IAAI,aAAa,CAAC,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;YAC1D,OAAO,IAAI,CAAC,kBAAkB,CA
AC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;QACvD,CAAC;QACD,OAAO,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,OAAO,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,YAAY,CAAC;IACzG,CAAC;IAEO,kBAAkB,CAAC,KAAe;QACtC,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACxE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;QAC3C,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAE3D,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC;QAClE,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QAElE,mBAAmB;QACnB,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;YAC5C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC;QAClE,CAAC;QACD,OAAO,KAAK,CAAC;IACjB,CAAC;IAEO,iBAAiB,CAAC,KAAe;QACrC,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,GAAG,CAAC;QAC7C,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACxE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAAE,OAAO,GAAG,CAAC;QAErF,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC;QAClE,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC;YAAE,OAAO,GAAG,CAAC;QAEjE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,UAAU,GAAG,MAAM,CAA
C,KAAK,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;YAC5C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC;gBAAE,OAAO,GAAG,CAAC;QACjE,CAAC;QAED,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;YAAE,OAAO,GAAG,CAAC;QAEzC,OAAO,GAAG,CAAC;IACf,CAAC;CACJ"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { BaseDetector } from './BaseDetector';
|
|
2
|
+
/**
 * Type declaration for the detector that classifies a text sample as source
 * code in one of several programming languages.
 */
export declare class ProgrammingLanguageDetector implements BaseDetector {
|
|
3
|
+
/** Content-type name this detector reports. */
readonly contentTypeName = "code";
|
|
4
|
+
/**
 * Returns a numeric confidence that the sample is source code.
 *
 * @param contentSample Raw sample, as text or bytes.
 * @param lines         The sample split into lines.
 * @param firstLine     First line of the sample.
 * @param fileExtension Optional file extension.
 */
detect(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): number;
|
|
5
|
+
/**
 * Returns the MIME type chosen for the sample; the declared type also allows `null`.
 * Takes the same arguments as `detect`.
 */
getMimeType(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): string | null;
|
|
6
|
+
// Private per-language helpers; their signatures are not part of the declaration.
private isPython;
|
|
7
|
+
private detectCFamily;
|
|
8
|
+
private detectJsType;
|
|
9
|
+
private isTypescript;
|
|
10
|
+
}
|
|
11
|
+
//# sourceMappingURL=LanguageDetector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LanguageDetector.d.ts","sourceRoot":"","sources":["../../../src/model/detectors/LanguageDetector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C,qBAAa,2BAA4B,YAAW,YAAY;IAC5D,QAAQ,CAAC,eAAe,UAAU;IAElC,MAAM,CACF,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM;IAKT,WAAW,CACP,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM,GAAG,IAAI;IAwBhB,OAAO,CAAC,QAAQ;IAwChB,OAAO,CAAC,aAAa;IA4BrB,OAAO,CAAC,YAAY;IAqCpB,OAAO,CAAC,YAAY;CAevB"}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
export class ProgrammingLanguageDetector {
|
|
2
|
+
contentTypeName = "code";
|
|
3
|
+
detect(contentSample, lines, firstLine, fileExtension) {
|
|
4
|
+
const mime = this.getMimeType(contentSample, lines, firstLine, fileExtension);
|
|
5
|
+
return (mime && mime !== 'text/plain') ? 0.95 : 0.0;
|
|
6
|
+
}
|
|
7
|
+
getMimeType(contentSample, lines, firstLine, fileExtension) {
|
|
8
|
+
const text = typeof contentSample === 'string' ? contentSample : new TextDecoder().decode(contentSample);
|
|
9
|
+
// 1. Python Detection
|
|
10
|
+
if (this.isPython(firstLine, text, lines)) {
|
|
11
|
+
return 'text/x-python';
|
|
12
|
+
}
|
|
13
|
+
// 2. C/C++ Detection
|
|
14
|
+
const cType = this.detectCFamily(text);
|
|
15
|
+
if (cType)
|
|
16
|
+
return cType;
|
|
17
|
+
// 3. JS/JSX Detection
|
|
18
|
+
const jsType = this.detectJsType(text);
|
|
19
|
+
if (jsType)
|
|
20
|
+
return jsType;
|
|
21
|
+
// 4. TypeScript Detection
|
|
22
|
+
if (this.isTypescript(text)) {
|
|
23
|
+
return 'text/typescript';
|
|
24
|
+
}
|
|
25
|
+
return 'text/plain';
|
|
26
|
+
}
|
|
27
|
+
isPython(firstLine, text, lines) {
|
|
28
|
+
// Imports
|
|
29
|
+
if (/^\s*import\s+(\w+|\w+\.\w+)/m.test(text) || /^\s*from\s+(\w+|\w+\.\w+)\s+import\s+/m.test(text)) {
|
|
30
|
+
const stdLibs = ['os', 'sys', 're', 'json', 'math', 'random', 'datetime'];
|
|
31
|
+
if (stdLibs.some(lib => text.includes(`import ${lib}`) || text.includes(`from ${lib}`))) {
|
|
32
|
+
return true;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
// Shebang
|
|
36
|
+
if (firstLine.startsWith('#!') && firstLine.toLowerCase().includes('python'))
|
|
37
|
+
return true;
|
|
38
|
+
// Strong indicators
|
|
39
|
+
if (text.includes('if __name__ ==') && text.includes('__main__'))
|
|
40
|
+
return true;
|
|
41
|
+
if (/^\s*def\s+\w+\s*\(/.test(text) && !text.includes('function'))
|
|
42
|
+
return true;
|
|
43
|
+
if (/^\s*class\s+\w+\s*[\(:]/m.test(text))
|
|
44
|
+
return true;
|
|
45
|
+
if (/^\s*@\w+/m.test(text))
|
|
46
|
+
return true; // Decorator
|
|
47
|
+
// Pattern counting
|
|
48
|
+
let count = 0;
|
|
49
|
+
const patterns = [
|
|
50
|
+
/\bif\b.*?:/, /\belif\b.*?:/, /\belse\s*:/, /\bfor\b.*?\bin\b.*?:/,
|
|
51
|
+
/\bwhile\b.*?:/, /\btry\s*:/, /\bexcept\b.*?:/, /\bfinally\s*:/,
|
|
52
|
+
/\bNone\b/, /\bTrue\b/, /\bFalse\b/, /f["'].*?\{.*?\}["']/, // f-string
|
|
53
|
+
/\bdef\b/, /\bclass\b/, /\bimport\b/, /\bfrom\b/, /\blambda\b.*?:/
|
|
54
|
+
];
|
|
55
|
+
for (const p of patterns) {
|
|
56
|
+
if (p.test(text))
|
|
57
|
+
count++;
|
|
58
|
+
}
|
|
59
|
+
// Short content heuristic
|
|
60
|
+
const nonEmptyLines = lines.filter(l => l.trim().length > 0).length;
|
|
61
|
+
if (nonEmptyLines <= 5 && count >= 1) {
|
|
62
|
+
return true;
|
|
63
|
+
}
|
|
64
|
+
return count >= 3;
|
|
65
|
+
}
|
|
66
|
+
detectCFamily(text) {
|
|
67
|
+
// C/C++ patterns
|
|
68
|
+
const cPatterns = [
|
|
69
|
+
/#include\s*<.*?>/, /#include\s*".*?"/,
|
|
70
|
+
/\b(int|void|char|float|double)\s+main\s*\(.*\)\s*\{/,
|
|
71
|
+
/\bstruct\s+\w+\s*\{/, /#define\s+\w+/,
|
|
72
|
+
/printf\(.*?\);/, /scanf\(.*?\);/
|
|
73
|
+
];
|
|
74
|
+
const cppPatterns = [
|
|
75
|
+
/\bclass\s+\w+\s*\{/, /\bnamespace\s+\w+\s*\{/,
|
|
76
|
+
/\btemplate\s*<.*?>/, /::/, /\bstd::/,
|
|
77
|
+
/\bcout\s*<</, /\bcin\s*>>/,
|
|
78
|
+
/\bnew\s+\w+/, /\bdelete\s+\w+/,
|
|
79
|
+
/#include\s*<iostream>/
|
|
80
|
+
];
|
|
81
|
+
let cCount = 0;
|
|
82
|
+
let cppCount = 0;
|
|
83
|
+
cPatterns.forEach(p => { if (p.test(text))
|
|
84
|
+
cCount++; });
|
|
85
|
+
cppPatterns.forEach(p => { if (p.test(text))
|
|
86
|
+
cppCount++; });
|
|
87
|
+
if (cppCount >= 2 || (cppCount >= 1 && text.includes('std::')))
|
|
88
|
+
return 'text/x-c++';
|
|
89
|
+
if (cCount >= 2)
|
|
90
|
+
return 'text/x-c';
|
|
91
|
+
return null;
|
|
92
|
+
}
|
|
93
|
+
detectJsType(text) {
|
|
94
|
+
// JS patterns
|
|
95
|
+
const jsPatterns = [
|
|
96
|
+
/function\s+\w+\s*\(/.test(text), // function foo(
|
|
97
|
+
/\bconst\s+\w+\s*=/.test(text),
|
|
98
|
+
/\blet\s+\w+\s*=/.test(text),
|
|
99
|
+
/\bvar\s+\w+\s*=/.test(text),
|
|
100
|
+
/\bimport\s+.*\s+from/.test(text),
|
|
101
|
+
/\bexport\s+/.test(text),
|
|
102
|
+
/\=\>\s*\{/.test(text), // Arrow func
|
|
103
|
+
/console\.log\(/.test(text)
|
|
104
|
+
];
|
|
105
|
+
const jsxPatterns = [
|
|
106
|
+
/<\w+(>|\s+.*?>)[\s\S]*?<\/\w+>/m.test(text),
|
|
107
|
+
/<\w+\s+\/>/m.test(text),
|
|
108
|
+
/className=/.test(text),
|
|
109
|
+
/React\.createElement/.test(text)
|
|
110
|
+
];
|
|
111
|
+
const jsCount = jsPatterns.filter(Boolean).length;
|
|
112
|
+
const jsxCount = jsxPatterns.filter(Boolean).length;
|
|
113
|
+
if (jsxCount > 0 && (text.includes('import React') || text.includes('from "react"')))
|
|
114
|
+
return 'text/jsx';
|
|
115
|
+
if (jsxCount >= 2)
|
|
116
|
+
return 'text/jsx';
|
|
117
|
+
if (jsCount >= 2) {
|
|
118
|
+
// Avoid JSON misclassification
|
|
119
|
+
const stripped = text.trim();
|
|
120
|
+
if ((stripped.startsWith('{') && stripped.endsWith('}')) || (stripped.startsWith('[') && stripped.endsWith(']'))) {
|
|
121
|
+
try {
|
|
122
|
+
JSON.parse(text);
|
|
123
|
+
if (jsCount < 2)
|
|
124
|
+
return null;
|
|
125
|
+
}
|
|
126
|
+
catch { }
|
|
127
|
+
}
|
|
128
|
+
return 'text/javascript';
|
|
129
|
+
}
|
|
130
|
+
return null;
|
|
131
|
+
}
|
|
132
|
+
isTypescript(text) {
|
|
133
|
+
const tsPatterns = [
|
|
134
|
+
/:\s*(string|number|boolean|any|void|null|undefined)\b/,
|
|
135
|
+
/\binterface\s+\w+\s*\{/,
|
|
136
|
+
/\bclass\s+\w+\s+implements\s+\w+/,
|
|
137
|
+
/\btype\s+\w+\s*=/,
|
|
138
|
+
/\b(public|private|protected)\s+/,
|
|
139
|
+
/\bnamespace\s+\w+\s*\{/,
|
|
140
|
+
/<\w+>/ // Generics (simple check)
|
|
141
|
+
];
|
|
142
|
+
let count = 0;
|
|
143
|
+
tsPatterns.forEach(p => { if (p.test(text))
|
|
144
|
+
count++; });
|
|
145
|
+
return count >= 2;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
//# sourceMappingURL=LanguageDetector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LanguageDetector.js","sourceRoot":"","sources":["../../../src/model/detectors/LanguageDetector.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,2BAA2B;IAC3B,eAAe,GAAG,MAAM,CAAC;IAElC,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;QAC9E,OAAO,CAAC,IAAI,IAAI,IAAI,KAAK,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;IACxD,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAEzG,sBAAsB;QACtB,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YACxC,OAAO,eAAe,CAAC;QAC3B,CAAC;QAED,qBAAqB;QACrB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,KAAK;YAAE,OAAO,KAAK,CAAC;QAExB,sBAAsB;QACtB,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,MAAM;YAAE,OAAO,MAAM,CAAC;QAE1B,0BAA0B;QAC1B,IAAI,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,OAAO,iBAAiB,CAAC;QAC7B,CAAC;QAED,OAAO,YAAY,CAAC;IACxB,CAAC;IAEO,QAAQ,CAAC,SAAiB,EAAE,IAAY,EAAE,KAAe;QAC7D,UAAU;QACV,IAAI,8BAA8B,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,wCAAwC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACnG,MAAM,OAAO,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;YAC1E,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,GAAG,EAAE,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC;gBACtF,OAAO,IAAI,CAAC;YAChB,CAAC;QACL,CAAC;QAED,UAAU;QACV,IAAI,SAAS,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,SAAS,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAAE,OAAO,IAAI,CAAC;QAE1F,oBAAoB;QACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,OAAO,IAAI,CAAC;QAC9E,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,OAAO,IAAI,CAAC;QAC/E,IAAI,0BAA0B,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QACvD,IAAI,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC,CAAC,YAAY;QAErD,mBAAmB;QACnB,IAAI,KAAK,G
AAG,CAAC,CAAC;QACd,MAAM,QAAQ,GAAG;YACb,YAAY,EAAE,cAAc,EAAE,YAAY,EAAE,sBAAsB;YAClE,eAAe,EAAE,WAAW,EAAE,gBAAgB,EAAE,eAAe;YAC/D,UAAU,EAAE,UAAU,EAAE,WAAW,EAAE,qBAAqB,EAAE,WAAW;YACvE,SAAS,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAE,gBAAgB;SACrE,CAAC;QAEF,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACvB,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,KAAK,EAAE,CAAC;QAC9B,CAAC;QAED,0BAA0B;QAC1B,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;QACpE,IAAI,aAAa,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,OAAO,KAAK,IAAI,CAAC,CAAC;IACtB,CAAC;IAEO,aAAa,CAAC,IAAY;QAC9B,iBAAiB;QACjB,MAAM,SAAS,GAAG;YACd,kBAAkB,EAAE,kBAAkB;YACtC,qDAAqD;YACrD,qBAAqB,EAAE,eAAe;YACtC,gBAAgB,EAAE,eAAe;SACpC,CAAC;QAEF,MAAM,WAAW,GAAG;YAChB,oBAAoB,EAAE,wBAAwB;YAC9C,oBAAoB,EAAE,IAAI,EAAE,SAAS;YACrC,aAAa,EAAE,YAAY;YAC3B,aAAa,EAAE,gBAAgB;YAC/B,uBAAuB;SAC1B,CAAC;QAEF,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxD,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAE5D,IAAI,QAAQ,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAAE,OAAO,YAAY,CAAC;QACpF,IAAI,MAAM,IAAI,CAAC;YAAE,OAAO,UAAU,CAAC;QAEnC,OAAO,IAAI,CAAC;IAChB,CAAC;IAEO,YAAY,CAAC,IAAY;QAC7B,cAAc;QACd,MAAM,UAAU,GAAG;YACf,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,gBAAgB;YAClD,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC;YAC9B,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;YAC5B,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;YAC5B,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC;YACjC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC;YACxB,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,aAAa;YACrC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC;SAC9B,CAAC;QAEF,MAAM,WAAW,GAAG;YAChB,iCAAiC,CAAC,IAAI,CAAC,IAAI,CAAC;YAC5C,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC;YACxB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;YACvB,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC;SACpC,CAAC;QAEF,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,OA
AO,CAAC,CAAC,MAAM,CAAC;QAClD,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAEpD,IAAI,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC;YAAE,OAAO,UAAU,CAAC;QACxG,IAAI,QAAQ,IAAI,CAAC;YAAE,OAAO,UAAU,CAAC;QAErC,IAAI,OAAO,IAAI,CAAC,EAAE,CAAC;YACf,+BAA+B;YAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7B,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;gBAC/G,IAAI,CAAC;oBAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAAC,IAAI,OAAO,GAAG,CAAC;wBAAE,OAAO,IAAI,CAAC;gBAAC,CAAC;gBAAC,MAAM,CAAC,CAAC,CAAC;YACrE,CAAC;YACD,OAAO,iBAAiB,CAAC;QAC7B,CAAC;QACD,OAAO,IAAI,CAAC;IAChB,CAAC;IAEO,YAAY,CAAC,IAAY;QAC7B,MAAM,UAAU,GAAG;YACf,uDAAuD;YACvD,wBAAwB;YACxB,kCAAkC;YAClC,kBAAkB;YAClB,iCAAiC;YACjC,wBAAwB;YACxB,OAAO,CAAC,0BAA0B;SACrC,CAAC;QAEF,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxD,OAAO,KAAK,IAAI,CAAC,CAAC;IACtB,CAAC;CACJ"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { BaseDetector } from './BaseDetector';
|
|
2
|
+
/**
 * Type declaration for the XML content detector, which implements the
 * BaseDetector contract.
 */
export declare class XMLDetector implements BaseDetector {
|
|
3
|
+
/** Content type label exposed by this detector; fixed to "xml". */
readonly contentTypeName = "xml";
|
|
4
|
+
/** Private static; declared without a type here (a regex literal in the compiled JS). */
private static readonly XML_DECLARATION;
|
|
5
|
+
/** Private static; declared without a type here (a regex literal in the compiled JS). */
private static readonly BASIC_TAG_PAIR;
|
|
6
|
+
/**
 * Scores how XML-like the sample is.
 * @param contentSample Text or raw bytes to inspect.
 * @param lines The sample split into lines (presumably by the caller; confirm).
 * @param firstLine First line of the sample.
 * @param fileExtension Optional extension; compared against '.xml' in the compiled JS.
 * @returns A number; the compiled implementation clamps it to the range 0..1.
 */
detect(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): number;
|
|
7
|
+
/**
 * Maps the sample to a MIME type string. The declared return type also allows null.
 * Parameters are the same as for detect.
 */
getMimeType(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): string | null;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Type declaration for the Markdown content detector, which implements the
 * BaseDetector contract.
 */
export declare class MarkdownDetector implements BaseDetector {
|
|
10
|
+
/** Content type label exposed by this detector; fixed to "markdown". */
readonly contentTypeName = "markdown";
|
|
11
|
+
/** Private static; declared without a type here (an array of regex literals in the compiled JS). */
private static readonly MD_PATTERNS;
|
|
12
|
+
/** Private static; declared without a type here (a regex literal in the compiled JS). */
private static readonly SETEXT_HEADER;
|
|
13
|
+
/**
 * Scores how Markdown-like the sample is.
 * @param contentSample Text or raw bytes to inspect.
 * @param lines The sample split into lines (presumably by the caller; confirm).
 * @param firstLine First line of the sample.
 * @param fileExtension Optional extension; '.md' and '.markdown' are recognised in the compiled JS.
 * @returns A number; the compiled implementation clamps it to the range 0..1.
 */
detect(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): number;
|
|
14
|
+
/**
 * Maps the sample to a MIME type string. The declared return type also allows null.
 * Parameters are the same as for detect.
 */
getMimeType(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): string | null;
|
|
15
|
+
}
|
|
16
|
+
/**
 * Type declaration for the plain-text content detector, which implements the
 * BaseDetector contract.
 */
export declare class PlainTextDetector implements BaseDetector {
|
|
17
|
+
/** Content type label exposed by this detector; fixed to "text". */
readonly contentTypeName = "text";
|
|
18
|
+
/** Private static; declared without a type here (an array of extension strings in the compiled JS). */
private static readonly IMAGE_EXTS;
|
|
19
|
+
/**
 * Scores how likely the sample is plain text.
 * @param contentSample Text or raw bytes to inspect.
 * @param lines The sample split into lines (presumably by the caller; confirm).
 * @param firstLine First line of the sample.
 * @param fileExtension Optional file extension.
 * @returns A number; the compiled implementation returns one of a few fixed scores.
 */
detect(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): number;
|
|
20
|
+
/**
 * Maps the sample to a MIME type string. The declared return type also allows null.
 * Parameters are the same as for detect.
 */
getMimeType(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): string | null;
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=MarkupDetectors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MarkupDetectors.d.ts","sourceRoot":"","sources":["../../../src/model/detectors/MarkupDetectors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAK9C,qBAAa,WAAY,YAAW,YAAY;IAC5C,QAAQ,CAAC,eAAe,SAAS;IACjC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAiB;IACxD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAA4B;IAElE,MAAM,CACF,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM;IA0BT,WAAW,CACP,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM,GAAG,IAAI;CAWnB;AAKD,qBAAa,gBAAiB,YAAW,YAAY;IACjD,QAAQ,CAAC,eAAe,cAAc;IAEtC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAQjC;IACF,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,CAA+B;IAEpE,MAAM,CACF,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM;IAsCT,WAAW,CACP,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM,GAAG,IAAI;CAGnB;AAKD,qBAAa,iBAAkB,YAAW,YAAY;IAClD,QAAQ,CAAC,eAAe,UAAU;IAClC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAA8D;IAEhG,MAAM,CACF,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM;IAoBT,WAAW,CACP,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM,GAAG,IAAI;CAGnB"}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
2
|
+
// XML Detector
|
|
3
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
/**
 * Detects XML-like content and maps it to a MIME type.
 *
 * `detect` yields a confidence in the range 0..1; `getMimeType` turns the
 * same evidence into a concrete MIME string.
 */
export class XMLDetector {
    contentTypeName = "xml";
    // Optional leading whitespace followed by an `<?xml` declaration.
    static XML_DECLARATION = /^\s*<\?xml/i;
    // An opening tag followed (possibly across lines) by its matching closing tag.
    static BASIC_TAG_PAIR = /<(\w+)[^>]*>.*?<\/\1>/s;
    /**
     * Score how XML-like the sample is.
     *
     * @param contentSample Text or bytes to inspect; bytes are decoded with TextDecoder.
     * @param lines Sample split into lines (not used by this detector).
     * @param firstLine First line of the sample, checked for an XML declaration.
     * @param fileExtension Optional extension; '.xml' (any case) scores 0.95.
     * @returns Confidence clamped to the range 0..1.
     */
    detect(contentSample, lines, firstLine, fileExtension) {
        const text = typeof contentSample === 'string' ? contentSample : new TextDecoder().decode(contentSample);
        let confidence = 0.0;
        if (fileExtension && fileExtension.toLowerCase() === '.xml') {
            confidence = Math.max(confidence, 0.95);
        }
        if (XMLDetector.XML_DECLARATION.test(firstLine) || text.trim().startsWith('<?xml')) {
            confidence = Math.max(confidence, 0.95);
        }
        // Any tag-like punctuation is weak evidence; a matched tag pair is stronger.
        if (text.includes('<') && text.includes('>') && text.includes('</')) {
            confidence = Math.max(confidence, 0.5);
            if (XMLDetector.BASIC_TAG_PAIR.test(text)) {
                confidence = Math.max(confidence, 0.7);
            }
        }
        // An HTML doctype counts against XML.
        if (text.toLowerCase().includes('<!doctype html')) {
            if (confidence > 0.3)
                confidence -= 0.4;
        }
        return Math.min(Math.max(confidence, 0.0), 1.0);
    }
    /**
     * Resolve a MIME type for the sample.
     *
     * The extension is compared case-insensitively, matching `detect`, so
     * '.XML' and '.xml' resolve identically.
     *
     * @param contentSample Text or bytes to inspect; bytes are decoded with TextDecoder.
     * @param lines Sample split into lines (forwarded to `detect`).
     * @param firstLine First line of the sample (forwarded to `detect`).
     * @param fileExtension Optional file extension.
     * @returns 'application/xml', 'image/svg+xml', 'text/html', or 'text/plain'.
     */
    getMimeType(contentSample, lines, firstLine, fileExtension) {
        const text = typeof contentSample === 'string' ? contentSample : new TextDecoder().decode(contentSample);
        if (fileExtension && fileExtension.toLowerCase() === '.xml')
            return 'application/xml';
        const lowered = text.toLowerCase();
        if (lowered.includes('<svg'))
            return 'image/svg+xml';
        if (lowered.includes('<html') || lowered.includes('<!doctype html'))
            return 'text/html';
        if (this.detect(contentSample, lines, firstLine, fileExtension) > 0.5)
            return 'application/xml';
        return 'text/plain';
    }
}
|
|
42
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
43
|
+
// Markdown Detector
|
|
44
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
45
|
+
/**
 * Detects Markdown content by counting Markdown-looking features.
 *
 * `detect` yields a confidence in the range 0..1; `getMimeType` reports
 * 'text/markdown' when that confidence exceeds 0.5.
 */
export class MarkdownDetector {
    contentTypeName = "markdown";
    // Per-line patterns; a line counts once if any of them matches.
    static MD_PATTERNS = [
        /^#{1,6}\s+\S+/, // ATX Headers
        /^\s*[\*\+\-]\s+\S+/, // List items
        /^\s*\d+\.\s+\S+/, // Ordered list items
        /`{1,3}[^`]+`{1,3}/, // Inline code
        /\[[^\]]+\]\([^\)]+\)/, // Links
        /!\[[^\]]+\]\([^\)]+\)/, // Images
        /^\s*>.*/ // Blockquotes
    ];
    // A line followed by a line of three or more '=' or '-' characters.
    static SETEXT_HEADER = /^.*\n(?:={3,}|-{3,})\s*$/m;
    /**
     * Score how Markdown-like the sample is.
     *
     * @param contentSample Text or bytes to inspect; bytes are decoded with TextDecoder.
     * @param lines Sample split into lines; only the first 20 are scanned.
     * @param firstLine First line of the sample (not used by this detector).
     * @param fileExtension Optional extension; '.md' or '.markdown' scores 0.95.
     * @returns Confidence clamped to the range 0..1.
     */
    detect(contentSample, lines, firstLine, fileExtension) {
        const text = typeof contentSample === 'string' ? contentSample : new TextDecoder().decode(contentSample);
        let confidence = 0.0;
        if (fileExtension && ['.md', '.markdown'].includes(fileExtension.toLowerCase())) {
            confidence = Math.max(confidence, 0.95);
        }
        let mdFeatures = 0;
        // A setext header is a strong signal, so it counts double.
        if (MarkdownDetector.SETEXT_HEADER.test(text))
            mdFeatures += 2;
        for (const line of lines.slice(0, 20)) {
            if (MarkdownDetector.MD_PATTERNS.some(p => p.test(line))) {
                mdFeatures++;
            }
        }
        const hasCodeFence = text.includes('```');
        if (hasCodeFence)
            mdFeatures++;
        // Thresholds are tested from highest to lowest so that every tier is
        // reachable. Previously `> 1` was tested before `> 3` and `> 5`,
        // which capped fence-less documents at 0.6.
        if (hasCodeFence) {
            if (mdFeatures > 3)
                confidence = Math.max(confidence, 0.95);
            else if (mdFeatures > 1)
                confidence = Math.max(confidence, 0.85);
        }
        else if (mdFeatures > 5)
            confidence = Math.max(confidence, 0.9);
        else if (mdFeatures > 3)
            confidence = Math.max(confidence, 0.8);
        else if (mdFeatures > 1)
            confidence = Math.max(confidence, 0.6);
        // Negative checks
        const stripped = text.trim();
        if ((stripped.startsWith('{') && stripped.endsWith('}')) || (stripped.startsWith('[') && stripped.endsWith(']'))) {
            try {
                JSON.parse(text);
                // Valid JSON: penalise the Markdown score.
                if (confidence > 0.3)
                    confidence -= 0.4;
            }
            catch {
                // Not JSON: no penalty applies.
            }
        }
        // Content starting with '<' that contains an XML declaration is penalised too.
        if (stripped.startsWith('<') && text.includes('<?xml')) {
            if (confidence > 0.3)
                confidence -= 0.4;
        }
        return Math.min(Math.max(confidence, 0.0), 1.0);
    }
    /**
     * Resolve a MIME type for the sample.
     *
     * @returns 'text/markdown' when `detect` scores above 0.5, otherwise 'text/plain'.
     */
    getMimeType(contentSample, lines, firstLine, fileExtension) {
        return this.detect(contentSample, lines, firstLine, fileExtension) > 0.5 ? 'text/markdown' : 'text/plain';
    }
}
|
|
104
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
105
|
+
// PlainText Detector
|
|
106
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
107
|
+
/**
 * Plain-text detector: returns one of a small set of fixed scores and always
 * reports 'text/plain' as the MIME type.
 */
export class PlainTextDetector {
    contentTypeName = "text";
    // Extensions for which this detector returns 0 ('.pdf' is handled alongside them).
    static IMAGE_EXTS = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.svg', '.webp'];
    /**
     * Score how likely the sample is plain text.
     *
     * @param contentSample Text or bytes to inspect; bytes are decoded with TextDecoder.
     * @param lines Sample split into lines.
     * @param firstLine First line of the sample (not used by this detector).
     * @param fileExtension Optional file extension.
     * @returns 0.1 for an empty sample, 0.0 for image/PDF extensions, 0.8 when
     *          there are fewer than five lines and every one contains a comma,
     *          otherwise 0.15.
     */
    detect(contentSample, lines, firstLine, fileExtension) {
        const nothingToInspect = !contentSample && lines.length === 0;
        if (nothingToInspect) {
            return 0.1;
        }
        if (fileExtension) {
            const normalizedExt = fileExtension.toLowerCase();
            const isBinaryLike = PlainTextDetector.IMAGE_EXTS.includes(normalizedExt) || normalizedExt === '.pdf';
            if (isBinaryLike) {
                return 0.0;
            }
        }
        const decoded = typeof contentSample === 'string'
            ? contentSample
            : new TextDecoder().decode(contentSample);
        if (decoded.includes(',') && lines.length < 5) {
            // Ambiguous CSV check
            let linesWithComma = 0;
            for (const line of lines) {
                if (line.includes(',')) {
                    linesWithComma += 1;
                }
            }
            if (linesWithComma > 0 && linesWithComma === lines.length) {
                return 0.8;
            }
        }
        return 0.15;
    }
    /**
     * Resolve a MIME type for the sample.
     *
     * @returns Always 'text/plain'.
     */
    getMimeType(contentSample, lines, firstLine, fileExtension) {
        return 'text/plain';
    }
}
|
|
132
|
+
//# sourceMappingURL=MarkupDetectors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MarkupDetectors.js","sourceRoot":"","sources":["../../../src/model/detectors/MarkupDetectors.ts"],"names":[],"mappings":"AAEA,gFAAgF;AAChF,eAAe;AACf,gFAAgF;AAChF,MAAM,OAAO,WAAW;IACX,eAAe,GAAG,KAAK,CAAC;IACzB,MAAM,CAAU,eAAe,GAAG,aAAa,CAAC;IAChD,MAAM,CAAU,cAAc,GAAG,wBAAwB,CAAC;IAElE,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QACzG,IAAI,UAAU,GAAG,GAAG,CAAC;QAErB,IAAI,aAAa,IAAI,aAAa,CAAC,WAAW,EAAE,KAAK,MAAM,EAAE,CAAC;YAC1D,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,WAAW,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YACjF,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAClE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;YACvC,IAAI,WAAW,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACxC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;YAC3C,CAAC;QACL,CAAC;QAED,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAChD,IAAI,UAAU,GAAG,GAAG;gBAAE,UAAU,IAAI,GAAG,CAAC;QAC5C,CAAC;QAED,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;IACpD,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAEzG,IAAI,aAAa,KAAK,MAAM;YAAE,OAAO,iBAAiB,CAAC;QACvD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,eAAe,CAAC;QAChE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC;YAAE,OAAO,WAAW,CAAC;QAE9G,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,aAAa,CAAC,GAAG,GAAG;YAAE,OAAO,iBAAiB,CAAC;QAEhG,OAAO,YAAY,CAAC;IACxB,CAAC;;AAGL,gFAAgF;AAChF,oBAAoB;AACpB,gFAAgF;AAChF,MAAM,OAAO,gBAAgB
;IAChB,eAAe,GAAG,UAAU,CAAC;IAE9B,MAAM,CAAU,WAAW,GAAG;QAClC,eAAe,EAAc,cAAc;QAC3C,oBAAoB,EAAS,aAAa;QAC1C,iBAAiB,EAAY,qBAAqB;QAClD,mBAAmB,EAAU,cAAc;QAC3C,sBAAsB,EAAO,QAAQ;QACrC,uBAAuB,EAAM,SAAS;QACtC,SAAS,CAAoB,cAAc;KAC9C,CAAC;IACM,MAAM,CAAU,aAAa,GAAG,2BAA2B,CAAC;IAEpE,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QACzG,IAAI,UAAU,GAAG,GAAG,CAAC;QAErB,IAAI,aAAa,IAAI,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC9E,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,gBAAgB,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,UAAU,IAAI,CAAC,CAAC;QAE/D,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACpC,IAAI,gBAAgB,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;gBACvD,UAAU,EAAE,CAAC;YACjB,CAAC;QACL,CAAC;QAED,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QAC1C,IAAI,YAAY;YAAE,UAAU,EAAE,CAAC;QAE/B,IAAI,UAAU,GAAG,CAAC,IAAI,YAAY;YAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5E,IAAI,UAAU,GAAG,CAAC,IAAI,YAAY;YAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;aACvE,IAAI,UAAU,GAAG,CAAC;YAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;aAC3D,IAAI,UAAU,GAAG,CAAC;YAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;aAC3D,IAAI,UAAU,GAAG,CAAC;YAAE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAEhE,kBAAkB;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC/G,IAAI,CAAC;gBAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAAC,IAAI,UAAU,GAAG,GAAG;oBAAE,UAAU,IAAI,GAAG,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,CAAC;QAChF,CAAC;QACD,IAAI,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACrD,IAAI,U
AAU,GAAG,GAAG;gBAAE,UAAU,IAAI,GAAG,CAAC;QAC5C,CAAC;QAED,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;IACpD,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,OAAO,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,YAAY,CAAC;IAC9G,CAAC;;AAGL,gFAAgF;AAChF,qBAAqB;AACrB,gFAAgF;AAChF,MAAM,OAAO,iBAAiB;IACjB,eAAe,GAAG,MAAM,CAAC;IAC1B,MAAM,CAAU,UAAU,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IAEhG,MAAM,CACF,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,IAAI,CAAC,aAAa,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,GAAG,CAAC;QAErD,IAAI,aAAa,EAAE,CAAC;YAChB,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;YACxC,IAAI,iBAAiB,CAAC,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,KAAK,MAAM;gBAAE,OAAO,GAAG,CAAC;QACjF,CAAC;QAED,MAAM,IAAI,GAAG,OAAO,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QACzG,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzC,sBAAsB;YACtB,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;YAC7D,IAAI,UAAU,GAAG,CAAC,IAAI,UAAU,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;gBAChD,OAAO,GAAG,CAAC;YACf,CAAC;QACL,CAAC;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,WAAW,CACP,aAAkC,EAClC,KAAe,EACf,SAAiB,EACjB,aAAsB;QAEtB,OAAO,YAAY,CAAC;IACxB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { BaseDetector } from './BaseDetector';
|
|
2
|
+
/**
 * Type declaration for the "obj" content detector, which implements the
 * BaseDetector contract.
 * NOTE(review): the implementation is not visible from this declaration;
 * presumably it targets Wavefront OBJ text. Confirm against OBJDetector.js.
 */
export declare class OBJDetector implements BaseDetector {
|
|
3
|
+
/** Content type label exposed by this detector; fixed to "obj". */
readonly contentTypeName = "obj";
|
|
4
|
+
/** Private static, declared without a type here; see the implementation for its value. */
private static readonly COMMANDS;
|
|
5
|
+
/**
 * Scores the sample for this content type.
 * @param contentSample Text or raw bytes to inspect.
 * @param lines The sample split into lines (presumably by the caller; confirm).
 * @param firstLine First line of the sample.
 * @param fileExtension Optional file extension.
 * @returns A number; presumably a confidence score. TODO confirm the range.
 */
detect(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): number;
|
|
6
|
+
/**
 * Maps the sample to a MIME type string, or null per the declared return type.
 * Parameters are the same as for detect.
 */
getMimeType(contentSample: string | Uint8Array, lines: string[], firstLine: string, fileExtension?: string): string | null;
|
|
7
|
+
}
|
|
8
|
+
//# sourceMappingURL=OBJDetector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"OBJDetector.d.ts","sourceRoot":"","sources":["../../../src/model/detectors/OBJDetector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C,qBAAa,WAAY,YAAW,YAAY;IAC5C,QAAQ,CAAC,eAAe,SAAS;IAGjC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAsE;IAEtG,MAAM,CACF,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM;IA2CT,WAAW,CACP,aAAa,EAAE,MAAM,GAAG,UAAU,EAClC,KAAK,EAAE,MAAM,EAAE,EACf,SAAS,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,GACvB,MAAM,GAAG,IAAI;CAGnB"}
|