@j0hanz/superfetch 2.5.3 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +350 -226
- package/dist/assets/logo.svg +24837 -24835
- package/dist/cache.d.ts +28 -20
- package/dist/cache.js +292 -514
- package/dist/config.d.ts +41 -7
- package/dist/config.js +298 -148
- package/dist/crypto.js +25 -12
- package/dist/dom-noise-removal.js +379 -421
- package/dist/errors.d.ts +2 -2
- package/dist/errors.js +25 -8
- package/dist/fetch.d.ts +18 -16
- package/dist/fetch.js +1132 -526
- package/dist/host-normalization.js +40 -10
- package/dist/http-native.js +628 -287
- package/dist/index.js +67 -7
- package/dist/instructions.md +44 -31
- package/dist/ip-blocklist.d.ts +8 -0
- package/dist/ip-blocklist.js +65 -0
- package/dist/json.js +14 -9
- package/dist/language-detection.d.ts +2 -11
- package/dist/language-detection.js +289 -280
- package/dist/markdown-cleanup.d.ts +0 -1
- package/dist/markdown-cleanup.js +391 -429
- package/dist/mcp-validator.js +4 -2
- package/dist/mcp.js +184 -135
- package/dist/observability.js +89 -21
- package/dist/resources.js +16 -6
- package/dist/server-tuning.d.ts +2 -0
- package/dist/server-tuning.js +25 -23
- package/dist/session.d.ts +1 -0
- package/dist/session.js +41 -33
- package/dist/tasks.d.ts +2 -0
- package/dist/tasks.js +91 -9
- package/dist/timer-utils.d.ts +5 -0
- package/dist/timer-utils.js +20 -0
- package/dist/tools.d.ts +28 -5
- package/dist/tools.js +317 -183
- package/dist/transform-types.d.ts +5 -1
- package/dist/transform.d.ts +3 -2
- package/dist/transform.js +1138 -421
- package/dist/type-guards.d.ts +1 -0
- package/dist/type-guards.js +7 -0
- package/dist/workers/transform-child.d.ts +1 -0
- package/dist/workers/transform-child.js +118 -0
- package/dist/workers/transform-worker.js +87 -78
- package/package.json +14 -6
|
@@ -1,331 +1,340 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
lines: code.split('\n'),
|
|
10
|
-
trimmedStart: code.trimStart(),
|
|
11
|
-
};
|
|
12
|
-
}
|
|
13
|
-
/* -------------------------------------------------------------------------------------------------
|
|
14
|
-
* Word boundary matcher (cached)
|
|
15
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
16
|
-
class WordBoundaryMatcher {
|
|
17
|
-
cache = new Map();
|
|
18
|
-
containsWord(source, word) {
|
|
19
|
-
return this.getRegex(word).test(source);
|
|
1
|
+
// This module provides a heuristic-based language detection mechanism for code snippets.
|
|
2
|
+
class DetectionContext {
|
|
3
|
+
code;
|
|
4
|
+
_lower;
|
|
5
|
+
_lines;
|
|
6
|
+
_trimmedStart;
|
|
7
|
+
constructor(code) {
|
|
8
|
+
this.code = code;
|
|
20
9
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
return cached;
|
|
25
|
-
// Keep behavior: compile `\b${word}\b` without escaping (words are controlled by patterns).
|
|
26
|
-
const compiled = new RegExp(`\\b${word}\\b`);
|
|
27
|
-
this.cache.set(word, compiled);
|
|
28
|
-
return compiled;
|
|
10
|
+
get lower() {
|
|
11
|
+
this._lower ??= this.code.toLowerCase();
|
|
12
|
+
return this._lower;
|
|
29
13
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
* Attribute-based language resolution
|
|
34
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
35
|
-
class LanguageAttributeResolver {
|
|
36
|
-
resolve(className, dataLang) {
|
|
37
|
-
const classMatch = this.extractFromClassName(className);
|
|
38
|
-
return classMatch ?? this.resolveFromDataAttribute(dataLang);
|
|
14
|
+
get lines() {
|
|
15
|
+
this._lines ??= this.code.split(/\r?\n/);
|
|
16
|
+
return this._lines;
|
|
39
17
|
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
18
|
+
get trimmedStart() {
|
|
19
|
+
this._trimmedStart ??= this.code.trimStart();
|
|
20
|
+
return this._trimmedStart;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
const BASH_COMMANDS = new Set([
|
|
24
|
+
'sudo',
|
|
25
|
+
'chmod',
|
|
26
|
+
'mkdir',
|
|
27
|
+
'cd',
|
|
28
|
+
'ls',
|
|
29
|
+
'cat',
|
|
30
|
+
'echo',
|
|
31
|
+
]);
|
|
32
|
+
const BASH_PACKAGE_MANAGERS = [
|
|
33
|
+
'npm',
|
|
34
|
+
'yarn',
|
|
35
|
+
'pnpm',
|
|
36
|
+
'npx',
|
|
37
|
+
'brew',
|
|
38
|
+
'apt',
|
|
39
|
+
'pip',
|
|
40
|
+
'cargo',
|
|
41
|
+
'go',
|
|
42
|
+
];
|
|
43
|
+
const BASH_VERBS = new Set(['install', 'add', 'run', 'build', 'start']);
|
|
44
|
+
const TYPESCRIPT_HINTS = [
|
|
45
|
+
': string',
|
|
46
|
+
':string',
|
|
47
|
+
': number',
|
|
48
|
+
':number',
|
|
49
|
+
': boolean',
|
|
50
|
+
':boolean',
|
|
51
|
+
': void',
|
|
52
|
+
':void',
|
|
53
|
+
': any',
|
|
54
|
+
':any',
|
|
55
|
+
': unknown',
|
|
56
|
+
':unknown',
|
|
57
|
+
': never',
|
|
58
|
+
':never',
|
|
59
|
+
];
|
|
60
|
+
const HTML_TAGS = [
|
|
61
|
+
'<!doctype',
|
|
62
|
+
'<html',
|
|
63
|
+
'<head',
|
|
64
|
+
'<body',
|
|
65
|
+
'<div',
|
|
66
|
+
'<span',
|
|
67
|
+
'<p',
|
|
68
|
+
'<a',
|
|
69
|
+
'<script',
|
|
70
|
+
'<style',
|
|
71
|
+
];
|
|
72
|
+
const RUST_REGEX = /\b(?:fn|impl|struct|enum)\b/;
|
|
73
|
+
const JS_REGEX = /\b(?:const|let|var|function|class|async|await|export|import)\b/;
|
|
74
|
+
const PYTHON_REGEX = /\b(?:def|class|import|from)\b/;
|
|
75
|
+
const CSS_REGEX = /@media|@import|@keyframes/;
|
|
76
|
+
function containsJsxTag(code) {
|
|
77
|
+
const len = code.length;
|
|
78
|
+
for (let i = 0; i < len - 1; i++) {
|
|
79
|
+
if (code.charCodeAt(i) === 60 /* < */) {
|
|
80
|
+
const next = code.charCodeAt(i + 1);
|
|
81
|
+
if (next >= 65 && next <= 90)
|
|
82
|
+
return true; // A-Z
|
|
56
83
|
}
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
84
|
+
}
|
|
85
|
+
return false;
|
|
86
|
+
}
|
|
87
|
+
function isBashLine(line) {
|
|
88
|
+
const trimmed = line.trimStart();
|
|
89
|
+
if (trimmed.length === 0)
|
|
90
|
+
return false;
|
|
91
|
+
// Shell Prefix
|
|
92
|
+
if (trimmed.startsWith('#!') ||
|
|
93
|
+
trimmed.startsWith('$ ') ||
|
|
94
|
+
trimmed.startsWith('# ')) {
|
|
95
|
+
return true;
|
|
96
|
+
}
|
|
97
|
+
const spaceIdx = trimmed.indexOf(' ');
|
|
98
|
+
const firstWord = spaceIdx === -1 ? trimmed : trimmed.slice(0, spaceIdx);
|
|
99
|
+
if (BASH_COMMANDS.has(firstWord))
|
|
100
|
+
return true;
|
|
101
|
+
// Package Managers
|
|
102
|
+
let isPkgMgr = false;
|
|
103
|
+
for (const mgr of BASH_PACKAGE_MANAGERS) {
|
|
104
|
+
if (firstWord === mgr) {
|
|
105
|
+
isPkgMgr = true;
|
|
106
|
+
break;
|
|
61
107
|
}
|
|
62
|
-
return undefined;
|
|
63
108
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
if (!trimmed)
|
|
71
|
-
return undefined;
|
|
72
|
-
return /^\w+$/.test(trimmed) ? trimmed : undefined;
|
|
109
|
+
if (isPkgMgr && spaceIdx !== -1) {
|
|
110
|
+
const rest = trimmed.slice(spaceIdx + 1);
|
|
111
|
+
const secondSpaceIdx = rest.indexOf(' ');
|
|
112
|
+
const secondWord = secondSpaceIdx === -1 ? rest : rest.slice(0, secondSpaceIdx);
|
|
113
|
+
if (BASH_VERBS.has(secondWord))
|
|
114
|
+
return true;
|
|
73
115
|
}
|
|
116
|
+
return false;
|
|
74
117
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
118
|
+
function detectBashIndicators(lines) {
|
|
119
|
+
for (const line of lines) {
|
|
120
|
+
if (isBashLine(line))
|
|
121
|
+
return true;
|
|
122
|
+
}
|
|
123
|
+
return false;
|
|
124
|
+
}
|
|
125
|
+
function detectCssStructure(lines) {
|
|
126
|
+
for (const line of lines) {
|
|
127
|
+
const trimmed = line.trimStart();
|
|
128
|
+
if (trimmed.length === 0)
|
|
129
|
+
continue;
|
|
130
|
+
const hasSelector = (trimmed.startsWith('.') || trimmed.startsWith('#')) &&
|
|
131
|
+
trimmed.includes('{');
|
|
132
|
+
if (hasSelector || (trimmed.includes(':') && trimmed.includes(';'))) {
|
|
133
|
+
return true;
|
|
90
134
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
matchesCommand(line) {
|
|
111
|
-
return Heuristics.bash.commands.some((cmd) => line === cmd || line.startsWith(`${cmd} `));
|
|
112
|
-
},
|
|
113
|
-
matchesPackageManagerVerb(line) {
|
|
114
|
-
for (const mgr of Heuristics.bash.pkgManagers) {
|
|
115
|
-
if (!line.startsWith(`${mgr} `))
|
|
116
|
-
continue;
|
|
117
|
-
const rest = line.slice(mgr.length + 1);
|
|
118
|
-
if (Heuristics.bash.verbs.some((v) => rest === v || rest.startsWith(`${v} `))) {
|
|
119
|
-
return true;
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
return false;
|
|
123
|
-
},
|
|
124
|
-
detectIndicators(lines) {
|
|
125
|
-
for (const line of lines) {
|
|
126
|
-
const trimmed = line.trimStart();
|
|
127
|
-
if (trimmed &&
|
|
128
|
-
(Heuristics.bash.isShellPrefix(trimmed) ||
|
|
129
|
-
Heuristics.bash.matchesCommand(trimmed) ||
|
|
130
|
-
Heuristics.bash.matchesPackageManagerVerb(trimmed))) {
|
|
131
|
-
return true;
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
return false;
|
|
135
|
-
},
|
|
136
|
-
},
|
|
137
|
-
css: {
|
|
138
|
-
detectStructure(lines) {
|
|
139
|
-
for (const line of lines) {
|
|
140
|
-
const trimmed = line.trimStart();
|
|
141
|
-
if (!trimmed)
|
|
142
|
-
continue;
|
|
143
|
-
const hasSelector = (trimmed.startsWith('.') || trimmed.startsWith('#')) &&
|
|
144
|
-
trimmed.includes('{');
|
|
145
|
-
if (hasSelector || (trimmed.includes(':') && trimmed.includes(';'))) {
|
|
146
|
-
return true;
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
return false;
|
|
150
|
-
},
|
|
151
|
-
},
|
|
152
|
-
yaml: {
|
|
153
|
-
detectStructure(lines) {
|
|
154
|
-
for (const line of lines) {
|
|
155
|
-
const trimmed = line.trim();
|
|
156
|
-
if (!trimmed)
|
|
157
|
-
continue;
|
|
158
|
-
const colonIdx = trimmed.indexOf(':');
|
|
159
|
-
if (colonIdx > 0) {
|
|
160
|
-
const after = trimmed[colonIdx + 1];
|
|
161
|
-
if (after === ' ' || after === '\t')
|
|
162
|
-
return true;
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
return false;
|
|
166
|
-
},
|
|
167
|
-
},
|
|
168
|
-
};
|
|
169
|
-
/* -------------------------------------------------------------------------------------------------
|
|
170
|
-
* Pattern engine
|
|
171
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
172
|
-
const LANGUAGE_PATTERNS = [
|
|
135
|
+
}
|
|
136
|
+
return false;
|
|
137
|
+
}
|
|
138
|
+
function detectYamlStructure(lines) {
|
|
139
|
+
for (const line of lines) {
|
|
140
|
+
const trimmed = line.trim();
|
|
141
|
+
if (trimmed.length === 0)
|
|
142
|
+
continue;
|
|
143
|
+
const colonIdx = trimmed.indexOf(':');
|
|
144
|
+
if (colonIdx <= 0)
|
|
145
|
+
continue;
|
|
146
|
+
const after = trimmed.charCodeAt(colonIdx + 1);
|
|
147
|
+
// space (32) or tab (9)
|
|
148
|
+
if (after === 32 || after === 9)
|
|
149
|
+
return true;
|
|
150
|
+
}
|
|
151
|
+
return false;
|
|
152
|
+
}
|
|
153
|
+
const LANGUAGES = [
|
|
173
154
|
{
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
155
|
+
lang: 'rust',
|
|
156
|
+
weight: 25,
|
|
157
|
+
match: (ctx) => {
|
|
158
|
+
if (ctx.lower.includes('let mut'))
|
|
159
|
+
return true;
|
|
160
|
+
if (RUST_REGEX.test(ctx.lower))
|
|
161
|
+
return true;
|
|
162
|
+
return ctx.lower.includes('use ') && ctx.lower.includes('::');
|
|
178
163
|
},
|
|
179
164
|
},
|
|
180
165
|
{
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
': number',
|
|
188
|
-
':number',
|
|
189
|
-
': boolean',
|
|
190
|
-
':boolean',
|
|
191
|
-
': void',
|
|
192
|
-
':void',
|
|
193
|
-
': any',
|
|
194
|
-
':any',
|
|
195
|
-
': unknown',
|
|
196
|
-
':unknown',
|
|
197
|
-
': never',
|
|
198
|
-
':never',
|
|
199
|
-
].some((hint) => lower.includes(hint)),
|
|
166
|
+
lang: 'go',
|
|
167
|
+
weight: 22,
|
|
168
|
+
match: (ctx) => {
|
|
169
|
+
if (ctx.lower.includes('import "'))
|
|
170
|
+
return true;
|
|
171
|
+
return /\b(?:package|func)\b/.test(ctx.lower);
|
|
200
172
|
},
|
|
201
173
|
},
|
|
202
174
|
{
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
175
|
+
lang: 'jsx',
|
|
176
|
+
weight: 22,
|
|
177
|
+
match: (ctx) => {
|
|
178
|
+
const l = ctx.lower;
|
|
179
|
+
if (l.includes('classname=') ||
|
|
180
|
+
l.includes('jsx:') ||
|
|
181
|
+
l.includes("from 'react'") ||
|
|
182
|
+
l.includes('from "react"')) {
|
|
183
|
+
return true;
|
|
184
|
+
}
|
|
185
|
+
return containsJsxTag(ctx.code);
|
|
208
186
|
},
|
|
209
187
|
},
|
|
210
188
|
{
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
189
|
+
lang: 'typescript',
|
|
190
|
+
weight: 20,
|
|
191
|
+
match: (ctx) => {
|
|
192
|
+
if (/\b(?:interface|type)\b/.test(ctx.lower))
|
|
193
|
+
return true;
|
|
194
|
+
const l = ctx.lower;
|
|
195
|
+
for (const hint of TYPESCRIPT_HINTS) {
|
|
196
|
+
if (l.includes(hint))
|
|
197
|
+
return true;
|
|
198
|
+
}
|
|
199
|
+
return false;
|
|
214
200
|
},
|
|
215
201
|
},
|
|
216
202
|
{
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
203
|
+
lang: 'sql',
|
|
204
|
+
weight: 20,
|
|
205
|
+
match: (ctx) => {
|
|
206
|
+
const l = ctx.lower;
|
|
207
|
+
return /\b(?:select|insert|update|delete|create|alter|drop)\b/.test(l);
|
|
221
208
|
},
|
|
222
209
|
},
|
|
223
210
|
{
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
211
|
+
lang: 'python',
|
|
212
|
+
weight: 18,
|
|
213
|
+
match: (ctx) => {
|
|
214
|
+
const l = ctx.lower;
|
|
215
|
+
if (l.includes('print(') || l.includes('__name__'))
|
|
216
|
+
return true;
|
|
217
|
+
return PYTHON_REGEX.test(l);
|
|
227
218
|
},
|
|
228
219
|
},
|
|
229
220
|
{
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
221
|
+
lang: 'css',
|
|
222
|
+
weight: 18,
|
|
223
|
+
match: (ctx) => {
|
|
224
|
+
if (CSS_REGEX.test(ctx.lower))
|
|
225
|
+
return true;
|
|
226
|
+
return detectCssStructure(ctx.lines);
|
|
234
227
|
},
|
|
235
228
|
},
|
|
236
229
|
{
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
'<!doctype',
|
|
241
|
-
'<html',
|
|
242
|
-
'<head',
|
|
243
|
-
'<body',
|
|
244
|
-
'<div',
|
|
245
|
-
'<span',
|
|
246
|
-
'<p',
|
|
247
|
-
'<a',
|
|
248
|
-
'<script',
|
|
249
|
-
'<style',
|
|
250
|
-
],
|
|
251
|
-
},
|
|
230
|
+
lang: 'bash',
|
|
231
|
+
weight: 15,
|
|
232
|
+
match: (ctx) => detectBashIndicators(ctx.lines),
|
|
252
233
|
},
|
|
253
234
|
{
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
},
|
|
235
|
+
lang: 'yaml',
|
|
236
|
+
weight: 15,
|
|
237
|
+
match: (ctx) => detectYamlStructure(ctx.lines),
|
|
258
238
|
},
|
|
259
239
|
{
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
},
|
|
240
|
+
lang: 'javascript',
|
|
241
|
+
weight: 12,
|
|
242
|
+
match: (ctx) => JS_REGEX.test(ctx.lower),
|
|
264
243
|
},
|
|
265
244
|
{
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
'drop',
|
|
276
|
-
],
|
|
245
|
+
lang: 'html',
|
|
246
|
+
weight: 12,
|
|
247
|
+
match: (ctx) => {
|
|
248
|
+
const l = ctx.lower;
|
|
249
|
+
for (const tag of HTML_TAGS) {
|
|
250
|
+
if (l.includes(tag))
|
|
251
|
+
return true;
|
|
252
|
+
}
|
|
253
|
+
return false;
|
|
277
254
|
},
|
|
278
255
|
},
|
|
279
256
|
{
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
257
|
+
lang: 'json',
|
|
258
|
+
weight: 10,
|
|
259
|
+
match: (ctx) => {
|
|
260
|
+
const s = ctx.trimmedStart;
|
|
261
|
+
return s.startsWith('{') || s.startsWith('[');
|
|
284
262
|
},
|
|
285
263
|
},
|
|
286
264
|
];
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
265
|
+
export function extractLanguageFromClassName(className) {
|
|
266
|
+
if (!className)
|
|
267
|
+
return undefined;
|
|
268
|
+
// Split by whitespace and check for language indicators
|
|
269
|
+
const tokens = className.match(/\S+/g);
|
|
270
|
+
if (!tokens)
|
|
271
|
+
return undefined;
|
|
272
|
+
// Fast path: check for prefixes
|
|
273
|
+
for (const token of tokens) {
|
|
274
|
+
const lower = token.toLowerCase();
|
|
275
|
+
if (lower.startsWith('language-'))
|
|
276
|
+
return token.slice(9);
|
|
277
|
+
if (lower.startsWith('lang-'))
|
|
278
|
+
return token.slice(5);
|
|
279
|
+
if (lower.startsWith('highlight-'))
|
|
280
|
+
return token.slice(10);
|
|
301
281
|
}
|
|
282
|
+
// Fallback: check for hljs context
|
|
283
|
+
if (!tokens.includes('hljs'))
|
|
284
|
+
return undefined;
|
|
285
|
+
const langClass = tokens.find((t) => {
|
|
286
|
+
const l = t.toLowerCase();
|
|
287
|
+
return l !== 'hljs' && !l.startsWith('hljs-');
|
|
288
|
+
});
|
|
289
|
+
return langClass;
|
|
302
290
|
}
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
const sample = createCodeSample(code);
|
|
307
|
-
for (const { language, pattern } of LANGUAGE_PATTERNS) {
|
|
308
|
-
if (this.engine.matches(sample, pattern))
|
|
309
|
-
return language;
|
|
310
|
-
}
|
|
291
|
+
function resolveLanguageFromDataAttribute(dataLang) {
|
|
292
|
+
const trimmed = dataLang.trim();
|
|
293
|
+
if (!trimmed)
|
|
311
294
|
return undefined;
|
|
295
|
+
// Check if \w+
|
|
296
|
+
for (let i = 0; i < trimmed.length; i++) {
|
|
297
|
+
const c = trimmed.charCodeAt(i);
|
|
298
|
+
// valid: A-Z, a-z, 0-9, _
|
|
299
|
+
const isUpper = c >= 65 && c <= 90;
|
|
300
|
+
const isLower = c >= 97 && c <= 122;
|
|
301
|
+
const isDigit = c >= 48 && c <= 57;
|
|
302
|
+
const isUnder = c === 95;
|
|
303
|
+
if (!isUpper && !isLower && !isDigit && !isUnder) {
|
|
304
|
+
return undefined;
|
|
305
|
+
}
|
|
312
306
|
}
|
|
307
|
+
return trimmed;
|
|
308
|
+
}
|
|
309
|
+
export function resolveLanguageFromAttributes(className, dataLang) {
|
|
310
|
+
return (extractLanguageFromClassName(className) ??
|
|
311
|
+
resolveLanguageFromDataAttribute(dataLang));
|
|
313
312
|
}
|
|
314
|
-
const detector = new LanguageDetector();
|
|
315
|
-
/* -------------------------------------------------------------------------------------------------
|
|
316
|
-
* Public API
|
|
317
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
318
|
-
/**
|
|
319
|
-
* Detect programming language from code content using heuristics.
|
|
320
|
-
*/
|
|
321
313
|
export function detectLanguageFromCode(code) {
|
|
322
|
-
if (!code
|
|
314
|
+
if (!code)
|
|
323
315
|
return undefined;
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
316
|
+
// Fast path for empty/whitespace only
|
|
317
|
+
let empty = true;
|
|
318
|
+
for (let i = 0; i < code.length; i++) {
|
|
319
|
+
if (code.charCodeAt(i) > 32) {
|
|
320
|
+
empty = false;
|
|
321
|
+
break;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
if (empty)
|
|
325
|
+
return undefined;
|
|
326
|
+
const ctx = new DetectionContext(code);
|
|
327
|
+
let bestLang;
|
|
328
|
+
let bestScore = -1;
|
|
329
|
+
for (const def of LANGUAGES) {
|
|
330
|
+
if (def.match(ctx)) {
|
|
331
|
+
if (def.weight > bestScore) {
|
|
332
|
+
bestScore = def.weight;
|
|
333
|
+
bestLang = def.lang;
|
|
334
|
+
if (bestScore >= 25)
|
|
335
|
+
break;
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
return bestLang;
|
|
331
340
|
}
|
|
@@ -3,5 +3,4 @@ export declare function cleanupMarkdownArtifacts(content: string): string;
|
|
|
3
3
|
export declare function extractTitleFromRawMarkdown(content: string): string | undefined;
|
|
4
4
|
export declare function addSourceToMarkdown(content: string, url: string): string;
|
|
5
5
|
export declare function isRawTextContent(content: string): boolean;
|
|
6
|
-
export declare function isLikelyHtmlContent(content: string): boolean;
|
|
7
6
|
export declare function buildMetadataFooter(metadata?: MetadataBlock, fallbackUrl?: string): string;
|