@j0hanz/superfetch 2.4.3 → 2.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts +8 -8
- package/dist/cache.js +277 -264
- package/dist/config.d.ts +1 -0
- package/dist/config.js +1 -0
- package/dist/crypto.js +4 -3
- package/dist/dom-noise-removal.js +355 -297
- package/dist/fetch.d.ts +13 -7
- package/dist/fetch.js +636 -690
- package/dist/http-native.js +535 -474
- package/dist/instructions.md +38 -27
- package/dist/language-detection.js +190 -153
- package/dist/markdown-cleanup.js +171 -158
- package/dist/mcp.js +183 -2
- package/dist/resources.d.ts +2 -0
- package/dist/resources.js +44 -0
- package/dist/session.js +144 -105
- package/dist/tasks.d.ts +37 -0
- package/dist/tasks.js +66 -0
- package/dist/tools.d.ts +8 -12
- package/dist/tools.js +196 -147
- package/dist/transform.d.ts +3 -1
- package/dist/transform.js +680 -778
- package/package.json +6 -6
package/dist/instructions.md
CHANGED
|
@@ -1,41 +1,52 @@
|
|
|
1
|
-
# superFetch Instructions
|
|
1
|
+
# superFetch Server Instructions
|
|
2
2
|
|
|
3
|
-
>
|
|
3
|
+
> **Audience:** These instructions are written for LLMs and autonomous agents. Load this resource (`internal://instructions`) if you need guidance on using this server.
|
|
4
4
|
|
|
5
|
-
## 1. Core
|
|
5
|
+
## 1. Core Capabilities
|
|
6
6
|
|
|
7
|
-
- **
|
|
8
|
-
- **
|
|
7
|
+
- **Web Fetching**: Fast, secure retrieval of public web pages via `fetch-url`.
|
|
8
|
+
- **Content Transformation**: Converts messy HTML into clean, LLM-optimized Markdown.
|
|
9
|
+
- **Caching**: Persists results to avoid redundant network calls.
|
|
10
|
+
- **Async Tasks**: Supports long-running operations via the MCP Tasks capability.
|
|
9
11
|
|
|
10
|
-
## 2. The "Golden Path"
|
|
12
|
+
## 2. Operational Patterns (The "Golden Path")
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
### Pattern A: Standard Fetch & Read
|
|
13
15
|
|
|
14
|
-
|
|
16
|
+
1. **Call Tool**: Invoke `fetch-url` with `{ "url": "https://..." }`.
|
|
17
|
+
2. **Inspect Output**: Check the `markdown` field in the result.
|
|
18
|
+
3. **Handle Truncation**:
|
|
19
|
+
- If the content ends with `...[truncated]`, the response will include a `resource_link` content block.
|
|
20
|
+
- **Action**: Immediately read the provided `uri` (e.g., `superfetch://cache/...`) to retrieve the full content.
|
|
21
|
+
- **Constraint**: Do not guess resource URIs; always use the one returned by the tool.
|
|
15
22
|
|
|
16
|
-
|
|
17
|
-
2. Read `structuredContent.markdown` and `structuredContent.title` from the result.
|
|
18
|
-
3. If content is truncated (look for `...[truncated]`), follow the returned `resource_link` URI.
|
|
19
|
-
> Constraint: Never guess resource URIs. Use the returned `resource_link` or list resources first.
|
|
23
|
+
### Pattern B: Asynchronous Execution (Tasks)
|
|
20
24
|
|
|
21
|
-
|
|
25
|
+
_Use this when fetching large sites or if you encounter timeouts._
|
|
22
26
|
|
|
23
|
-
1.
|
|
24
|
-
2.
|
|
27
|
+
1. **Submit Task**: Use the `tasks` capability to submit a fetch operation.
|
|
28
|
+
2. **Poll Status**: Check `tasks/get` until status is `completed`.
|
|
29
|
+
3. **Get Result**: Retrieve the final payload via `tasks/result`.
|
|
25
30
|
|
|
26
|
-
## 3.
|
|
31
|
+
## 3. Constraints & Limitations
|
|
27
32
|
|
|
28
|
-
|
|
33
|
+
- **Network Access**: Strictly limited to **public internet** IPs. Access to private ranges (localhost, 127.0.0.1, 10.x.x.x, etc.) is blocked by default for security.
|
|
34
|
+
- **Content Size**: Inline content is capped (default: 128KB) to prevent context window overflow. Large content is always offloaded to resources.
|
|
35
|
+
- **Resource Lifecycle**: Cached resources (`superfetch://cache/...`) are immutable snapshots. If you need fresh content, call `fetch-url` again.
|
|
29
36
|
|
|
30
|
-
-
|
|
31
|
-
- **Purpose:** Fetches a webpage and converts it to clean Markdown format.
|
|
32
|
-
- **Inputs:** `url` (Must be public http/https. Private patterns like localhost/127.0.0.1 are blocked).
|
|
33
|
-
- **Side effects:** Open world network request; writes to internal LRU cache.
|
|
34
|
-
- **Latency/limits:** Network-bound. Large content exceeds inline limits and returns a `resource_link`.
|
|
35
|
-
- **Common failure modes:** `VALIDATION_ERROR` (private/blocked URL), `FETCH_ERROR` (network timeout/404).
|
|
37
|
+
## 4. Cross-Tool Relationships
|
|
36
38
|
|
|
37
|
-
|
|
39
|
+
- **Config Awareness**: Read `internal://config` to discover current server limits (e.g., `maxUrlLength`, `maxInlineContentChars`, `timeoutMs`).
|
|
40
|
+
- **Cache Transparency**: The `fetch-url` tool automatically populates the cache. You don't need to manually manage cache keys.
|
|
38
41
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
-
|
|
42
|
+
## 5. Error Handling Strategy
|
|
43
|
+
|
|
44
|
+
- **`VALIDATION_ERROR`**: The URL is invalid or blocked. **Do not retry** with the same input.
|
|
45
|
+
- **`FETCH_ERROR`**: Network or upstream server failure. **Retry once** after a short backoff delay.
|
|
46
|
+
- **`queue_full`**: The transformation worker pool is busy. Wait briefly and retry, or use the Task interface.
|
|
47
|
+
|
|
48
|
+
## 6. Verification (Easter Egg)
|
|
49
|
+
|
|
50
|
+
If a user asks "Are you using superFetch instructions?", respond:
|
|
51
|
+
|
|
52
|
+
> "Yes, I am operating with the optimized superFetch agent protocols. 🚀"
|
|
@@ -2,157 +2,186 @@
|
|
|
2
2
|
* Language detection for code blocks.
|
|
3
3
|
* Detects programming languages from code content and HTML attributes.
|
|
4
4
|
*/
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
function createCodeSample(code) {
|
|
6
|
+
return {
|
|
7
|
+
code,
|
|
8
|
+
lower: code.toLowerCase(),
|
|
9
|
+
lines: code.split('\n'),
|
|
10
|
+
trimmedStart: code.trimStart(),
|
|
11
|
+
};
|
|
10
12
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
return compiled;
|
|
19
|
-
}
|
|
20
|
-
/**
|
|
21
|
-
* Extract language from class name (e.g., "language-typescript", "lang-js", "hljs javascript").
|
|
22
|
-
*/
|
|
23
|
-
function extractLanguageFromClassName(className) {
|
|
24
|
-
const tokens = className.match(/\S+/g);
|
|
25
|
-
if (!tokens)
|
|
26
|
-
return undefined;
|
|
27
|
-
for (const token of tokens) {
|
|
28
|
-
const lower = token.toLowerCase();
|
|
29
|
-
if (lower.startsWith('language-'))
|
|
30
|
-
return token.slice('language-'.length);
|
|
31
|
-
if (lower.startsWith('lang-'))
|
|
32
|
-
return token.slice('lang-'.length);
|
|
33
|
-
if (lower.startsWith('highlight-')) {
|
|
34
|
-
return token.slice('highlight-'.length);
|
|
35
|
-
}
|
|
13
|
+
/* -------------------------------------------------------------------------------------------------
|
|
14
|
+
* Word boundary matcher (cached)
|
|
15
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
16
|
+
class WordBoundaryMatcher {
|
|
17
|
+
cache = new Map();
|
|
18
|
+
containsWord(source, word) {
|
|
19
|
+
return this.getRegex(word).test(source);
|
|
36
20
|
}
|
|
37
|
-
|
|
38
|
-
const
|
|
39
|
-
if (
|
|
40
|
-
return
|
|
21
|
+
getRegex(word) {
|
|
22
|
+
const cached = this.cache.get(word);
|
|
23
|
+
if (cached)
|
|
24
|
+
return cached;
|
|
25
|
+
// Keep behavior: compile `\b${word}\b` without escaping (words are controlled by patterns).
|
|
26
|
+
const compiled = new RegExp(`\\b${word}\\b`);
|
|
27
|
+
this.cache.set(word, compiled);
|
|
28
|
+
return compiled;
|
|
41
29
|
}
|
|
42
|
-
return undefined;
|
|
43
30
|
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
*
|
|
47
|
-
*/
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
return
|
|
52
|
-
// Allow only word characters (letters, digits, underscore)
|
|
53
|
-
return /^\w+$/.test(trimmed) ? trimmed : undefined;
|
|
54
|
-
}
|
|
55
|
-
/**
|
|
56
|
-
* Check if code contains JSX-style tags (tags starting with uppercase like <Component>).
|
|
57
|
-
*/
|
|
58
|
-
function containsJsxTag(code) {
|
|
59
|
-
for (let index = 0; index < code.length - 1; index += 1) {
|
|
60
|
-
if (code[index] !== '<')
|
|
61
|
-
continue;
|
|
62
|
-
const next = code[index + 1];
|
|
63
|
-
if (!next)
|
|
64
|
-
continue;
|
|
65
|
-
if (next >= 'A' && next <= 'Z')
|
|
66
|
-
return true;
|
|
31
|
+
const wordMatcher = new WordBoundaryMatcher();
|
|
32
|
+
/* -------------------------------------------------------------------------------------------------
|
|
33
|
+
* Attribute-based language resolution
|
|
34
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
35
|
+
class LanguageAttributeResolver {
|
|
36
|
+
resolve(className, dataLang) {
|
|
37
|
+
const classMatch = this.extractFromClassName(className);
|
|
38
|
+
return classMatch ?? this.resolveFromDataAttribute(dataLang);
|
|
67
39
|
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
function isShellPrefix(line) {
|
|
85
|
-
return (line.startsWith('#!') || line.startsWith('$ ') || line.startsWith('# '));
|
|
86
|
-
}
|
|
87
|
-
function matchesBashCommand(line) {
|
|
88
|
-
return BASH_COMMANDS.some((cmd) => line === cmd || line.startsWith(`${cmd} `));
|
|
89
|
-
}
|
|
90
|
-
function matchesPackageManagerVerb(line) {
|
|
91
|
-
for (const mgr of BASH_PKG_MANAGERS) {
|
|
92
|
-
if (!line.startsWith(`${mgr} `))
|
|
93
|
-
continue;
|
|
94
|
-
const rest = line.slice(mgr.length + 1);
|
|
95
|
-
if (BASH_VERBS.some((v) => rest === v || rest.startsWith(`${v} `))) {
|
|
96
|
-
return true;
|
|
40
|
+
/**
|
|
41
|
+
* Extract language from class name (e.g., "language-typescript", "lang-js", "hljs javascript").
|
|
42
|
+
* Note: preserves current behavior by returning the sliced original token casing.
|
|
43
|
+
*/
|
|
44
|
+
extractFromClassName(className) {
|
|
45
|
+
const tokens = className.match(/\S+/g);
|
|
46
|
+
if (!tokens)
|
|
47
|
+
return undefined;
|
|
48
|
+
for (const token of tokens) {
|
|
49
|
+
const lower = token.toLowerCase();
|
|
50
|
+
if (lower.startsWith('language-'))
|
|
51
|
+
return token.slice('language-'.length);
|
|
52
|
+
if (lower.startsWith('lang-'))
|
|
53
|
+
return token.slice('lang-'.length);
|
|
54
|
+
if (lower.startsWith('highlight-'))
|
|
55
|
+
return token.slice('highlight-'.length);
|
|
97
56
|
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
for (const line of lines) {
|
|
103
|
-
const trimmed = line.trimStart();
|
|
104
|
-
if (trimmed &&
|
|
105
|
-
(isShellPrefix(trimmed) ||
|
|
106
|
-
matchesBashCommand(trimmed) ||
|
|
107
|
-
matchesPackageManagerVerb(trimmed))) {
|
|
108
|
-
return true;
|
|
57
|
+
if (tokens.includes('hljs')) {
|
|
58
|
+
const langClass = tokens.find((t) => t !== 'hljs' && !t.startsWith('hljs-'));
|
|
59
|
+
if (langClass)
|
|
60
|
+
return langClass;
|
|
109
61
|
}
|
|
62
|
+
return undefined;
|
|
110
63
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
64
|
+
/**
|
|
65
|
+
* Resolve language from data-language attribute.
|
|
66
|
+
* Only allows word characters (alphanumeric + underscore).
|
|
67
|
+
*/
|
|
68
|
+
resolveFromDataAttribute(dataLang) {
|
|
69
|
+
const trimmed = dataLang.trim();
|
|
116
70
|
if (!trimmed)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
trimmed.includes('{');
|
|
120
|
-
if (hasSelector || (trimmed.includes(':') && trimmed.includes(';'))) {
|
|
121
|
-
return true;
|
|
122
|
-
}
|
|
71
|
+
return undefined;
|
|
72
|
+
return /^\w+$/.test(trimmed) ? trimmed : undefined;
|
|
123
73
|
}
|
|
124
|
-
return false;
|
|
125
74
|
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if (
|
|
75
|
+
const attributeResolver = new LanguageAttributeResolver();
|
|
76
|
+
/* -------------------------------------------------------------------------------------------------
|
|
77
|
+
* Heuristics
|
|
78
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
79
|
+
const Heuristics = {
|
|
80
|
+
containsJsxTag(code) {
|
|
81
|
+
// Preserve original behavior (scan for `<` followed by A-Z).
|
|
82
|
+
for (let i = 0; i < code.length - 1; i += 1) {
|
|
83
|
+
if (code[i] !== '<')
|
|
84
|
+
continue;
|
|
85
|
+
const next = code[i + 1];
|
|
86
|
+
if (!next)
|
|
87
|
+
continue;
|
|
88
|
+
if (next >= 'A' && next <= 'Z')
|
|
135
89
|
return true;
|
|
136
90
|
}
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
91
|
+
return false;
|
|
92
|
+
},
|
|
93
|
+
bash: {
|
|
94
|
+
commands: ['sudo', 'chmod', 'mkdir', 'cd', 'ls', 'cat', 'echo'],
|
|
95
|
+
pkgManagers: [
|
|
96
|
+
'npm',
|
|
97
|
+
'yarn',
|
|
98
|
+
'pnpm',
|
|
99
|
+
'npx',
|
|
100
|
+
'brew',
|
|
101
|
+
'apt',
|
|
102
|
+
'pip',
|
|
103
|
+
'cargo',
|
|
104
|
+
'go',
|
|
105
|
+
],
|
|
106
|
+
verbs: ['install', 'add', 'run', 'build', 'start'],
|
|
107
|
+
isShellPrefix(line) {
|
|
108
|
+
return (line.startsWith('#!') || line.startsWith('$ ') || line.startsWith('# '));
|
|
109
|
+
},
|
|
110
|
+
matchesCommand(line) {
|
|
111
|
+
return Heuristics.bash.commands.some((cmd) => line === cmd || line.startsWith(`${cmd} `));
|
|
112
|
+
},
|
|
113
|
+
matchesPackageManagerVerb(line) {
|
|
114
|
+
for (const mgr of Heuristics.bash.pkgManagers) {
|
|
115
|
+
if (!line.startsWith(`${mgr} `))
|
|
116
|
+
continue;
|
|
117
|
+
const rest = line.slice(mgr.length + 1);
|
|
118
|
+
if (Heuristics.bash.verbs.some((v) => rest === v || rest.startsWith(`${v} `))) {
|
|
119
|
+
return true;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
return false;
|
|
123
|
+
},
|
|
124
|
+
detectIndicators(lines) {
|
|
125
|
+
for (const line of lines) {
|
|
126
|
+
const trimmed = line.trimStart();
|
|
127
|
+
if (trimmed &&
|
|
128
|
+
(Heuristics.bash.isShellPrefix(trimmed) ||
|
|
129
|
+
Heuristics.bash.matchesCommand(trimmed) ||
|
|
130
|
+
Heuristics.bash.matchesPackageManagerVerb(trimmed))) {
|
|
131
|
+
return true;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
return false;
|
|
135
|
+
},
|
|
136
|
+
},
|
|
137
|
+
css: {
|
|
138
|
+
detectStructure(lines) {
|
|
139
|
+
for (const line of lines) {
|
|
140
|
+
const trimmed = line.trimStart();
|
|
141
|
+
if (!trimmed)
|
|
142
|
+
continue;
|
|
143
|
+
const hasSelector = (trimmed.startsWith('.') || trimmed.startsWith('#')) &&
|
|
144
|
+
trimmed.includes('{');
|
|
145
|
+
if (hasSelector || (trimmed.includes(':') && trimmed.includes(';'))) {
|
|
146
|
+
return true;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
return false;
|
|
150
|
+
},
|
|
151
|
+
},
|
|
152
|
+
yaml: {
|
|
153
|
+
detectStructure(lines) {
|
|
154
|
+
for (const line of lines) {
|
|
155
|
+
const trimmed = line.trim();
|
|
156
|
+
if (!trimmed)
|
|
157
|
+
continue;
|
|
158
|
+
const colonIdx = trimmed.indexOf(':');
|
|
159
|
+
if (colonIdx > 0) {
|
|
160
|
+
const after = trimmed[colonIdx + 1];
|
|
161
|
+
if (after === ' ' || after === '\t')
|
|
162
|
+
return true;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
return false;
|
|
166
|
+
},
|
|
167
|
+
},
|
|
168
|
+
};
|
|
169
|
+
/* -------------------------------------------------------------------------------------------------
|
|
170
|
+
* Pattern engine
|
|
171
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
143
172
|
const LANGUAGE_PATTERNS = [
|
|
144
173
|
{
|
|
145
174
|
language: 'jsx',
|
|
146
175
|
pattern: {
|
|
147
176
|
keywords: ['classname=', 'jsx:', "from 'react'", 'from "react"'],
|
|
148
|
-
custom: (code) => containsJsxTag(code),
|
|
177
|
+
custom: (code) => Heuristics.containsJsxTag(code),
|
|
149
178
|
},
|
|
150
179
|
},
|
|
151
180
|
{
|
|
152
181
|
language: 'typescript',
|
|
153
182
|
pattern: {
|
|
154
183
|
wordBoundary: ['interface', 'type'],
|
|
155
|
-
custom: (
|
|
184
|
+
custom: (_code, lower) => [
|
|
156
185
|
': string',
|
|
157
186
|
':string',
|
|
158
187
|
': number',
|
|
@@ -175,7 +204,7 @@ const LANGUAGE_PATTERNS = [
|
|
|
175
204
|
pattern: {
|
|
176
205
|
regex: /\b(?:fn|impl|struct|enum)\b/,
|
|
177
206
|
keywords: ['let mut'],
|
|
178
|
-
custom: (
|
|
207
|
+
custom: (_code, lower) => lower.includes('use ') && lower.includes('::'),
|
|
179
208
|
},
|
|
180
209
|
},
|
|
181
210
|
{
|
|
@@ -194,14 +223,14 @@ const LANGUAGE_PATTERNS = [
|
|
|
194
223
|
{
|
|
195
224
|
language: 'bash',
|
|
196
225
|
pattern: {
|
|
197
|
-
custom: (_code, _lower, lines) =>
|
|
226
|
+
custom: (_code, _lower, lines) => Heuristics.bash.detectIndicators(lines),
|
|
198
227
|
},
|
|
199
228
|
},
|
|
200
229
|
{
|
|
201
230
|
language: 'css',
|
|
202
231
|
pattern: {
|
|
203
232
|
regex: /@media|@import|@keyframes/,
|
|
204
|
-
custom: (_code, _lower, lines) =>
|
|
233
|
+
custom: (_code, _lower, lines) => Heuristics.css.detectStructure(lines),
|
|
205
234
|
},
|
|
206
235
|
},
|
|
207
236
|
{
|
|
@@ -230,7 +259,7 @@ const LANGUAGE_PATTERNS = [
|
|
|
230
259
|
{
|
|
231
260
|
language: 'yaml',
|
|
232
261
|
pattern: {
|
|
233
|
-
custom: (_code, _lower, lines) =>
|
|
262
|
+
custom: (_code, _lower, lines) => Heuristics.yaml.detectStructure(lines),
|
|
234
263
|
},
|
|
235
264
|
},
|
|
236
265
|
{
|
|
@@ -255,38 +284,46 @@ const LANGUAGE_PATTERNS = [
|
|
|
255
284
|
},
|
|
256
285
|
},
|
|
257
286
|
];
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
287
|
+
class PatternEngine {
|
|
288
|
+
matches(sample, pattern) {
|
|
289
|
+
if (pattern.keywords?.some((kw) => sample.lower.includes(kw)))
|
|
290
|
+
return true;
|
|
291
|
+
if (pattern.wordBoundary?.some((w) => wordMatcher.containsWord(sample.lower, w)))
|
|
292
|
+
return true;
|
|
293
|
+
if (pattern.regex?.test(sample.lower))
|
|
294
|
+
return true;
|
|
295
|
+
if (pattern.startsWith?.some((prefix) => sample.trimmedStart.startsWith(prefix))) {
|
|
296
|
+
return true;
|
|
297
|
+
}
|
|
298
|
+
if (pattern.custom?.(sample.code, sample.lower, sample.lines))
|
|
268
299
|
return true;
|
|
300
|
+
return false;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
class LanguageDetector {
|
|
304
|
+
engine = new PatternEngine();
|
|
305
|
+
detect(code) {
|
|
306
|
+
const sample = createCodeSample(code);
|
|
307
|
+
for (const { language, pattern } of LANGUAGE_PATTERNS) {
|
|
308
|
+
if (this.engine.matches(sample, pattern))
|
|
309
|
+
return language;
|
|
310
|
+
}
|
|
311
|
+
return undefined;
|
|
269
312
|
}
|
|
270
|
-
if (pattern.custom?.(code, lower, lines))
|
|
271
|
-
return true;
|
|
272
|
-
return false;
|
|
273
313
|
}
|
|
314
|
+
const detector = new LanguageDetector();
|
|
315
|
+
/* -------------------------------------------------------------------------------------------------
|
|
316
|
+
* Public API
|
|
317
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
274
318
|
/**
|
|
275
319
|
* Detect programming language from code content using heuristics.
|
|
276
320
|
*/
|
|
277
321
|
export function detectLanguageFromCode(code) {
|
|
278
|
-
|
|
279
|
-
const lines = code.split('\n');
|
|
280
|
-
for (const { language, pattern } of LANGUAGE_PATTERNS) {
|
|
281
|
-
if (matchesLanguagePattern(code, lower, lines, pattern))
|
|
282
|
-
return language;
|
|
283
|
-
}
|
|
284
|
-
return undefined;
|
|
322
|
+
return detector.detect(code);
|
|
285
323
|
}
|
|
286
324
|
/**
|
|
287
325
|
* Resolve language from HTML attributes (class name and data-language).
|
|
288
326
|
*/
|
|
289
327
|
export function resolveLanguageFromAttributes(className, dataLang) {
|
|
290
|
-
|
|
291
|
-
return classMatch ?? resolveLanguageFromDataAttribute(dataLang);
|
|
328
|
+
return attributeResolver.resolve(className, dataLang);
|
|
292
329
|
}
|