@nocturnium/svelte-ide 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/dist/components/ai/AIMessageContent.svelte +24 -14
- package/dist/components/ai/AIPanel.svelte +22 -0
- package/dist/components/editor/CollaborativeEditor.svelte +68 -5
- package/dist/components/editor/CollaborativeEditor.svelte.d.ts +14 -0
- package/dist/components/editor/CustomEditor.svelte +52 -33
- package/dist/components/editor/CustomEditor.svelte.d.ts +2 -2
- package/dist/components/editor/Editor.svelte +17 -0
- package/dist/components/editor/Editor.svelte.d.ts +9 -0
- package/dist/components/editor/EditorPane.svelte +18 -1
- package/dist/components/editor/EditorPane.svelte.d.ts +5 -0
- package/dist/components/editor/EditorSelections.svelte +27 -11
- package/dist/components/editor/EditorSelections.svelte.d.ts +1 -0
- package/dist/components/editor/core/folding.d.ts +11 -0
- package/dist/components/editor/core/folding.js +41 -0
- package/dist/components/editor/core/index.d.ts +0 -5
- package/dist/components/editor/core/index.js +4 -5
- package/dist/components/editor/core/state.d.ts +5 -0
- package/dist/components/editor/core/state.js +131 -12
- package/dist/components/editor/editor-find.d.ts +1 -0
- package/dist/components/editor/editor-find.js +6 -5
- package/dist/components/editor/editor-input.d.ts +1 -0
- package/dist/components/editor/editor-input.js +4 -1
- package/dist/components/editor/editor-scroll.d.ts +1 -0
- package/dist/components/editor/editor-scroll.js +2 -1
- package/dist/components/editor/index.d.ts +19 -3
- package/dist/components/editor/index.js +18 -4
- package/dist/components/editor/tokenizer/base.d.ts +1 -25
- package/dist/components/editor/tokenizer/base.js +0 -172
- package/dist/components/editor/tokenizer/index.d.ts +4 -0
- package/dist/components/editor/tokenizer/index.js +1 -1
- package/dist/components/editor/tokenizer/languages/html.d.ts +3 -2
- package/dist/components/editor/tokenizer/languages/html.js +64 -6
- package/dist/components/editor/tokenizer/languages/javascript.d.ts +13 -5
- package/dist/components/editor/tokenizer/languages/javascript.js +69 -57
- package/dist/components/editor/tokenizer/languages/svelte.d.ts +1 -1
- package/dist/components/editor/tokenizer/languages/svelte.js +6 -1
- package/dist/components/editor/tokenizer/types.d.ts +0 -28
- package/dist/crdt/awareness.d.ts +8 -2
- package/dist/crdt/awareness.js +11 -4
- package/dist/crdt/document.d.ts +10 -1
- package/dist/crdt/document.js +15 -7
- package/dist/crdt/index.d.ts +8 -2
- package/dist/crdt/index.js +5 -2
- package/dist/crdt/undo.d.ts +2 -7
- package/dist/crdt/undo.js +1 -8
- package/dist/index.d.ts +7 -9
- package/dist/index.js +7 -9
- package/dist/services/error-handling.d.ts +2 -11
- package/dist/services/error-handling.js +15 -4
- package/dist/services/lsp-client.d.ts +3 -0
- package/dist/services/lsp-client.js +55 -10
- package/dist/services/optimistic.d.ts +8 -5
- package/dist/services/optimistic.js +36 -10
- package/dist/services/vfs-client.js +11 -3
- package/dist/stores/agents.svelte.js +3 -2
- package/dist/stores/ai-persistence.svelte.js +7 -2
- package/dist/stores/ai.svelte.js +2 -1
- package/dist/stores/collaboration.svelte.d.ts +1 -1
- package/dist/stores/collaboration.svelte.js +3 -2
- package/dist/stores/editor.svelte.js +29 -5
- package/dist/stores/layout.svelte.js +3 -0
- package/dist/stores/plugin.svelte.js +9 -3
- package/dist/stores/vfs.svelte.js +26 -9
- package/dist/styles/theme.css +43 -0
- package/dist/types/vfs.d.ts +15 -1
- package/dist/types/vfs.js +9 -0
- package/dist/utils/language.d.ts +4 -3
- package/dist/utils/language.js +8 -18
- package/package.json +1 -1
- package/dist/components/editor/MinimalEditor.svelte +0 -75
- package/dist/components/editor/MinimalEditor.svelte.d.ts +0 -6
- package/dist/components/editor/MinimalEditor2.svelte +0 -84
- package/dist/components/editor/MinimalEditor2.svelte.d.ts +0 -6
|
@@ -4,13 +4,27 @@
|
|
|
4
4
|
// Components
|
|
5
5
|
export { default as Editor } from './Editor.svelte';
|
|
6
6
|
export { default as CustomEditor } from './CustomEditor.svelte';
|
|
7
|
-
export { default as CollaborativeEditor } from './CollaborativeEditor.svelte';
|
|
8
7
|
export { default as EditorTabs } from './EditorTabs.svelte';
|
|
9
8
|
export { default as EditorPane } from './EditorPane.svelte';
|
|
10
9
|
export { default as FileIcon } from './FileIcon.svelte';
|
|
11
10
|
export { default as FileExplorer } from './FileExplorer.svelte';
|
|
12
|
-
// Core utilities
|
|
13
|
-
export * from './core';
|
|
11
|
+
// Core utilities (explicitly excluding CRDT binding; use @nocturnium/svelte-ide/crdt)
|
|
12
|
+
export * from './core/state';
|
|
13
|
+
export * from './core/navigation';
|
|
14
|
+
export * from './core/keybindings';
|
|
15
|
+
export * from './core/search';
|
|
16
|
+
export * from './core/folding';
|
|
17
|
+
export * from './core/multi-cursor';
|
|
18
|
+
export * from './core/complexity-analyzer';
|
|
19
|
+
export * from './core/ai-awareness';
|
|
20
|
+
export * from './core/semantic-analyzer';
|
|
21
|
+
export * from './core/commands';
|
|
22
|
+
export * from './core/bracket-healer';
|
|
23
|
+
export * from './core/git-blame';
|
|
24
|
+
export * from './core/snippet-manager';
|
|
25
|
+
export * from './core/quick-actions';
|
|
26
|
+
export * from './core/diagnostics';
|
|
27
|
+
export * from './core/breakpoints';
|
|
14
28
|
// Theme
|
|
15
29
|
export * from './theme';
|
|
16
30
|
// Languages (explicit exports to avoid conflicts with tokenizer)
|
|
@@ -18,4 +32,4 @@ export { getLanguageExtension, getLanguageConfig, getLanguageFromExtension, getL
|
|
|
18
32
|
// Re-export from languages (these exist in both but we prefer languages versions)
|
|
19
33
|
export { getSupportedLanguages, isLanguageSupported } from './languages';
|
|
20
34
|
// Tokenizer (explicit exports to avoid conflicts)
|
|
21
|
-
export { getTokenizer, tokenize, getTokenClass, tokensToHTML, PlaintextTokenizer,
|
|
35
|
+
export { getTokenizer, tokenize, getTokenClass, tokensToHTML, PlaintextTokenizer, createToken } from './tokenizer';
|
|
@@ -1,35 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Base tokenizer with common functionality
|
|
3
3
|
*/
|
|
4
|
-
import type { Token, TokenizedLine, TokenizerState, TokenType,
|
|
4
|
+
import type { Token, TokenizedLine, TokenizerState, TokenType, LanguageTokenizer } from './types';
|
|
5
5
|
/**
|
|
6
6
|
* Create a token
|
|
7
7
|
*/
|
|
8
8
|
export declare function createToken(type: TokenType, text: string, start: number): Token;
|
|
9
|
-
/**
|
|
10
|
-
* Base tokenizer class using grammar rules
|
|
11
|
-
*/
|
|
12
|
-
export declare class GrammarTokenizer implements LanguageTokenizer {
|
|
13
|
-
language: string;
|
|
14
|
-
private grammar;
|
|
15
|
-
constructor(grammar: LanguageGrammar);
|
|
16
|
-
getInitialState(): TokenizerState;
|
|
17
|
-
tokenizeLine(line: string, lineNumber: number, prevState?: TokenizerState): TokenizedLine;
|
|
18
|
-
private updateState;
|
|
19
|
-
}
|
|
20
|
-
/**
|
|
21
|
-
* Simple regex-based tokenizer for basic languages
|
|
22
|
-
*/
|
|
23
|
-
export declare class SimpleTokenizer implements LanguageTokenizer {
|
|
24
|
-
language: string;
|
|
25
|
-
private patterns;
|
|
26
|
-
constructor(language: string, patterns: Array<{
|
|
27
|
-
type: TokenType;
|
|
28
|
-
regex: RegExp;
|
|
29
|
-
}>);
|
|
30
|
-
getInitialState(): TokenizerState;
|
|
31
|
-
tokenizeLine(line: string, lineNumber: number, prevState?: TokenizerState): TokenizedLine;
|
|
32
|
-
}
|
|
33
9
|
/**
|
|
34
10
|
* Plaintext tokenizer - no highlighting
|
|
35
11
|
*/
|
|
@@ -12,178 +12,6 @@ export function createToken(type, text, start) {
|
|
|
12
12
|
end: start + text.length
|
|
13
13
|
};
|
|
14
14
|
}
|
|
15
|
-
/**
|
|
16
|
-
* Base tokenizer class using grammar rules
|
|
17
|
-
*/
|
|
18
|
-
export class GrammarTokenizer {
|
|
19
|
-
language;
|
|
20
|
-
grammar;
|
|
21
|
-
constructor(grammar) {
|
|
22
|
-
this.language = grammar.language;
|
|
23
|
-
this.grammar = grammar;
|
|
24
|
-
}
|
|
25
|
-
getInitialState() {
|
|
26
|
-
return {};
|
|
27
|
-
}
|
|
28
|
-
tokenizeLine(line, lineNumber, prevState) {
|
|
29
|
-
const tokens = [];
|
|
30
|
-
let pos = 0;
|
|
31
|
-
const state = { ...prevState };
|
|
32
|
-
let currentRuleSet = 'root';
|
|
33
|
-
// Determine starting rule set based on state
|
|
34
|
-
if (state.inBlockComment) {
|
|
35
|
-
currentRuleSet = 'blockComment';
|
|
36
|
-
}
|
|
37
|
-
else if (state.inTemplateLiteral) {
|
|
38
|
-
currentRuleSet = 'templateLiteral';
|
|
39
|
-
}
|
|
40
|
-
else if (state.inMultilineString) {
|
|
41
|
-
currentRuleSet = 'multilineString';
|
|
42
|
-
}
|
|
43
|
-
while (pos < line.length) {
|
|
44
|
-
const remaining = line.slice(pos);
|
|
45
|
-
let matched = false;
|
|
46
|
-
// Try to match rules in current rule set
|
|
47
|
-
const rules = this.grammar.rules[currentRuleSet] || this.grammar.rules['root'] || [];
|
|
48
|
-
for (const rule of rules) {
|
|
49
|
-
const match = remaining.match(rule.pattern);
|
|
50
|
-
if (match && match.index === 0) {
|
|
51
|
-
const text = match[0];
|
|
52
|
-
tokens.push(createToken(rule.type, text, pos));
|
|
53
|
-
pos += text.length;
|
|
54
|
-
matched = true;
|
|
55
|
-
// Handle state transitions
|
|
56
|
-
if (rule.nextState) {
|
|
57
|
-
currentRuleSet = rule.nextState;
|
|
58
|
-
this.updateState(state, rule.nextState, true);
|
|
59
|
-
}
|
|
60
|
-
if (rule.popState) {
|
|
61
|
-
currentRuleSet = 'root';
|
|
62
|
-
this.updateState(state, currentRuleSet, false);
|
|
63
|
-
}
|
|
64
|
-
break;
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
// No match - consume single character as text
|
|
68
|
-
if (!matched) {
|
|
69
|
-
const char = line[pos];
|
|
70
|
-
// Try to merge with previous text token
|
|
71
|
-
const lastToken = tokens[tokens.length - 1];
|
|
72
|
-
if (lastToken && lastToken.type === 'text' && lastToken.end === pos) {
|
|
73
|
-
lastToken.text += char;
|
|
74
|
-
lastToken.end += 1;
|
|
75
|
-
}
|
|
76
|
-
else {
|
|
77
|
-
tokens.push(createToken('text', char, pos));
|
|
78
|
-
}
|
|
79
|
-
pos += 1;
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
// Handle empty lines
|
|
83
|
-
if (tokens.length === 0) {
|
|
84
|
-
tokens.push(createToken('text', '', 0));
|
|
85
|
-
}
|
|
86
|
-
return {
|
|
87
|
-
lineNumber,
|
|
88
|
-
tokens,
|
|
89
|
-
text: line,
|
|
90
|
-
state: { ...state }
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
updateState(state, ruleSet, entering) {
|
|
94
|
-
switch (ruleSet) {
|
|
95
|
-
case 'blockComment':
|
|
96
|
-
state.inBlockComment = entering;
|
|
97
|
-
break;
|
|
98
|
-
case 'templateLiteral':
|
|
99
|
-
state.inTemplateLiteral = entering;
|
|
100
|
-
break;
|
|
101
|
-
case 'multilineString':
|
|
102
|
-
state.inMultilineString = entering;
|
|
103
|
-
break;
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
/**
|
|
108
|
-
* Simple regex-based tokenizer for basic languages
|
|
109
|
-
*/
|
|
110
|
-
export class SimpleTokenizer {
|
|
111
|
-
language;
|
|
112
|
-
patterns;
|
|
113
|
-
constructor(language, patterns) {
|
|
114
|
-
this.language = language;
|
|
115
|
-
this.patterns = patterns;
|
|
116
|
-
}
|
|
117
|
-
getInitialState() {
|
|
118
|
-
return {};
|
|
119
|
-
}
|
|
120
|
-
tokenizeLine(line, lineNumber, prevState) {
|
|
121
|
-
const tokens = [];
|
|
122
|
-
let pos = 0;
|
|
123
|
-
const state = { ...prevState };
|
|
124
|
-
// Handle block comment continuation
|
|
125
|
-
if (state.inBlockComment) {
|
|
126
|
-
const endMatch = line.indexOf('*/');
|
|
127
|
-
if (endMatch !== -1) {
|
|
128
|
-
tokens.push(createToken('comment.block', line.slice(0, endMatch + 2), 0));
|
|
129
|
-
pos = endMatch + 2;
|
|
130
|
-
state.inBlockComment = false;
|
|
131
|
-
}
|
|
132
|
-
else {
|
|
133
|
-
tokens.push(createToken('comment.block', line, 0));
|
|
134
|
-
return { lineNumber, tokens, text: line, state };
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
while (pos < line.length) {
|
|
138
|
-
const remaining = line.slice(pos);
|
|
139
|
-
let matched = false;
|
|
140
|
-
// Check for block comment start
|
|
141
|
-
if (remaining.startsWith('/*')) {
|
|
142
|
-
const endMatch = remaining.indexOf('*/', 2);
|
|
143
|
-
if (endMatch !== -1) {
|
|
144
|
-
const text = remaining.slice(0, endMatch + 2);
|
|
145
|
-
tokens.push(createToken('comment.block', text, pos));
|
|
146
|
-
pos += text.length;
|
|
147
|
-
}
|
|
148
|
-
else {
|
|
149
|
-
tokens.push(createToken('comment.block', remaining, pos));
|
|
150
|
-
state.inBlockComment = true;
|
|
151
|
-
pos = line.length;
|
|
152
|
-
}
|
|
153
|
-
matched = true;
|
|
154
|
-
continue;
|
|
155
|
-
}
|
|
156
|
-
// Try each pattern
|
|
157
|
-
for (const { type, regex } of this.patterns) {
|
|
158
|
-
const match = remaining.match(regex);
|
|
159
|
-
if (match && match.index === 0) {
|
|
160
|
-
const text = match[0];
|
|
161
|
-
tokens.push(createToken(type, text, pos));
|
|
162
|
-
pos += text.length;
|
|
163
|
-
matched = true;
|
|
164
|
-
break;
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
// No match - consume character
|
|
168
|
-
if (!matched) {
|
|
169
|
-
const char = remaining[0];
|
|
170
|
-
const lastToken = tokens[tokens.length - 1];
|
|
171
|
-
if (lastToken && lastToken.type === 'text' && lastToken.end === pos) {
|
|
172
|
-
lastToken.text += char;
|
|
173
|
-
lastToken.end += 1;
|
|
174
|
-
}
|
|
175
|
-
else {
|
|
176
|
-
tokens.push(createToken('text', char, pos));
|
|
177
|
-
}
|
|
178
|
-
pos += 1;
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
if (tokens.length === 0) {
|
|
182
|
-
tokens.push(createToken('text', '', 0));
|
|
183
|
-
}
|
|
184
|
-
return { lineNumber, tokens, text: line, state };
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
15
|
/**
|
|
188
16
|
* Plaintext tokenizer - no highlighting
|
|
189
17
|
*/
|
|
@@ -15,6 +15,10 @@ export { GoTokenizer, createGoTokenizer } from './languages/go';
|
|
|
15
15
|
export { MarkdownTokenizer, createMarkdownTokenizer } from './languages/markdown';
|
|
16
16
|
export { SvelteTokenizer, createSvelteTokenizer } from './languages/svelte';
|
|
17
17
|
import type { LanguageTokenizer, Token, TokenizedLine, TokenizerState, TokenType } from './types';
|
|
18
|
+
/**
|
|
19
|
+
* Tokenizer factory functions by language
|
|
20
|
+
*/
|
|
21
|
+
export declare const tokenizerFactories: Record<string, () => LanguageTokenizer>;
|
|
18
22
|
/**
|
|
19
23
|
* Get the canonical language name from an alias or extension
|
|
20
24
|
*/
|
|
@@ -78,7 +78,7 @@ const languageAliases = {
|
|
|
78
78
|
/**
|
|
79
79
|
* Tokenizer factory functions by language
|
|
80
80
|
*/
|
|
81
|
-
const tokenizerFactories = {
|
|
81
|
+
export const tokenizerFactories = {
|
|
82
82
|
javascript: createJavaScriptTokenizer,
|
|
83
83
|
typescript: createTypeScriptTokenizer,
|
|
84
84
|
jsx: createJSXTokenizer,
|
|
@@ -3,14 +3,15 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { TokenizedLine, TokenizerState } from '../types';
|
|
5
5
|
interface HTMLTokenizerState extends TokenizerState {
|
|
6
|
-
inTag?: boolean;
|
|
7
6
|
inScript?: boolean;
|
|
8
7
|
inStyle?: boolean;
|
|
9
|
-
|
|
8
|
+
innerState?: TokenizerState;
|
|
10
9
|
}
|
|
11
10
|
export declare class HTMLTokenizer {
|
|
12
11
|
language: string;
|
|
13
12
|
private isXML;
|
|
13
|
+
private jsTokenizer;
|
|
14
|
+
private cssTokenizer;
|
|
14
15
|
constructor(options?: {
|
|
15
16
|
xml?: boolean;
|
|
16
17
|
});
|
|
@@ -2,9 +2,13 @@
|
|
|
2
2
|
* HTML/XML tokenizer
|
|
3
3
|
*/
|
|
4
4
|
import { createToken } from '../base';
|
|
5
|
+
import { createCSSTokenizer } from './css';
|
|
6
|
+
import { createJavaScriptTokenizer } from './javascript';
|
|
5
7
|
export class HTMLTokenizer {
|
|
6
8
|
language;
|
|
7
9
|
isXML;
|
|
10
|
+
jsTokenizer = createJavaScriptTokenizer();
|
|
11
|
+
cssTokenizer = createCSSTokenizer();
|
|
8
12
|
constructor(options = {}) {
|
|
9
13
|
this.isXML = options.xml ?? false;
|
|
10
14
|
this.language = this.isXML ? 'xml' : 'html';
|
|
@@ -29,6 +33,54 @@ export class HTMLTokenizer {
|
|
|
29
33
|
return { lineNumber, tokens, text: line, state };
|
|
30
34
|
}
|
|
31
35
|
}
|
|
36
|
+
if (!this.isXML && state.inScript) {
|
|
37
|
+
const closeScriptMatch = line.match(/<\/script>/i);
|
|
38
|
+
if (closeScriptMatch) {
|
|
39
|
+
const scriptPart = line.slice(0, closeScriptMatch.index);
|
|
40
|
+
if (scriptPart) {
|
|
41
|
+
const result = this.jsTokenizer.tokenizeLine(scriptPart, lineNumber, state.innerState ?? this.jsTokenizer.getInitialState());
|
|
42
|
+
tokens.push(...result.tokens);
|
|
43
|
+
state.innerState = result.state;
|
|
44
|
+
}
|
|
45
|
+
state.inScript = false;
|
|
46
|
+
state.innerState = undefined;
|
|
47
|
+
pos = closeScriptMatch.index;
|
|
48
|
+
}
|
|
49
|
+
else {
|
|
50
|
+
const result = this.jsTokenizer.tokenizeLine(line, lineNumber, state.innerState ?? this.jsTokenizer.getInitialState());
|
|
51
|
+
state.innerState = result.state;
|
|
52
|
+
return {
|
|
53
|
+
lineNumber,
|
|
54
|
+
tokens: result.tokens,
|
|
55
|
+
text: line,
|
|
56
|
+
state
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
if (!this.isXML && state.inStyle) {
|
|
61
|
+
const closeStyleMatch = line.match(/<\/style>/i);
|
|
62
|
+
if (closeStyleMatch) {
|
|
63
|
+
const stylePart = line.slice(0, closeStyleMatch.index);
|
|
64
|
+
if (stylePart) {
|
|
65
|
+
const result = this.cssTokenizer.tokenizeLine(stylePart, lineNumber, state.innerState ?? this.cssTokenizer.getInitialState());
|
|
66
|
+
tokens.push(...result.tokens);
|
|
67
|
+
state.innerState = result.state;
|
|
68
|
+
}
|
|
69
|
+
state.inStyle = false;
|
|
70
|
+
state.innerState = undefined;
|
|
71
|
+
pos = closeStyleMatch.index;
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
const result = this.cssTokenizer.tokenizeLine(line, lineNumber, state.innerState ?? this.cssTokenizer.getInitialState());
|
|
75
|
+
state.innerState = result.state;
|
|
76
|
+
return {
|
|
77
|
+
lineNumber,
|
|
78
|
+
tokens: result.tokens,
|
|
79
|
+
text: line,
|
|
80
|
+
state
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
}
|
|
32
84
|
while (pos < line.length) {
|
|
33
85
|
const remaining = line.slice(pos);
|
|
34
86
|
const token = this.getNextToken(remaining, pos, state, line);
|
|
@@ -89,10 +141,14 @@ export class HTMLTokenizer {
|
|
|
89
141
|
const closingTagMatch = text.match(/^<\/([a-zA-Z][a-zA-Z0-9:-]*)>/);
|
|
90
142
|
if (closingTagMatch) {
|
|
91
143
|
const tagName = closingTagMatch[1].toLowerCase();
|
|
92
|
-
if (tagName === 'script')
|
|
144
|
+
if (tagName === 'script') {
|
|
93
145
|
state.inScript = false;
|
|
94
|
-
|
|
146
|
+
state.innerState = undefined;
|
|
147
|
+
}
|
|
148
|
+
if (tagName === 'style') {
|
|
95
149
|
state.inStyle = false;
|
|
150
|
+
state.innerState = undefined;
|
|
151
|
+
}
|
|
96
152
|
return createToken('tag', closingTagMatch[0], pos);
|
|
97
153
|
}
|
|
98
154
|
// Opening tag
|
|
@@ -121,17 +177,19 @@ export class HTMLTokenizer {
|
|
|
121
177
|
}
|
|
122
178
|
if (openingMatch) {
|
|
123
179
|
const tagName = openingMatch[1].toLowerCase();
|
|
124
|
-
if (tagName === 'script')
|
|
180
|
+
if (!this.isXML && tagName === 'script') {
|
|
125
181
|
state.inScript = true;
|
|
126
|
-
|
|
182
|
+
state.innerState = this.jsTokenizer.getInitialState();
|
|
183
|
+
}
|
|
184
|
+
if (!this.isXML && tagName === 'style') {
|
|
127
185
|
state.inStyle = true;
|
|
186
|
+
state.innerState = this.cssTokenizer.getInitialState();
|
|
187
|
+
}
|
|
128
188
|
return createToken('tag', openingMatch[0], pos);
|
|
129
189
|
}
|
|
130
190
|
// Partial tag - just match the tag name part
|
|
131
191
|
const partialMatch = text.match(/^<([a-zA-Z][a-zA-Z0-9:-]*)/);
|
|
132
192
|
if (partialMatch) {
|
|
133
|
-
state.inTag = true;
|
|
134
|
-
state.tagName = partialMatch[1];
|
|
135
193
|
return createToken('tag.name', partialMatch[0], pos);
|
|
136
194
|
}
|
|
137
195
|
return createToken('tag.punctuation', '<', pos);
|
|
@@ -4,9 +4,6 @@
|
|
|
4
4
|
import type { TokenizedLine, TokenizerState } from '../types';
|
|
5
5
|
interface JSTokenizerState extends TokenizerState {
|
|
6
6
|
templateDepth?: number;
|
|
7
|
-
jsxDepth?: number;
|
|
8
|
-
/** Track if regex is valid in current context (for division vs regex ambiguity) */
|
|
9
|
-
expectExpression?: boolean;
|
|
10
7
|
/**
|
|
11
8
|
* Last significant token text, used for regex/division disambiguation.
|
|
12
9
|
* Stored per-line in the threaded state (not on the tokenizer instance) so a
|
|
@@ -45,8 +42,19 @@ export declare class JavaScriptTokenizer {
|
|
|
45
42
|
*/
|
|
46
43
|
private tryMatchRegex;
|
|
47
44
|
private tokenizeString;
|
|
48
|
-
|
|
49
|
-
|
|
45
|
+
/**
|
|
46
|
+
* Scan the string portion of a template literal, starting at `startPos`. Emits
|
|
47
|
+
* one `string.template` token for the run of literal characters and stops at one
|
|
48
|
+
* of three boundaries:
|
|
49
|
+
* - a closing backtick → ends the literal (clears template state);
|
|
50
|
+
* - a `${` → emits the `${` delimiter and enters interpolation
|
|
51
|
+
* (templateDepth = 1) so the expression is tokenized as code by the caller;
|
|
52
|
+
* - end of line → the literal spans lines and continues next line.
|
|
53
|
+
*
|
|
54
|
+
* `isStart` is true when this is the opening backtick (vs. a continuation of a
|
|
55
|
+
* multi-line literal or the resumption after a `${…}` interpolation).
|
|
56
|
+
*/
|
|
57
|
+
private scanTemplateString;
|
|
50
58
|
private tokenizeJSXTag;
|
|
51
59
|
}
|
|
52
60
|
export declare function createJavaScriptTokenizer(): JavaScriptTokenizer;
|
|
@@ -234,8 +234,7 @@ export class JavaScriptTokenizer {
|
|
|
234
234
|
}
|
|
235
235
|
getInitialState() {
|
|
236
236
|
return {
|
|
237
|
-
templateDepth: 0
|
|
238
|
-
jsxDepth: 0
|
|
237
|
+
templateDepth: 0
|
|
239
238
|
};
|
|
240
239
|
}
|
|
241
240
|
tokenizeLine(line, lineNumber, prevState) {
|
|
@@ -263,22 +262,39 @@ export class JavaScriptTokenizer {
|
|
|
263
262
|
return { lineNumber, tokens, text: line, state };
|
|
264
263
|
}
|
|
265
264
|
}
|
|
266
|
-
// Handle template literal continuation
|
|
267
|
-
if (state.inTemplateLiteral && (state.templateDepth ?? 0) > 0) {
|
|
268
|
-
const result = this.tokenizeTemplateLiteralContinuation(line, pos, state);
|
|
269
|
-
tokens.push(...result.tokens);
|
|
270
|
-
pos = result.pos;
|
|
271
|
-
if (pos >= line.length) {
|
|
272
|
-
return { lineNumber, tokens, text: line, state };
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
265
|
while (pos < line.length) {
|
|
266
|
+
// Template literals: scan the string portion (between the backticks and
|
|
267
|
+
// `${` / `}`) as a single string.template token. The interpolation
|
|
268
|
+
// expression is tokenized as ordinary code below, tracking brace depth in
|
|
269
|
+
// state.templateDepth so the closing backtick is recognised. This fixes the
|
|
270
|
+
// leak where an unclosed template state bled onto every following line.
|
|
271
|
+
const inStringPortion = state.inTemplateLiteral && (state.templateDepth ?? 0) === 0;
|
|
272
|
+
const startsTemplate = !state.inTemplateLiteral && line[pos] === '`';
|
|
273
|
+
if (inStringPortion || startsTemplate) {
|
|
274
|
+
const result = this.scanTemplateString(line, pos, state, startsTemplate);
|
|
275
|
+
tokens.push(...result.tokens);
|
|
276
|
+
pos = result.pos;
|
|
277
|
+
continue;
|
|
278
|
+
}
|
|
276
279
|
const remaining = line.slice(pos);
|
|
277
280
|
const token = this.getNextToken(remaining, pos, state);
|
|
278
281
|
if (token) {
|
|
279
282
|
tokens.push(token);
|
|
280
283
|
this.updateLastToken(token, state);
|
|
281
284
|
pos = token.end;
|
|
285
|
+
// Track brace nesting inside a ${...} interpolation so we know when it
|
|
286
|
+
// closes and we return to the template's string portion. Braces inside
|
|
287
|
+
// strings/comments are separate token types, so they aren't miscounted.
|
|
288
|
+
if (state.inTemplateLiteral &&
|
|
289
|
+
(state.templateDepth ?? 0) > 0 &&
|
|
290
|
+
token.type === 'punctuation.brace') {
|
|
291
|
+
if (token.text === '{') {
|
|
292
|
+
state.templateDepth = (state.templateDepth ?? 0) + 1;
|
|
293
|
+
}
|
|
294
|
+
else if (token.text === '}') {
|
|
295
|
+
state.templateDepth = Math.max(0, (state.templateDepth ?? 1) - 1);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
282
298
|
}
|
|
283
299
|
else {
|
|
284
300
|
// No match - shouldn't happen but handle gracefully
|
|
@@ -314,10 +330,10 @@ export class JavaScriptTokenizer {
|
|
|
314
330
|
return createToken('comment.block', text, pos);
|
|
315
331
|
}
|
|
316
332
|
}
|
|
317
|
-
// Template literals
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
333
|
+
// Template literals are handled in tokenizeLine's main loop
|
|
334
|
+
// (scanTemplateString) so the string portions and the ${...} interpolation
|
|
335
|
+
// expression are tokenized separately. A backtick only reaches here while
|
|
336
|
+
// inside an interpolation (a nested template); fall through to consume it.
|
|
321
337
|
// Regular strings
|
|
322
338
|
if (text.startsWith('"') || text.startsWith("'")) {
|
|
323
339
|
return this.tokenizeString(text, pos, text[0]);
|
|
@@ -552,59 +568,55 @@ export class JavaScriptTokenizer {
|
|
|
552
568
|
// Unterminated string at end of line
|
|
553
569
|
return createToken('string', text, pos);
|
|
554
570
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
if (text[i] === '$' && text[i + 1] === '{') {
|
|
569
|
-
// Template expression - for simplicity, tokenize up to this point
|
|
570
|
-
if (result.length > 1) {
|
|
571
|
-
// Return string part first
|
|
572
|
-
state.inTemplateLiteral = true;
|
|
573
|
-
state.templateDepth = (state.templateDepth ?? 0) + 1;
|
|
574
|
-
return createToken('string.template', result, pos);
|
|
575
|
-
}
|
|
576
|
-
}
|
|
577
|
-
result += text[i];
|
|
578
|
-
i++;
|
|
579
|
-
}
|
|
580
|
-
// Multi-line template literal
|
|
581
|
-
state.inTemplateLiteral = true;
|
|
582
|
-
return createToken('string.template', result, pos);
|
|
583
|
-
}
|
|
584
|
-
tokenizeTemplateLiteralContinuation(line, startPos, state) {
|
|
585
|
-
const tokens = [];
|
|
571
|
+
/**
|
|
572
|
+
* Scan the string portion of a template literal, starting at `startPos`. Emits
|
|
573
|
+
* one `string.template` token for the run of literal characters and stops at one
|
|
574
|
+
* of three boundaries:
|
|
575
|
+
* - a closing backtick → ends the literal (clears template state);
|
|
576
|
+
* - a `${` → emits the `${` delimiter and enters interpolation
|
|
577
|
+
* (templateDepth = 1) so the expression is tokenized as code by the caller;
|
|
578
|
+
* - end of line → the literal spans lines and continues next line.
|
|
579
|
+
*
|
|
580
|
+
* `isStart` is true when this is the opening backtick (vs. a continuation of a
|
|
581
|
+
* multi-line literal or the resumption after a `${…}` interpolation).
|
|
582
|
+
*/
|
|
583
|
+
scanTemplateString(line, startPos, state, isStart) {
|
|
586
584
|
let pos = startPos;
|
|
587
585
|
let result = '';
|
|
586
|
+
if (isStart) {
|
|
587
|
+
state.inTemplateLiteral = true;
|
|
588
|
+
state.templateDepth = 0;
|
|
589
|
+
result = '`';
|
|
590
|
+
pos += 1;
|
|
591
|
+
}
|
|
588
592
|
while (pos < line.length) {
|
|
589
|
-
|
|
593
|
+
const ch = line[pos];
|
|
594
|
+
if (ch === '\\' && pos + 1 < line.length) {
|
|
590
595
|
result += line.slice(pos, pos + 2);
|
|
591
596
|
pos += 2;
|
|
592
597
|
continue;
|
|
593
598
|
}
|
|
594
|
-
if (
|
|
599
|
+
if (ch === '`') {
|
|
595
600
|
result += '`';
|
|
596
|
-
tokens.push(createToken('string.template', result, startPos));
|
|
597
601
|
state.inTemplateLiteral = false;
|
|
598
|
-
state.templateDepth =
|
|
599
|
-
return { tokens, pos: pos + 1 };
|
|
602
|
+
state.templateDepth = 0;
|
|
603
|
+
return { tokens: [createToken('string.template', result, startPos)], pos: pos + 1 };
|
|
600
604
|
}
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
605
|
+
if (ch === '$' && line[pos + 1] === '{') {
|
|
606
|
+
const tokens = [];
|
|
607
|
+
if (result) {
|
|
608
|
+
tokens.push(createToken('string.template', result, startPos));
|
|
609
|
+
}
|
|
610
|
+
tokens.push(createToken('string.template', '${', startPos + result.length));
|
|
611
|
+
state.templateDepth = 1;
|
|
612
|
+
return { tokens, pos: pos + 2 };
|
|
613
|
+
}
|
|
614
|
+
result += ch;
|
|
615
|
+
pos += 1;
|
|
606
616
|
}
|
|
607
|
-
|
|
617
|
+
// End of line inside the string portion → multi-line template literal.
|
|
618
|
+
state.inTemplateLiteral = true;
|
|
619
|
+
return { tokens: result ? [createToken('string.template', result, startPos)] : [], pos };
|
|
608
620
|
}
|
|
609
621
|
tokenizeJSXTag(text, pos, _state) {
|
|
610
622
|
// Simple JSX tag detection
|
|
@@ -29,7 +29,7 @@ export declare class SvelteTokenizer implements LanguageTokenizer {
|
|
|
29
29
|
private cssTokenizer;
|
|
30
30
|
constructor();
|
|
31
31
|
getInitialState(): SvelteTokenizerState;
|
|
32
|
-
tokenizeLine(line: string, lineNumber: number,
|
|
32
|
+
tokenizeLine(line: string, lineNumber: number, prevState?: SvelteTokenizerState): TokenizedLine;
|
|
33
33
|
private tokenizeTemplate;
|
|
34
34
|
private tokenizeTag;
|
|
35
35
|
private tokenizeJSExpression;
|
|
@@ -65,9 +65,14 @@ export class SvelteTokenizer {
|
|
|
65
65
|
tagDepth: 0
|
|
66
66
|
};
|
|
67
67
|
}
|
|
68
|
-
tokenizeLine(line, lineNumber,
|
|
68
|
+
tokenizeLine(line, lineNumber, prevState) {
|
|
69
69
|
const tokens = [];
|
|
70
70
|
let pos = 0;
|
|
71
|
+
const state = {
|
|
72
|
+
...this.getInitialState(),
|
|
73
|
+
...prevState,
|
|
74
|
+
innerState: prevState?.innerState ? { ...prevState.innerState } : undefined
|
|
75
|
+
};
|
|
71
76
|
// Handle script context
|
|
72
77
|
if (state.context === 'script') {
|
|
73
78
|
const closeScriptMatch = line.match(/<\/script>/i);
|
|
@@ -56,31 +56,3 @@ export interface LanguageTokenizer {
|
|
|
56
56
|
/** Get initial state */
|
|
57
57
|
getInitialState(): TokenizerState;
|
|
58
58
|
}
|
|
59
|
-
/**
|
|
60
|
-
* Token rule for regex-based tokenization
|
|
61
|
-
*/
|
|
62
|
-
export interface TokenRule {
|
|
63
|
-
/** Token type to assign */
|
|
64
|
-
type: TokenType;
|
|
65
|
-
/** Regex pattern to match */
|
|
66
|
-
pattern: RegExp;
|
|
67
|
-
/** Optional: next state after matching */
|
|
68
|
-
nextState?: string;
|
|
69
|
-
/** Optional: pop state after matching */
|
|
70
|
-
popState?: boolean;
|
|
71
|
-
}
|
|
72
|
-
/**
|
|
73
|
-
* Grammar definition for a language
|
|
74
|
-
*/
|
|
75
|
-
export interface LanguageGrammar {
|
|
76
|
-
/** Language identifier */
|
|
77
|
-
language: string;
|
|
78
|
-
/** File extensions */
|
|
79
|
-
extensions: string[];
|
|
80
|
-
/** Line comment prefix */
|
|
81
|
-
lineComment?: string;
|
|
82
|
-
/** Block comment start/end */
|
|
83
|
-
blockComment?: [string, string];
|
|
84
|
-
/** Token rules by state */
|
|
85
|
-
rules: Record<string, TokenRule[]>;
|
|
86
|
-
}
|