shell-lite 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shell_lite/__init__.py +1 -0
- shell_lite/ast_nodes.py +15 -110
- shell_lite/cli.py +10 -0
- shell_lite/compiler.py +2 -189
- shell_lite/formatter.py +75 -0
- shell_lite/interpreter.py +35 -538
- shell_lite/js_compiler.py +3 -79
- shell_lite/lexer.py +29 -107
- shell_lite/main.py +120 -75
- shell_lite/parser.py +17 -510
- shell_lite/runtime.py +1 -76
- shell_lite-0.3.5.dist-info/LICENSE +21 -0
- shell_lite-0.3.5.dist-info/METADATA +40 -0
- shell_lite-0.3.5.dist-info/RECORD +17 -0
- {shell_lite-0.3.3.dist-info → shell_lite-0.3.5.dist-info}/WHEEL +1 -1
- shell_lite-0.3.3.dist-info/METADATA +0 -77
- shell_lite-0.3.3.dist-info/RECORD +0 -14
- {shell_lite-0.3.3.dist-info → shell_lite-0.3.5.dist-info}/entry_points.txt +0 -0
- {shell_lite-0.3.3.dist-info → shell_lite-0.3.5.dist-info}/top_level.txt +0 -0
shell_lite/js_compiler.py
CHANGED
|
@@ -1,38 +1,28 @@
|
|
|
1
1
|
import random
|
|
2
2
|
from typing import List
|
|
3
3
|
from .ast_nodes import *
|
|
4
|
-
|
|
5
4
|
class JSCompiler:
|
|
6
5
|
def __init__(self):
|
|
7
6
|
self.indentation = 0
|
|
8
|
-
|
|
9
7
|
def indent(self):
|
|
10
8
|
return " " * self.indentation
|
|
11
|
-
|
|
12
9
|
def visit(self, node: Node) -> str:
|
|
13
10
|
method_name = f'visit_{type(node).__name__}'
|
|
14
11
|
visitor = getattr(self, method_name, self.generic_visit)
|
|
15
12
|
return visitor(node)
|
|
16
|
-
|
|
17
13
|
def generic_visit(self, node: Node):
|
|
18
14
|
raise Exception(f"JSCompiler does not support {type(node).__name__}")
|
|
19
|
-
|
|
20
15
|
def compile_block(self, statements: List[Node]) -> str:
|
|
21
16
|
if not statements:
|
|
22
17
|
return ""
|
|
23
|
-
|
|
24
18
|
code = ""
|
|
25
19
|
for stmt in statements:
|
|
26
20
|
stmt_code = self.visit(stmt)
|
|
27
21
|
if not stmt_code: continue
|
|
28
|
-
|
|
29
|
-
# Formatting
|
|
30
22
|
indented_stmt = "\n".join([f"{self.indent()}{line}" for line in stmt_code.split('\n')])
|
|
31
23
|
code += indented_stmt + "\n"
|
|
32
24
|
return code.rstrip()
|
|
33
|
-
|
|
34
25
|
def compile(self, statements: List[Node]) -> str:
|
|
35
|
-
# Preamble / Runtime
|
|
36
26
|
code = [
|
|
37
27
|
"// ShellLite Runtime (JS)",
|
|
38
28
|
"const fs = require('fs');",
|
|
@@ -55,91 +45,59 @@ class JSCompiler:
|
|
|
55
45
|
"// --- User Code ---",
|
|
56
46
|
""
|
|
57
47
|
]
|
|
58
|
-
|
|
59
48
|
code.append(self.compile_block(statements))
|
|
60
49
|
return "\n".join(code)
|
|
61
|
-
|
|
62
|
-
# --- Visitor Methods ---
|
|
63
|
-
|
|
64
50
|
def visit_Number(self, node: Number):
|
|
65
51
|
return str(node.value)
|
|
66
|
-
|
|
67
52
|
def visit_String(self, node: String):
|
|
68
|
-
return repr(node.value)
|
|
69
|
-
|
|
53
|
+
return repr(node.value)
|
|
70
54
|
def visit_Boolean(self, node: Boolean):
|
|
71
55
|
return "true" if node.value else "false"
|
|
72
|
-
|
|
73
56
|
def visit_Regex(self, node: Regex):
|
|
74
57
|
return f"/{node.pattern}/"
|
|
75
|
-
|
|
76
58
|
def visit_ListVal(self, node: ListVal):
|
|
77
59
|
elements = [self.visit(e) for e in node.elements]
|
|
78
60
|
return f"[{', '.join(elements)}]"
|
|
79
|
-
|
|
80
61
|
def visit_Dictionary(self, node: Dictionary):
|
|
81
|
-
# JS Objects for Dicts
|
|
82
62
|
pairs = [f"{self.visit(k)}: {self.visit(v)}" for k, v in node.pairs]
|
|
83
63
|
return f"{{{', '.join(pairs)}}}"
|
|
84
|
-
|
|
85
64
|
def visit_SetVal(self, node: SetVal):
|
|
86
65
|
elements = [self.visit(e) for e in node.elements]
|
|
87
66
|
return f"new Set([{', '.join(elements)}])"
|
|
88
|
-
|
|
89
67
|
def visit_VarAccess(self, node: VarAccess):
|
|
90
68
|
return node.name
|
|
91
|
-
|
|
92
69
|
def visit_Assign(self, node: Assign):
|
|
93
|
-
# We assume 'let' or 'const' isn't strictly needed if we reuse vars,
|
|
94
|
-
# but in strict mode we might need it.
|
|
95
|
-
# For simplicity, we'll try to use assignments directly.
|
|
96
|
-
# Ideally we track scope to emit 'let' on first use?
|
|
97
|
-
# ShellLite is dynamic. Let's assume Global/Function scope and just emit `name = value`.
|
|
98
|
-
# However, in strict JS, undeclared vars are errors.
|
|
99
|
-
# We might need a "let name;" preamble or just use `var`?
|
|
100
70
|
return f"var {node.name} = {self.visit(node.value)};"
|
|
101
|
-
|
|
102
71
|
def visit_ConstAssign(self, node: ConstAssign):
|
|
103
72
|
return f"const {node.name} = {self.visit(node.value)};"
|
|
104
|
-
|
|
105
73
|
def visit_PropertyAssign(self, node: PropertyAssign):
|
|
106
74
|
return f"{node.instance_name}.{node.property_name} = {self.visit(node.value)};"
|
|
107
|
-
|
|
108
75
|
def visit_BinOp(self, node: BinOp):
|
|
109
76
|
left = self.visit(node.left)
|
|
110
77
|
right = self.visit(node.right)
|
|
111
78
|
op = node.op
|
|
112
|
-
|
|
113
79
|
js_ops = {
|
|
114
|
-
'matches': None,
|
|
80
|
+
'matches': None,
|
|
115
81
|
'and': '&&',
|
|
116
82
|
'or': '||',
|
|
117
83
|
'==': '==='
|
|
118
84
|
}
|
|
119
|
-
|
|
120
85
|
if op == 'matches':
|
|
121
86
|
return f"new RegExp({right}).test({left})"
|
|
122
|
-
|
|
123
87
|
real_op = js_ops.get(op, op)
|
|
124
88
|
return f"({left} {real_op} {right})"
|
|
125
|
-
|
|
126
89
|
def visit_UnaryOp(self, node: UnaryOp):
|
|
127
90
|
return f"({node.op} {self.visit(node.right)})"
|
|
128
|
-
|
|
129
91
|
def visit_Print(self, node: Print):
|
|
130
92
|
return f"console.log({self.visit(node.expression)});"
|
|
131
|
-
|
|
132
93
|
def visit_Input(self, node: Input):
|
|
133
|
-
# JS doesn't have synchronous input easily in Node without libs.
|
|
134
94
|
return f"require('readline-sync').question({repr(node.prompt) if node.prompt else '\"\"'})"
|
|
135
|
-
|
|
136
95
|
def visit_If(self, node: If):
|
|
137
96
|
code = f"if ({self.visit(node.condition)}) {{\n"
|
|
138
97
|
self.indentation += 1
|
|
139
98
|
code += self.compile_block(node.body)
|
|
140
99
|
self.indentation -= 1
|
|
141
100
|
code += f"\n{self.indent()}}}"
|
|
142
|
-
|
|
143
101
|
if node.else_body:
|
|
144
102
|
code += f" else {{\n"
|
|
145
103
|
self.indentation += 1
|
|
@@ -147,7 +105,6 @@ class JSCompiler:
|
|
|
147
105
|
self.indentation -= 1
|
|
148
106
|
code += f"\n{self.indent()}}}"
|
|
149
107
|
return code
|
|
150
|
-
|
|
151
108
|
def visit_While(self, node: While):
|
|
152
109
|
code = f"while ({self.visit(node.condition)}) {{\n"
|
|
153
110
|
self.indentation += 1
|
|
@@ -155,10 +112,7 @@ class JSCompiler:
|
|
|
155
112
|
self.indentation -= 1
|
|
156
113
|
code += f"\n{self.indent()}}}"
|
|
157
114
|
return code
|
|
158
|
-
|
|
159
115
|
def visit_For(self, node: For):
|
|
160
|
-
# range loop
|
|
161
|
-
# for (let i = 0; i < count; i++)
|
|
162
116
|
count = self.visit(node.count)
|
|
163
117
|
var = f"_i_{random.randint(0,1000)}"
|
|
164
118
|
code = f"for (let {var} = 0; {var} < {count}; {var}++) {{\n"
|
|
@@ -167,7 +121,6 @@ class JSCompiler:
|
|
|
167
121
|
self.indentation -= 1
|
|
168
122
|
code += f"\n{self.indent()}}}"
|
|
169
123
|
return code
|
|
170
|
-
|
|
171
124
|
def visit_ForIn(self, node: ForIn):
|
|
172
125
|
code = f"for (let {node.var_name} of {self.visit(node.iterable)}) {{\n"
|
|
173
126
|
self.indentation += 1
|
|
@@ -175,35 +128,26 @@ class JSCompiler:
|
|
|
175
128
|
self.indentation -= 1
|
|
176
129
|
code += f"\n{self.indent()}}}"
|
|
177
130
|
return code
|
|
178
|
-
|
|
179
131
|
def visit_Repeat(self, node: Repeat):
|
|
180
132
|
return self.visit_For(For(node.count, node.body))
|
|
181
|
-
|
|
182
133
|
def visit_FunctionDef(self, node: FunctionDef):
|
|
183
|
-
args = [arg[0] for arg in node.args]
|
|
184
|
-
|
|
134
|
+
args = [arg[0] for arg in node.args]
|
|
185
135
|
code = f"function {node.name}({', '.join(args)}) {{\n"
|
|
186
136
|
self.indentation += 1
|
|
187
137
|
code += self.compile_block(node.body)
|
|
188
138
|
self.indentation -= 1
|
|
189
139
|
code += f"\n{self.indent()}}}"
|
|
190
140
|
return code
|
|
191
|
-
|
|
192
141
|
def visit_Return(self, node: Return):
|
|
193
142
|
return f"return {self.visit(node.value)};"
|
|
194
|
-
|
|
195
143
|
def visit_Call(self, node: Call):
|
|
196
144
|
args = [self.visit(a) for a in node.args]
|
|
197
145
|
return f"{node.name}({', '.join(args)})"
|
|
198
|
-
|
|
199
146
|
def visit_ClassDef(self, node: ClassDef):
|
|
200
147
|
parent = node.parent if node.parent else ""
|
|
201
148
|
extends = f" extends {parent}" if parent else ""
|
|
202
|
-
|
|
203
149
|
code = f"class {node.name}{extends} {{\n"
|
|
204
150
|
self.indentation += 1
|
|
205
|
-
|
|
206
|
-
# Constructor
|
|
207
151
|
if node.properties:
|
|
208
152
|
props = node.properties
|
|
209
153
|
code += f"{self.indent()}constructor({', '.join(props)}) {{\n"
|
|
@@ -213,8 +157,6 @@ class JSCompiler:
|
|
|
213
157
|
code += f"{self.indent()}self.{p} = {p};\n"
|
|
214
158
|
self.indentation -= 1
|
|
215
159
|
code += f"{self.indent()}}}\n"
|
|
216
|
-
|
|
217
|
-
# Methods
|
|
218
160
|
for m in node.methods:
|
|
219
161
|
args = [arg[0] for arg in m.args]
|
|
220
162
|
code += f"\n{self.indent()}{m.name}({', '.join(args)}) {{\n"
|
|
@@ -222,33 +164,25 @@ class JSCompiler:
|
|
|
222
164
|
code += self.compile_block(m.body)
|
|
223
165
|
self.indentation -= 1
|
|
224
166
|
code += f"\n{self.indent()}}}"
|
|
225
|
-
|
|
226
167
|
self.indentation -= 1
|
|
227
168
|
code += f"\n{self.indent()}}}"
|
|
228
169
|
return code
|
|
229
|
-
|
|
230
170
|
def visit_Instantiation(self, node: Instantiation):
|
|
231
171
|
args = [self.visit(a) for a in node.args]
|
|
232
172
|
return f"var {node.var_name} = new {node.class_name}({', '.join(args)});"
|
|
233
|
-
|
|
234
173
|
def visit_MethodCall(self, node: MethodCall):
|
|
235
174
|
args = [self.visit(a) for a in node.args]
|
|
236
175
|
return f"{node.instance_name}.{node.method_name}({', '.join(args)})"
|
|
237
|
-
|
|
238
176
|
def visit_PropertyAccess(self, node: PropertyAccess):
|
|
239
177
|
return f"{node.instance_name}.{node.property_name}"
|
|
240
|
-
|
|
241
178
|
def visit_Import(self, node: Import):
|
|
242
|
-
# require
|
|
243
179
|
base = node.path
|
|
244
180
|
if base == 'vscode': return 'const vscode = require("vscode");'
|
|
245
181
|
return f"const {base} = require('./{base}');"
|
|
246
|
-
|
|
247
182
|
def visit_ImportAs(self, node: ImportAs):
|
|
248
183
|
path = node.path
|
|
249
184
|
if path == 'vscode': return f"const {node.alias} = require('vscode');"
|
|
250
185
|
return f"const {node.alias} = require('./{path}');"
|
|
251
|
-
|
|
252
186
|
def visit_Try(self, node: Try):
|
|
253
187
|
code = f"try {{\n"
|
|
254
188
|
self.indentation += 1
|
|
@@ -262,21 +196,11 @@ class JSCompiler:
|
|
|
262
196
|
return code
|
|
263
197
|
def visit_Throw(self, node: Throw):
|
|
264
198
|
return f"throw new Error({self.visit(node.message)});"
|
|
265
|
-
|
|
266
199
|
def visit_Skip(self, node: Skip):
|
|
267
200
|
return "continue;"
|
|
268
|
-
|
|
269
201
|
def visit_Stop(self, node: Stop):
|
|
270
202
|
return "break;"
|
|
271
|
-
|
|
272
203
|
def visit_Lambda(self, node: Lambda):
|
|
273
204
|
return f"({', '.join(node.params)}) => {self.visit(node.body)}"
|
|
274
|
-
|
|
275
205
|
def visit_Execute(self, node: Execute):
|
|
276
|
-
# on execute command "id"
|
|
277
|
-
# This is specific for VS Code extension logic usually?
|
|
278
|
-
# Or just generic event listener mechanism?
|
|
279
|
-
# "on request to" was OnRequest.
|
|
280
|
-
# This needs a new generic "on event" node or handled via Call?
|
|
281
|
-
# Assuming user writes standard shl: `vscode.commands.registerCommand(...)`
|
|
282
206
|
pass
|
shell_lite/lexer.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from typing import List, Optional
|
|
4
|
-
|
|
4
|
+
@dataclass
|
|
5
5
|
@dataclass
|
|
6
6
|
class Token:
|
|
7
7
|
type: str
|
|
8
8
|
value: str
|
|
9
9
|
line: int
|
|
10
|
-
|
|
10
|
+
column: int = 1
|
|
11
11
|
class Lexer:
|
|
12
12
|
def __init__(self, source_code: str):
|
|
13
13
|
self.source_code = source_code
|
|
@@ -15,39 +15,35 @@ class Lexer:
|
|
|
15
15
|
self.current_char_index = 0
|
|
16
16
|
self.line_number = 1
|
|
17
17
|
self.indent_stack = [0]
|
|
18
|
-
|
|
19
18
|
def tokenize(self) -> List[Token]:
|
|
20
19
|
source = self._remove_multiline_comments(self.source_code)
|
|
21
20
|
lines = source.split('\n')
|
|
22
|
-
|
|
23
21
|
for line_num, line in enumerate(lines, 1):
|
|
24
22
|
self.line_number = line_num
|
|
25
23
|
stripped_line = line.strip()
|
|
26
|
-
|
|
27
|
-
if not stripped_line or stripped_line.startswith('#'):
|
|
24
|
+
if not stripped_line:
|
|
28
25
|
continue
|
|
29
|
-
|
|
30
26
|
indent_level = len(line) - len(line.lstrip())
|
|
27
|
+
if stripped_line.startswith('#'):
|
|
28
|
+
self.tokens.append(Token('COMMENT', stripped_line, self.line_number, indent_level + 1))
|
|
29
|
+
self.tokens.append(Token('NEWLINE', '', self.line_number, len(line) + 1))
|
|
30
|
+
continue
|
|
31
31
|
if indent_level > self.indent_stack[-1]:
|
|
32
32
|
self.indent_stack.append(indent_level)
|
|
33
|
-
self.tokens.append(Token('INDENT', '', self.line_number))
|
|
33
|
+
self.tokens.append(Token('INDENT', '', self.line_number, indent_level + 1))
|
|
34
34
|
elif indent_level < self.indent_stack[-1]:
|
|
35
35
|
while indent_level < self.indent_stack[-1]:
|
|
36
36
|
self.indent_stack.pop()
|
|
37
|
-
self.tokens.append(Token('DEDENT', '', self.line_number))
|
|
37
|
+
self.tokens.append(Token('DEDENT', '', self.line_number, indent_level + 1))
|
|
38
38
|
if indent_level != self.indent_stack[-1]:
|
|
39
39
|
raise IndentationError(f"Unindent does not match any outer indentation level on line {self.line_number}")
|
|
40
|
-
|
|
41
|
-
self.
|
|
42
|
-
self.tokens.append(Token('NEWLINE', '', self.line_number))
|
|
43
|
-
|
|
40
|
+
self.tokenize_line(stripped_line, indent_level + 1)
|
|
41
|
+
self.tokens.append(Token('NEWLINE', '', self.line_number, len(line) + 1))
|
|
44
42
|
while len(self.indent_stack) > 1:
|
|
45
43
|
self.indent_stack.pop()
|
|
46
|
-
self.tokens.append(Token('DEDENT', '', self.line_number))
|
|
47
|
-
|
|
48
|
-
self.tokens.append(Token('EOF', '', self.line_number))
|
|
44
|
+
self.tokens.append(Token('DEDENT', '', self.line_number, 1))
|
|
45
|
+
self.tokens.append(Token('EOF', '', self.line_number, 1))
|
|
49
46
|
return self.tokens
|
|
50
|
-
|
|
51
47
|
def _remove_multiline_comments(self, source: str) -> str:
|
|
52
48
|
result = []
|
|
53
49
|
i = 0
|
|
@@ -63,75 +59,41 @@ class Lexer:
|
|
|
63
59
|
result.append(source[i])
|
|
64
60
|
i += 1
|
|
65
61
|
return ''.join(result)
|
|
66
|
-
|
|
67
|
-
def tokenize_line(self, line: str):
|
|
62
|
+
def tokenize_line(self, line: str, start_col: int = 1):
|
|
68
63
|
pos = 0
|
|
69
64
|
while pos < len(line):
|
|
70
65
|
match = None
|
|
71
|
-
|
|
72
|
-
|
|
66
|
+
current_col = start_col + pos
|
|
73
67
|
if line[pos] == '#':
|
|
68
|
+
self.tokens.append(Token('COMMENT', line[pos:], self.line_number, current_col))
|
|
74
69
|
break
|
|
75
|
-
|
|
76
70
|
if line[pos].isspace():
|
|
77
71
|
pos += 1
|
|
78
72
|
continue
|
|
79
|
-
|
|
80
73
|
if line[pos].isdigit():
|
|
81
74
|
match = re.match(r'^\d+(\.\d+)?', line[pos:])
|
|
82
75
|
if match:
|
|
83
76
|
value = match.group(0)
|
|
84
|
-
self.tokens.append(Token('NUMBER', value, self.line_number))
|
|
77
|
+
self.tokens.append(Token('NUMBER', value, self.line_number, current_col))
|
|
85
78
|
pos += len(value)
|
|
86
79
|
continue
|
|
87
|
-
|
|
88
|
-
# Check for Triple Quotes first
|
|
89
80
|
if line[pos:pos+3] in ('"""', "'''"):
|
|
90
81
|
quote_char = line[pos:pos+3]
|
|
91
|
-
# For multiline, we need to scan ahead across lines?
|
|
92
|
-
# Lexer tokenizes line by line.
|
|
93
|
-
# If we want multiline strings, we need to look ahead in lines or store state.
|
|
94
|
-
# Current Lexer iterates lines.
|
|
95
|
-
# We can switch to "in_multiline_string" state?
|
|
96
|
-
# Or we can consume remaining lines here?
|
|
97
|
-
# Since tokenize() loop iterates lines, we can't easily consume from 'lines' list inside tokenize_line.
|
|
98
|
-
# But we can raise SyntaxError or support it limited to one line (useless).
|
|
99
|
-
|
|
100
|
-
# Simpler logic: Lexer state machine.
|
|
101
|
-
# But refactoring tokenize() loop is risky.
|
|
102
|
-
|
|
103
|
-
# Alternative: "css" tag takes a BLOCK?
|
|
104
|
-
# css:
|
|
105
|
-
# ... content ...
|
|
106
|
-
# But css takes expression.
|
|
107
|
-
|
|
108
|
-
# Let's support triple quotes ONLY if they end on same line? No.
|
|
109
|
-
# Let's change website/main.shl to use single line strings concatenated?
|
|
110
|
-
# Or use a separate file for CSS? serve static is already there.
|
|
111
|
-
# I used get_styles() returning css string.
|
|
112
|
-
|
|
113
|
-
# User asked for "CSS Bundling: A way to define styles directly".
|
|
114
|
-
# I'll stick to single quotes for now to save time and reliability.
|
|
115
|
-
# I'll update website/main.shl to use "string" + "string".
|
|
116
82
|
pass
|
|
117
|
-
|
|
118
83
|
if line[pos] in ('"', "'"):
|
|
119
84
|
quote_char = line[pos]
|
|
120
85
|
end_quote = line.find(quote_char, pos + 1)
|
|
121
86
|
if end_quote == -1:
|
|
122
87
|
raise SyntaxError(f"Unterminated string on line {self.line_number}")
|
|
123
88
|
value = line[pos+1:end_quote]
|
|
124
|
-
# Simple escape handling
|
|
125
89
|
value = value.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r").replace("\\\"", "\"").replace("\\\'", "\'")
|
|
126
|
-
self.tokens.append(Token('STRING', value, self.line_number))
|
|
90
|
+
self.tokens.append(Token('STRING', value, self.line_number, current_col))
|
|
127
91
|
pos = end_quote + 1
|
|
128
92
|
continue
|
|
129
|
-
|
|
130
93
|
if line[pos:pos+3] == '...':
|
|
131
|
-
self.tokens.append(Token('DOTDOTDOT', '...', self.line_number))
|
|
94
|
+
self.tokens.append(Token('DOTDOTDOT', '...', self.line_number, current_col))
|
|
132
95
|
pos += 3
|
|
133
96
|
continue
|
|
134
|
-
|
|
135
97
|
two_char = line[pos:pos+2]
|
|
136
98
|
two_char_tokens = {
|
|
137
99
|
'=>': 'ARROW', '==': 'EQ', '!=': 'NEQ',
|
|
@@ -140,65 +102,47 @@ class Lexer:
|
|
|
140
102
|
'%=': 'MODEQ'
|
|
141
103
|
}
|
|
142
104
|
if two_char in two_char_tokens:
|
|
143
|
-
self.tokens.append(Token(two_char_tokens[two_char], two_char, self.line_number))
|
|
105
|
+
self.tokens.append(Token(two_char_tokens[two_char], two_char, self.line_number, current_col))
|
|
144
106
|
pos += 2
|
|
145
107
|
continue
|
|
146
|
-
|
|
147
108
|
char = line[pos]
|
|
148
|
-
|
|
149
|
-
# Natural Language Comparisons: 'is at least', 'is exactly', 'is less than', 'is more than'
|
|
150
|
-
# We check this before single chars to catch 'is' phrases.
|
|
151
|
-
# Using simple Lookahead
|
|
152
109
|
rest_of_line = line[pos:]
|
|
153
|
-
|
|
154
110
|
if rest_of_line.startswith('is at least '):
|
|
155
|
-
self.tokens.append(Token('GE', '>=', self.line_number))
|
|
156
|
-
pos += 12
|
|
111
|
+
self.tokens.append(Token('GE', '>=', self.line_number, current_col))
|
|
112
|
+
pos += 12
|
|
157
113
|
continue
|
|
158
114
|
elif rest_of_line.startswith('is exactly '):
|
|
159
|
-
self.tokens.append(Token('EQ', '==', self.line_number))
|
|
115
|
+
self.tokens.append(Token('EQ', '==', self.line_number, current_col))
|
|
160
116
|
pos += 11
|
|
161
117
|
continue
|
|
162
118
|
elif rest_of_line.startswith('is less than '):
|
|
163
|
-
self.tokens.append(Token('LT', '<', self.line_number))
|
|
119
|
+
self.tokens.append(Token('LT', '<', self.line_number, current_col))
|
|
164
120
|
pos += 13
|
|
165
121
|
continue
|
|
166
122
|
elif rest_of_line.startswith('is more than '):
|
|
167
|
-
self.tokens.append(Token('GT', '>', self.line_number))
|
|
123
|
+
self.tokens.append(Token('GT', '>', self.line_number, current_col))
|
|
168
124
|
pos += 13
|
|
169
125
|
continue
|
|
170
|
-
|
|
171
|
-
# Filler Words: 'the'
|
|
172
|
-
# Check if next chars are 'the' plus a non-alphanum bound (e.g. space, newline, symbol)
|
|
173
126
|
if rest_of_line.startswith('the') and (len(rest_of_line) == 3 or not rest_of_line[3].isalnum()):
|
|
174
|
-
# Only skip if it's a standalone word 'the'
|
|
175
127
|
pos += 3
|
|
176
128
|
continue
|
|
177
|
-
|
|
178
129
|
if char == '/':
|
|
179
|
-
# Check for Regex /regex/
|
|
180
|
-
# We assume regex if the PREVIOUS token is not something that implies division (Number, ID, RBracket, RParen)
|
|
181
130
|
last_type = self.tokens[-1].type if self.tokens else None
|
|
182
131
|
is_division = False
|
|
183
132
|
if last_type in ('NUMBER', 'STRING', 'ID', 'RPAREN', 'RBRACKET'):
|
|
184
133
|
is_division = True
|
|
185
|
-
|
|
186
134
|
if not is_division:
|
|
187
|
-
# Parse Regex
|
|
188
135
|
end_slash = line.find('/', pos + 1)
|
|
189
136
|
if end_slash != -1:
|
|
190
137
|
pattern = line[pos+1:end_slash]
|
|
191
|
-
# Check for flags after slash
|
|
192
138
|
flags = ""
|
|
193
139
|
k = end_slash + 1
|
|
194
140
|
while k < len(line) and line[k].isalpha():
|
|
195
141
|
flags += line[k]
|
|
196
142
|
k += 1
|
|
197
|
-
|
|
198
|
-
self.tokens.append(Token('REGEX', pattern, self.line_number))
|
|
143
|
+
self.tokens.append(Token('REGEX', pattern, self.line_number, current_col))
|
|
199
144
|
pos = k
|
|
200
145
|
continue
|
|
201
|
-
|
|
202
146
|
single_char_tokens = {
|
|
203
147
|
'+': 'PLUS', '-': 'MINUS', '*': 'MUL', '/': 'DIV',
|
|
204
148
|
'%': 'MOD', '=': 'ASSIGN', '>': 'GT', '<': 'LT',
|
|
@@ -207,10 +151,9 @@ class Lexer:
|
|
|
207
151
|
'{': 'LBRACE', '}': 'RBRACE', ',': 'COMMA', '.': 'DOT'
|
|
208
152
|
}
|
|
209
153
|
if char in single_char_tokens:
|
|
210
|
-
self.tokens.append(Token(single_char_tokens[char], char, self.line_number))
|
|
154
|
+
self.tokens.append(Token(single_char_tokens[char], char, self.line_number, current_col))
|
|
211
155
|
pos += 1
|
|
212
156
|
continue
|
|
213
|
-
|
|
214
157
|
if char.isalpha() or char == '_':
|
|
215
158
|
match = re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*', line[pos:])
|
|
216
159
|
if match:
|
|
@@ -264,50 +207,29 @@ class Lexer:
|
|
|
264
207
|
'red': 'RED', 'green': 'GREEN', 'blue': 'BLUE',
|
|
265
208
|
'yellow': 'YELLOW', 'cyan': 'CYAN', 'magenta': 'MAGENTA',
|
|
266
209
|
'serve': 'SERVE', 'static': 'STATIC',
|
|
267
|
-
|
|
268
|
-
# === NATURAL ENGLISH WEB DSL ===
|
|
269
|
-
# Routing
|
|
270
|
-
# File System Mastery (v0.03.3)
|
|
271
210
|
'write': 'WRITE', 'append': 'APPEND', 'read': 'READ', 'file': 'FILE',
|
|
272
|
-
|
|
273
|
-
# File System Mastery (v0.03.3)
|
|
274
211
|
'write': 'WRITE', 'append': 'APPEND', 'read': 'READ', 'file': 'FILE',
|
|
275
212
|
'db': 'DB', 'database': 'DB',
|
|
276
213
|
'query': 'QUERY', 'open': 'OPEN', 'close': 'CLOSE', 'exec': 'EXEC',
|
|
277
214
|
'middleware': 'MIDDLEWARE', 'before': 'BEFORE',
|
|
278
|
-
|
|
279
|
-
|
|
280
215
|
'when': 'WHEN', 'someone': 'SOMEONE', 'visits': 'VISITS',
|
|
281
216
|
'submits': 'SUBMITS', 'start': 'START', 'server': 'SERVER',
|
|
282
217
|
'files': 'FILES',
|
|
283
|
-
|
|
284
|
-
# Page/Component creation
|
|
285
218
|
'define': 'DEFINE', 'page': 'PAGE', 'called': 'CALLED',
|
|
286
219
|
'using': 'USING', 'component': 'PAGE',
|
|
287
|
-
|
|
288
|
-
# HTML aliases (natural names)
|
|
289
220
|
'heading': 'HEADING', 'paragraph': 'PARAGRAPH',
|
|
290
|
-
# 'link' removed - conflicts with HTML <link> tag
|
|
291
221
|
'image': 'IMAGE',
|
|
292
|
-
|
|
293
|
-
# List operations
|
|
294
222
|
'add': 'ADD', 'put': 'ADD', 'into': 'INTO',
|
|
295
223
|
'count': 'COUNT', 'many': 'MANY', 'how': 'HOW',
|
|
296
|
-
|
|
297
|
-
# Forms
|
|
298
224
|
'field': 'FIELD', 'submit': 'SUBMIT', 'named': 'NAMED',
|
|
299
225
|
'placeholder': 'PLACEHOLDER',
|
|
300
226
|
}
|
|
301
227
|
token_type = keywords.get(value, 'ID')
|
|
302
|
-
self.tokens.append(Token(token_type, value, self.line_number))
|
|
228
|
+
self.tokens.append(Token(token_type, value, self.line_number, current_col))
|
|
303
229
|
pos += len(value)
|
|
304
230
|
continue
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
231
|
if char in single_char_tokens:
|
|
309
|
-
self.tokens.append(Token(single_char_tokens[char], char, self.line_number))
|
|
232
|
+
self.tokens.append(Token(single_char_tokens[char], char, self.line_number, current_col))
|
|
310
233
|
pos += 1
|
|
311
234
|
continue
|
|
312
|
-
|
|
313
235
|
raise SyntaxError(f"Illegal character '{char}' at line {self.line_number}")
|