arc-lang 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lexer.js ADDED
@@ -0,0 +1,335 @@
1
// Arc Language Lexer
//
// Tokenizes Arc source text into a flat token stream. String interpolation
// is encoded as a StringInterpStart / (StringInterpPart | Ident)* /
// StringInterpEnd run; regex literals are only recognized when they
// immediately follow the `matching` keyword.

// TokenType: numeric enum with a reverse mapping (TokenType[0] === "Int"),
// mirroring TypeScript's compiled numeric-enum output.
export var TokenType;
(function (TokenType) {
    // Literals
    TokenType[TokenType["Int"] = 0] = "Int";
    TokenType[TokenType["Float"] = 1] = "Float";
    TokenType[TokenType["String"] = 2] = "String";
    TokenType[TokenType["StringInterpStart"] = 3] = "StringInterpStart";
    TokenType[TokenType["StringInterpPart"] = 4] = "StringInterpPart";
    TokenType[TokenType["StringInterpEnd"] = 5] = "StringInterpEnd";
    TokenType[TokenType["Bool"] = 6] = "Bool";
    TokenType[TokenType["Nil"] = 7] = "Nil";
    // Identifiers & Keywords
    TokenType[TokenType["Ident"] = 8] = "Ident";
    TokenType[TokenType["Fn"] = 9] = "Fn";
    TokenType[TokenType["Let"] = 10] = "Let";
    TokenType[TokenType["Mut"] = 11] = "Mut";
    TokenType[TokenType["Type"] = 12] = "Type";
    TokenType[TokenType["Use"] = 13] = "Use";
    TokenType[TokenType["Pub"] = 14] = "Pub";
    TokenType[TokenType["Match"] = 15] = "Match";
    TokenType[TokenType["If"] = 16] = "If";
    TokenType[TokenType["El"] = 17] = "El";
    TokenType[TokenType["For"] = 18] = "For";
    TokenType[TokenType["In"] = 19] = "In";
    TokenType[TokenType["Do"] = 20] = "Do";
    TokenType[TokenType["While"] = 21] = "While";
    TokenType[TokenType["Until"] = 22] = "Until";
    TokenType[TokenType["Async"] = 23] = "Async";
    TokenType[TokenType["Await"] = 24] = "Await";
    TokenType[TokenType["Ret"] = 25] = "Ret";
    TokenType[TokenType["True"] = 26] = "True";
    TokenType[TokenType["False"] = 27] = "False";
    TokenType[TokenType["NilKw"] = 28] = "NilKw";
    TokenType[TokenType["And"] = 29] = "And";
    TokenType[TokenType["Or"] = 30] = "Or";
    TokenType[TokenType["Not"] = 31] = "Not";
    TokenType[TokenType["Where"] = 32] = "Where";
    TokenType[TokenType["Matching"] = 33] = "Matching";
    TokenType[TokenType["Fetch"] = 34] = "Fetch";
    // Operators
    TokenType[TokenType["Plus"] = 35] = "Plus";
    TokenType[TokenType["Minus"] = 36] = "Minus";
    TokenType[TokenType["Star"] = 37] = "Star";
    TokenType[TokenType["Slash"] = 38] = "Slash";
    TokenType[TokenType["Percent"] = 39] = "Percent";
    TokenType[TokenType["Power"] = 40] = "Power";
    TokenType[TokenType["Eq"] = 41] = "Eq";
    TokenType[TokenType["Neq"] = 42] = "Neq";
    TokenType[TokenType["Lt"] = 43] = "Lt";
    TokenType[TokenType["Gt"] = 44] = "Gt";
    TokenType[TokenType["Lte"] = 45] = "Lte";
    TokenType[TokenType["Gte"] = 46] = "Gte";
    TokenType[TokenType["Pipe"] = 47] = "Pipe";
    TokenType[TokenType["Bar"] = 48] = "Bar";
    TokenType[TokenType["FatArrow"] = 49] = "FatArrow";
    TokenType[TokenType["Arrow"] = 50] = "Arrow";
    TokenType[TokenType["Question"] = 51] = "Question";
    TokenType[TokenType["Range"] = 52] = "Range";
    TokenType[TokenType["Concat"] = 53] = "Concat";
    TokenType[TokenType["At"] = 54] = "At";
    TokenType[TokenType["Hash"] = 55] = "Hash";
    TokenType[TokenType["Assign"] = 56] = "Assign";
    // Delimiters
    TokenType[TokenType["LParen"] = 57] = "LParen";
    TokenType[TokenType["RParen"] = 58] = "RParen";
    TokenType[TokenType["LBrace"] = 59] = "LBrace";
    TokenType[TokenType["RBrace"] = 60] = "RBrace";
    TokenType[TokenType["LBracket"] = 61] = "LBracket";
    TokenType[TokenType["RBracket"] = 62] = "RBracket";
    TokenType[TokenType["Comma"] = 63] = "Comma";
    TokenType[TokenType["Colon"] = 64] = "Colon";
    TokenType[TokenType["Dot"] = 65] = "Dot";
    TokenType[TokenType["Semicolon"] = 66] = "Semicolon";
    TokenType[TokenType["Newline"] = 67] = "Newline";
    // Regex
    TokenType[TokenType["Regex"] = 68] = "Regex";
    // Special
    TokenType[TokenType["EOF"] = 69] = "EOF";
})(TokenType || (TokenType = {}));
// Reserved words -> token types, consulted after an identifier is scanned.
const KEYWORDS = {
    fn: TokenType.Fn, let: TokenType.Let, mut: TokenType.Mut, type: TokenType.Type,
    use: TokenType.Use, pub: TokenType.Pub, match: TokenType.Match,
    if: TokenType.If, el: TokenType.El, for: TokenType.For, in: TokenType.In,
    do: TokenType.Do, while: TokenType.While, until: TokenType.Until,
    async: TokenType.Async, await: TokenType.Await, ret: TokenType.Ret,
    true: TokenType.True, false: TokenType.False, nil: TokenType.NilKw,
    and: TokenType.And, or: TokenType.Or, not: TokenType.Not,
    where: TokenType.Where, matching: TokenType.Matching, fetch: TokenType.Fetch,
};
/**
 * Lex Arc source text into a token stream.
 *
 * @param {string} source - Arc program text.
 * @returns {{type: number, value: string, line: number, col: number}[]}
 *   Tokens in source order, always terminated by one EOF token.
 *   `line`/`col` are 1-based. `#` comments and unrecognized characters
 *   are skipped silently; newlines are emitted as Newline tokens.
 */
export function lex(source) {
    const tokens = [];
    let i = 0;
    let line = 1;
    let col = 1;
    // Lookahead without consuming; returns "" past end of input.
    function peek(offset = 0) { return source[i + offset] ?? ""; }
    // Consume one character, maintaining line/col. Returns "" past end of
    // input (fix: previously returned undefined, so an escape sequence
    // truncated at EOF appended the literal text "undefined" to a token).
    function advance() {
        const ch = source[i++];
        if (ch === undefined)
            return "";
        if (ch === "\n") {
            line++;
            col = 1;
        }
        else {
            col++;
        }
        return ch;
    }
    function tok(type, value, startLine, startCol) {
        return { type, value, line: startLine, col: startCol };
    }
    // Single-char token map, hoisted out of the scan loop
    // (fix: it was rebuilt on every loop iteration).
    const singles = {
        "+": TokenType.Plus, "-": TokenType.Minus, "*": TokenType.Star, "/": TokenType.Slash,
        "%": TokenType.Percent, "<": TokenType.Lt, ">": TokenType.Gt, "?": TokenType.Question,
        "@": TokenType.At, "=": TokenType.Assign,
        "(": TokenType.LParen, ")": TokenType.RParen, "{": TokenType.LBrace, "}": TokenType.RBrace,
        "[": TokenType.LBracket, "]": TokenType.RBracket, ",": TokenType.Comma, ":": TokenType.Colon,
        ".": TokenType.Dot, ";": TokenType.Semicolon,
    };
    while (i < source.length) {
        const ch = peek();
        const sl = line, sc = col; // token start position
        // Whitespace (not newline)
        if (ch === " " || ch === "\t" || ch === "\r") {
            advance();
            continue;
        }
        // Newline is significant: emitted as its own token.
        if (ch === "\n") {
            advance();
            tokens.push(tok(TokenType.Newline, "\\n", sl, sc));
            continue;
        }
        // Comments: # to end of line (newline itself is left for the next pass)
        if (ch === "#") {
            while (i < source.length && peek() !== "\n")
                advance();
            continue;
        }
        // String literal, possibly with {expr} interpolation
        if (ch === '"') {
            advance(); // skip opening quote
            let str = "";
            const parts = [];
            let hasInterp = false;
            while (i < source.length && peek() !== '"') {
                if (peek() === "{") {
                    hasInterp = true;
                    // An empty leading part is pushed deliberately when the
                    // string begins with an interpolation, so consumers
                    // always see a String part first.
                    if (str.length > 0 || parts.length === 0) {
                        parts.push(tok(TokenType.String, str, sl, sc));
                        str = "";
                    }
                    advance(); // skip {
                    // Position of the embedded expression itself
                    // (fix: previously recorded the position *after* the
                    // closing `}`, producing wrong diagnostics locations).
                    const exprLine = line, exprCol = col;
                    // Grab raw text until the matching }, tracking nesting.
                    let depth = 1;
                    let interpExpr = "";
                    while (i < source.length && depth > 0) {
                        if (peek() === "{")
                            depth++;
                        if (peek() === "}") {
                            depth--;
                            if (depth === 0)
                                break;
                        }
                        interpExpr += advance();
                    }
                    if (peek() === "}")
                        advance(); // skip }
                    parts.push(tok(TokenType.Ident, interpExpr, exprLine, exprCol));
                    continue;
                }
                if (peek() === "\\") {
                    advance();
                    const esc = advance(); // "" if the escape is cut off at EOF
                    if (esc === "n")
                        str += "\n";
                    else if (esc === "t")
                        str += "\t";
                    else if (esc === "\\")
                        str += "\\";
                    else if (esc === '"')
                        str += '"';
                    else
                        str += esc; // unknown escapes pass through verbatim
                    continue;
                }
                str += advance();
            }
            if (peek() === '"')
                advance(); // skip closing quote
            if (hasInterp) {
                if (str.length > 0)
                    parts.push(tok(TokenType.String, str, sl, sc));
                // Encode as StringInterpStart ... StringInterpEnd
                tokens.push(tok(TokenType.StringInterpStart, "", sl, sc));
                for (const p of parts) {
                    if (p.type === TokenType.String) {
                        tokens.push(tok(TokenType.StringInterpPart, p.value, p.line, p.col));
                    }
                    else {
                        // It's an ident expression
                        tokens.push(tok(TokenType.Ident, p.value, p.line, p.col));
                    }
                }
                tokens.push(tok(TokenType.StringInterpEnd, "", line, col));
            }
            else {
                tokens.push(tok(TokenType.String, str, sl, sc));
            }
            continue;
        }
        // Numbers: Int, or Float with a single '.' ("1..5" lexes as Int Range Int)
        if (ch >= "0" && ch <= "9") {
            let num = "";
            let isFloat = false;
            while (i < source.length && ((peek() >= "0" && peek() <= "9") || peek() === ".")) {
                if (peek() === ".") {
                    if (peek(1) === ".")
                        break; // range operator
                    if (isFloat)
                        break; // second dot ends the number
                    isFloat = true;
                }
                num += advance();
            }
            tokens.push(tok(isFloat ? TokenType.Float : TokenType.Int, num, sl, sc));
            continue;
        }
        // Identifiers and keywords: [A-Za-z_][A-Za-z0-9_]*
        if ((ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") || ch === "_") {
            let ident = "";
            while (i < source.length && ((peek() >= "a" && peek() <= "z") || (peek() >= "A" && peek() <= "Z") || (peek() >= "0" && peek() <= "9") || peek() === "_")) {
                ident += advance();
            }
            const kw = KEYWORDS[ident];
            if (kw !== undefined) {
                tokens.push(tok(kw, ident, sl, sc));
            }
            else {
                tokens.push(tok(TokenType.Ident, ident, sl, sc));
            }
            continue;
        }
        // Regex literal: /pattern/ (only directly after the 'matching' keyword,
        // which disambiguates it from the division operator)
        if (ch === "/" && tokens.length > 0 && tokens[tokens.length - 1].type === TokenType.Matching) {
            advance(); // skip opening /
            let pattern = "";
            while (i < source.length && peek() !== "/" && peek() !== "\n") {
                if (peek() === "\\") {
                    pattern += advance(); // keep the escape char so "\/" survives
                }
                pattern += advance();
            }
            if (peek() === "/")
                advance(); // skip closing /
            tokens.push(tok(TokenType.Regex, pattern, sl, sc));
            continue;
        }
        // Multi-char operators (checked before their single-char prefixes)
        if (ch === "|" && peek(1) === ">") {
            advance();
            advance();
            tokens.push(tok(TokenType.Pipe, "|>", sl, sc));
            continue;
        }
        if (ch === "|") {
            advance();
            tokens.push(tok(TokenType.Bar, "|", sl, sc));
            continue;
        }
        if (ch === "=" && peek(1) === ">") {
            advance();
            advance();
            tokens.push(tok(TokenType.FatArrow, "=>", sl, sc));
            continue;
        }
        if (ch === "-" && peek(1) === ">") {
            advance();
            advance();
            tokens.push(tok(TokenType.Arrow, "->", sl, sc));
            continue;
        }
        if (ch === "*" && peek(1) === "*") {
            advance();
            advance();
            tokens.push(tok(TokenType.Power, "**", sl, sc));
            continue;
        }
        if (ch === "+" && peek(1) === "+") {
            advance();
            advance();
            tokens.push(tok(TokenType.Concat, "++", sl, sc));
            continue;
        }
        if (ch === "=" && peek(1) === "=") {
            advance();
            advance();
            tokens.push(tok(TokenType.Eq, "==", sl, sc));
            continue;
        }
        if (ch === "!" && peek(1) === "=") {
            advance();
            advance();
            tokens.push(tok(TokenType.Neq, "!=", sl, sc));
            continue;
        }
        if (ch === "<" && peek(1) === "=") {
            advance();
            advance();
            tokens.push(tok(TokenType.Lte, "<=", sl, sc));
            continue;
        }
        if (ch === ">" && peek(1) === "=") {
            advance();
            advance();
            tokens.push(tok(TokenType.Gte, ">=", sl, sc));
            continue;
        }
        if (ch === "." && peek(1) === ".") {
            advance();
            advance();
            tokens.push(tok(TokenType.Range, "..", sl, sc));
            continue;
        }
        // Single-char tokens
        if (singles[ch] !== undefined) {
            advance();
            tokens.push(tok(singles[ch], ch, sl, sc));
            continue;
        }
        // Unknown character: skip silently
        advance();
    }
    tokens.push(tok(TokenType.EOF, "", line, col));
    return tokens;
}
@@ -0,0 +1,15 @@
1
/** Severity levels a lint diagnostic may carry, from most to least serious. */
export type Severity = "error" | "warning" | "info";
/** A single finding produced by the linter, with its source position. */
export interface LintDiagnostic {
    severity: Severity;
    /** Human-readable description of the finding. */
    message: string;
    /** Name of the lint rule that produced the finding. */
    rule: string;
    /** File the finding refers to — presumably `LintOptions.file`; verify against the implementation. */
    file: string;
    // NOTE(review): presumably 1-based, matching the lexer's line/col tracking — confirm.
    line: number;
    col: number;
}
/** Configuration accepted by {@link lint}. */
export interface LintOptions {
    /** Lines longer than this are reported. */
    maxLineLength: number;
    /** File name to stamp onto emitted diagnostics. */
    file: string;
}
/**
 * Lint Arc source text and return all diagnostics found.
 * All options may be omitted; unspecified ones presumably fall back to
 * built-in defaults — confirm in the implementation.
 */
export declare function lint(source: string, options?: Partial<LintOptions>): LintDiagnostic[];
/** Render a diagnostic as a display string (exact format defined by the implementation). */
export declare function formatDiagnostic(d: LintDiagnostic): string;