pure-dango 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,334 @@
1
+ // the lexer for pure-dango. Returns a list of tokens.
2
+ import {Tokenizer} from "../../runtime/errors";
3
+ import * as quote_crimes from "./funnies";
4
+
5
// Master pattern for the lexer. The alternatives are tried in order at every
// position, so longer operators must not be shadowed by a shorter alternative
// that could match at the same position. Anything that matches no alternative
// (spaces, tabs, stray characters) is skipped by a global match.
const REGEX = new RegExp([
    // strings
    "(`(?:[^`\\\\]|\\\\.)*`)",                        // backtick strings
    "('(?:[^'\\\\]|\\\\.)*')",                        // single quote strings

    "(\u201C(?:[^\u201C\u201D\\\\]|\\\\.)*\u201D)",   // curly double quote strings
    "(\u2018(?:[^\u2018\u2019\\\\]|\\\\.)*\u2019)",   // curly single quote strings

    // multi-char operators
    "\\?\\?",
    "&&", "\\|\\|",
    "!=", "<=", ">=", "==",
    "--", "\\+\\+",
    "-=", "\\+=", "/=", "\\*=",
    "&=", "\\|=", "\\^=",       // bitwise assignment
    "<<=", ">>=", ">>>=",       // shift assignment
    ">>>", ">>", "<<",          // shifts (longest first)
    // spread: all three dots must be escaped, otherwise the last one is the
    // regex wildcard and "..x" would be swallowed as a single token
    "\\.\\.\\.",

    // number literals
    "0b[01]+",                  // binary
    "0x[0-9a-fA-F]+",           // hex
    "\\d+(\\.\\d+)?",           // integer or float

    // identifiers
    "[\\p{L}_][\\p{L}\\d_]*",

    // single char
    "[+\\-*/=()&^%$#@!<>?:~,|]",
    "[\\[\\]{}]",
    "[\\n;]",
    "\\.",
].join('|'), 'gu');
38
+
39
// One lexical token as returned by the tokenizer.
type BaseToken =
{
    type   : string;    // token category, e.g. "Keyword", "Operator", "StringLiteral"
    value  : string;    // token text (for string literals: the unquoted contents)
    row    : number;    // row recorded for the token
    column : number;    // column recorded for the token
};

// Iterator type used for the result of String.prototype.matchAll.
type MatchIterable = IterableIterator<RegExpMatchArray>;
48
+
49
// Lookup tables used to classify a lexeme by exact match.

// reserved words
const keywordSet = new Set([
    "new", "const",
    "if", "else", "while", "continue", "break", "for", "in", "of",
    "function", "return", "import",
    "class", "extends", "inst", "internal",
    "try", "catch", "finally",
    "do", "switch", "case", "default",
]);

// lexemes that separate statements or list items
const separatorSet = new Set(["\n", ",", "\t", ";"]);

// operators and brackets
const operatorSet = new Set([
    // spread, logical, brackets
    "...", "??", "&&", "||", "{", "}", "[", "]",
    // comparison
    "!=", "<=", ">=", "==",
    // arithmetic assignment, increment and decrement
    "-=", "+=", "++", "/=", "*=", "--",
    // bitwise assignment
    "&=", "|=", "^=",
    // shift assignment
    "<<=", ">>=", ">>>=",
    // shifts
    ">>>", ">>", "<<",
    // single character
    "+", "-", "*", "/", "%", "=", "(", ")",
    "&", "^", "!", "<", ">", "?", ":", "~", ".", "|",
]);
62
+
63
// Classifies a lexeme and returns the name of its token type.
//
// Checks run in this order, first hit wins:
//   "Keyword"       - exact match in keywordSet
//   "Separator"     - exact match in separatorSet
//   "Operator"      - exact match in operatorSet
//   "StringLiteral" - starts with a quote character: " ' ` or one of the
//                     curly opening quotes U+201C / U+2018
//   "Literal"       - starts with a digit
//   "Identifier"    - everything else
function getType(code : string) : string
{
    if (keywordSet.has(code))
        return "Keyword";

    if (separatorSet.has(code))
        return "Separator";

    if (operatorSet.has(code))
        return "Operator";

    // the curly opening quotes are included because the lexer pattern can
    // produce whole curly-quoted strings as a single lexeme
    if (/^["'`\u201C\u2018]/.test(code))
        return "StringLiteral";

    if (/^\d/.test(code))
        return "Literal";

    return "Identifier";
}
83
+
84
// Converts source text into a flat list of tokens.
//
// The source is scanned once, character by character:
//   - "#" starts a comment that runs to the end of the line
//   - "/*" starts a block comment that runs to the next "*/"
//   - a quote (" ' ` or a curly opening quote) that does not directly follow
//     a letter or digit starts a string literal; curly strings end at the
//     matching curly closing quote
//   - everything else is collected and split into tokens with REGEX
//
// Every token carries the 1-based row and column of its first character in
// the original source (for strings: the opening quote). Newlines only advance
// the row counter, they do not produce tokens. Comments and strings always
// end the token before them, so text on either side is never glued together.
//
// Inside strings \" \' \\ \n \t \r are decoded; any other backslash sequence
// is kept as written.
//
// Throws Tokenizer when a curly opening quote is repeated before its closing
// quote, or when a string literal is still open at the end of the source.
export function tokenizer(code : string) : BaseToken[]
{
    const tokens : BaseToken[] = [];

    // a quote directly after one of these characters does not open a string
    const wordCharacter = /[\p{L}\d]/u;

    // escapes that decode to a control character (\" \' \\ decode to themselves)
    const controlEscapes : Record<string, string> = {n: "\n", t: "\t", r: "\r"};

    let row : number = 1;
    let column : number = 1;
    let i : number = 0;

    // plain code waiting to be split by REGEX, one entry per character,
    // together with the source position each character came from
    const pending : string[] = [];
    const pendingRows : number[] = [];
    const pendingColumns : number[] = [];

    // moves past code[i] and keeps row / column in sync
    const advance = () : void =>
    {
        if (code[i] === "\n")
        {
            row++;
            column = 1;
        }
        else
            column++;

        i++;
    };

    // turns the pending characters into tokens and empties the buffer
    const flush = () : void =>
    {
        if (pending.length === 0)
            return;

        for (const match of pending.join("").matchAll(REGEX))
        {
            const value : string = match[0];

            // newlines are only needed for position tracking
            if (value === "\n")
                continue;

            // every pending entry is one UTF-16 unit, so the match offset
            // indexes the position arrays directly
            const offset : number = match.index ?? 0;
            tokens.push
            (
                {
                    type   : getType(value),
                    value,
                    row    : pendingRows[offset],
                    column : pendingColumns[offset]
                }
            );
        }

        pending.length = 0;
        pendingRows.length = 0;
        pendingColumns.length = 0;
    };

    while (i < code.length)
    {
        const character = code[i];

        // single line comment: skip up to (not including) the newline,
        // which is handled by the next iteration
        if (character === "#")
        {
            while (i < code.length && code[i] !== "\n")
                advance();

            continue;
        }

        // multi line comment
        if (character === "/" && code[i + 1] === "*")
        {
            flush();

            // eat /* first so that "/*/" is not mistaken for a full comment
            advance();
            advance();

            while (i < code.length && !(code[i] === "*" && code[i + 1] === "/"))
                advance();

            // eat */ (an unclosed comment simply runs to the end of the source)
            if (i < code.length)
            {
                advance();
                advance();
            }

            continue;
        }

        // strings
        const opensString : boolean =
            character === '"' || character === "\u201C" ||
            character === "'" || character === "\u2018" ||
            character === "`";

        if (opensString && (i === 0 || !wordCharacter.test(code[i - 1])))
        {
            flush();

            const curly : boolean = character === "\u201C" || character === "\u2018";
            const quote : string  = character === "\u201C" ? "\u201D"  // curly double quotes
                                  : character === "\u2018" ? "\u2019"  // curly single quotes
                                  : character;
            const type : string   = character === "\u201C" ? "double" : "single";

            const stringRow : number = row;
            const stringColumn : number = column;
            const start : number = i;

            // opening quote
            advance();

            while (i < code.length && code[i] !== quote)
            {
                // a second curly opening quote before the closing one
                if (curly && code[i] === character)
                {
                    const quoteCrimesMessage = quote_crimes.QuoteCrimesMessage(quote_crimes.incrementQuoteCrimes() - 1, character) + " ";
                    throw new Tokenizer
                    (
                        quoteCrimesMessage +
                        `Found: ${character}...${character}. ` +
                        `Tip: Either match them or just use normal ${type} quotes`,
                        row,
                        column
                    );
                }

                // an escape consumes the backslash and the character after it,
                // so an escaped quote does not end the string
                if (code[i] === "\\")
                {
                    advance();
                    if (i < code.length)
                        advance();
                }
                else
                    advance();
            }

            if (i >= code.length)
                throw new Tokenizer("Unterminated string literal", stringRow, stringColumn);

            const contents : string = code.slice(start + 1, i);

            // closing quote
            advance();

            tokens.push
            (
                {
                    type   : "StringLiteral",
                    // decoded in a single pass so that the result of one escape
                    // is never re-read as the start of another (\\n stays \n)
                    value  : contents.replace
                    (
                        /\\(["'\\ntr])/g,
                        (_whole : string, escaped : string) => controlEscapes[escaped] ?? escaped
                    ),
                    row    : stringRow,
                    column : stringColumn
                }
            );
            continue;
        }

        // plain code: remember the character and where it came from
        pending.push(character);
        pendingRows.push(row);
        pendingColumns.push(column);
        advance();
    }

    flush();

    return tokens;
}