exprify 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,145 +1,399 @@
1
/**
 * Splits an expression string into a flat list of tokens.
 *
 * Operators and parentheses are emitted as single-character strings. Every
 * other run of characters (including quoted strings, quotes kept) is passed
 * through `context.stringToJS(text, context.variablesDB)` and the result is
 * emitted. A `#name(arg, ...)` call is resolved immediately: each argument is
 * passed to `context.evaluate`, the function is looked up in
 * `context.func_DB_intrnl` and then `context.func_DB_extrnl`, and the call's
 * return value is emitted as a token.
 *
 * @param {string} expr - Expression source text.
 * @param {object} context - Provides `stringToJS`, `variablesDB`, `evaluate`,
 *   `func_DB_intrnl` and `func_DB_extrnl`.
 * @returns {Array} Tokens in source order.
 * @throws {Error} On an unclosed string literal, a malformed function name,
 *   an empty argument before a comma, or an undefined function.
 */
export function tokenize(expr, context) {
  const tokens = [];
  let current = "";
  let quote = "";

  const isQuoteChar = (c) => c === '"' || c === "'" || c === "`";
  const isOperatorChar = (c) =>
    c === "(" || c === ")" ||
    c === "^" || c === "*" ||
    c === "/" || c === "%" ||
    c === "+" || c === "-";

  // Emits the text accumulated so far (if any) as one token.
  const flush = () => {
    if (current !== "") {
      tokens.push(context.stringToJS(current, context.variablesDB));
      current = "";
    }
  };

  for (let i = 0; i < expr.length; i++) {
    let char = expr[i];

    if (isQuoteChar(char)) {
      if (quote === "") {
        quote = char;
        current += char;
      } else if (quote === char) {
        current += char;
        quote = "";
        tokens.push(context.stringToJS(current, context.variablesDB));
        current = "";
      } else {
        // A different quote character inside a string is ordinary text.
        current += char;
      }
      continue;
    }

    if (quote !== "") {
      current += char;
      continue;
    }

    if (char === "#") {
      // Emit pending text first so token order follows source order and the
      // text is not lost when the call ends the expression.
      flush();

      let bracket = 0;
      let funcName = "";
      let arg = "";
      let args = [];
      let quoteFunc = "";

      while (i < expr.length - 1) {
        i++;
        char = expr[i];
        // Must be recomputed per character: reusing the flag computed for the
        // leading '#' would never detect quotes inside the call.
        const charIsQuote = isQuoteChar(char);

        if (bracket === 0) {
          if (char === "(") {
            bracket++;
            continue;
          }

          if (char === " ")
            throw new Error("Function name cannot contain space");

          if (charIsQuote)
            throw new Error("Function name cannot contain quotes");

          if (funcName === "" && /[0-9.]/.test(char))
            throw new Error("Function name cannot start with number");

          funcName += char;
          continue;
        }

        if (charIsQuote) {
          if (quoteFunc === "") quoteFunc = char;
          else if (quoteFunc === char) quoteFunc = "";
        }

        // Parentheses and commas only count outside string arguments.
        if (quoteFunc === "") {
          if (char === "(") bracket++;
          else if (char === ")") {
            bracket--;

            if (bracket === 0) {
              if (arg !== "") args.push(arg);
              break;
            }
          }

          if (char === "," && bracket === 1) {
            if (arg === "")
              throw new Error(`Missing argument in #${funcName}()`);

            args.push(arg);
            arg = "";
            continue;
          }
        }

        arg += char;
      }

      args = args.map((a) => context.evaluate(a));

      const fn =
        context.func_DB_intrnl[funcName] ||
        context.func_DB_extrnl[funcName];

      if (!fn) {
        throw new Error(`#${funcName}() not defined`);
      }

      tokens.push(fn(...args));
      continue;
    }

    if (isOperatorChar(char)) {
      flush();
      tokens.push(char);
      continue;
    }

    if (char === " ") {
      flush();
      continue;
    }

    current += char;
  }

  // Checked before the final flush: with an open quote, `current` holds a
  // partial string that must not be emitted.
  if (quote !== "") {
    throw new Error("Unclosed string literal");
  }

  flush();

  return tokens;
}
1
/**
 * Converts an expression string into an array of token objects.
 *
 * Token types produced: Boolean, Keyword ("to"/"in"), BigInt, Number (decimal,
 * scientific, 0x hex, 0b binary), ImaginaryLiteral, NumberWithUnit,
 * UnknownUnit, Unit, Identifier, Function (identifier directly followed by
 * "("), String, Operator, UnaryOperator, Ternary, Colon, Dot, Parenthesis,
 * ArrayStart/ArrayEnd, BlockStart/BlockEnd, Comma and Semicolon.
 *
 * After scanning, two passes run over the tokens:
 *   1. a Number immediately followed by a Unit is merged into NumberWithUnit;
 *   2. an implicit `*` Operator (flagged `implicit: true`) is inserted between
 *      a Number/Identifier/")"/"]" and a following Identifier/Function/"(".
 *
 * Line (`// ...`) and block comments are skipped. Any whitespace separates
 * tokens.
 *
 * NOTE(review): ":" is emitted as Ternary only when the token immediately
 * before it is a Ternary token; in `a ? b : c` it is emitted as Colon. This is
 * kept as-is because the consumer of these tokens is not visible here.
 *
 * @param {string} expr - Expression source text.
 * @param {object} [context={}] - Optional; `context.units.getAllUnitsFlat()`
 *   is called when present and its result is used as the list of known units.
 * @returns {object[]} Tokens in source order.
 * @throws {Error} On an unclosed string literal or an unrecognizable token.
 */
export function tokenize(expr, context = {}) {
  const tokens = [];
  let current = "";
  let quote = "";

  const operators = ["+", "-", "*", "/", "%", "^", "=", ">", "<", "!", "&", "|"];
  const multiOps = [
    "==", ">=", "<=", "&&", "||",
    "+=", "-=", "*=", "/=", "%=",
    "?.", "??", "|>"
  ];

  const parentheses = "()";
  const comma = ",";
  const semicolon = ";";
  const keywords = ["to", "in"];
  // Tolerate a null context as well as a missing units provider.
  const units = context?.units?.getAllUnitsFlat?.() || [];

  const isIdentifier = (s) => /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(s);
  const isWordChar = (c) => c !== undefined && /[a-z0-9]/i.test(c);

  /**
   * Returns the alphanumeric word that precedes the word located at (or
   * ending just before) `charIndex`, or null when there is none. The lookup
   * is positional, so repeated words are resolved correctly.
   */
  function previousWord(str, charIndex) {
    // Back up to the first character of the current word.
    let start = Math.min(charIndex, str.length);
    while (start > 0 && isWordChar(str[start - 1])) start--;

    // Skip the separator characters between the two words.
    let end = start;
    while (end > 0 && !isWordChar(str[end - 1])) end--;
    if (end === 0) return null;

    let begin = end;
    while (begin > 0 && isWordChar(str[begin - 1])) begin--;
    return str.substring(begin, end);
  }

  // True when a following "-" or "!" must be a prefix operator.
  const isUnaryContext = (prev) =>
    !prev ||
    prev.type === "Operator" ||
    prev.type === "UnaryOperator" ||
    (prev.type === "Parenthesis" && prev.value !== ")") ||
    prev.type === "ArrayStart" ||
    prev.type === "Semicolon" ||
    prev.type === "Comma" ||
    prev.type === "Ternary";

  /**
   * Classifies the text accumulated in `current`, pushes the matching token
   * and clears `current`. `nextChar` is the character following the text and
   * `index` is the position recorded as the token's `pos`.
   */
  const flushCurrent = (nextChar, index) => {
    if (!current) return;

    const text = current;
    const emit = (token) => {
      tokens.push(token);
      current = "";
    };

    // BOOLEAN
    if (/^(true|false)$/i.test(text)) {
      emit({ type: "Boolean", value: text.toLowerCase() === "true" });
      return;
    }

    // KEYWORD
    if (keywords.includes(text)) {
      emit({ type: "Keyword", value: text, pos: index });
      return;
    }

    // BIGINT
    if (/^\d+n$/.test(text)) {
      emit({ type: "BigInt", value: BigInt(text.slice(0, -1)), pos: index });
      return;
    }

    // HEX
    if (/^0x[0-9a-fA-F]+$/.test(text)) {
      emit({ type: "Number", value: Number.parseInt(text, 16), pos: index });
      return;
    }

    // BINARY ("0b" prefix must be stripped before parsing in radix 2)
    if (/^0b[01]+$/.test(text)) {
      emit({ type: "Number", value: parseInt(text, 2), pos: index });
      return;
    }

    // NUMBER (including scientific)
    if (/^[+-]?(\d+(\.\d+)?|\.\d+)(e[+-]?\d+)?$/i.test(text)) {
      emit({ type: "Number", value: Number.parseFloat(text), pos: index });
      return;
    }

    // IMAGINARY NUMBER
    if (/^[+-]?(\d+(\.\d+)?|\.\d+)(e[+-]?\d+)?i$/i.test(text)) {
      emit({
        type: "ImaginaryLiteral",
        value: Number.parseFloat(text.slice(0, -1)),
        pos: index
      });
      return;
    }

    // IMAGINARY UNIT
    if (/^[+-]?i$/i.test(text)) {
      emit({
        type: "ImaginaryLiteral",
        value: text[0] === "-" ? -1 : 1,
        pos: index
      });
      return;
    }

    // NUMBER + UNIT written without a space, e.g. "5km"
    const numUnit = text.match(/^([+-]?\d+(\.\d+)?)([a-zA-Z]+)$/);
    if (numUnit) {
      const value = Number.parseFloat(numUnit[1]);
      const unit = numUnit[3];

      emit({
        type: units.includes(unit) ? "NumberWithUnit" : "UnknownUnit",
        value,
        unit,
        pos: index
      });
      return;
    }

    // UNIT: a known unit name counts as a unit only when it follows a number
    // or a conversion keyword and is not being called like a function.
    if (units.includes(text) && nextChar !== "(") {
      const prevWord = previousWord(expr, index);
      if (
        prevWord !== null &&
        (!Number.isNaN(Number.parseFloat(prevWord)) ||
          prevWord === "to" ||
          prevWord === "in")
      ) {
        emit({ type: "Unit", value: text, pos: index });
        return;
      }
    }

    // IDENTIFIER / FUNCTION
    if (isIdentifier(text)) {
      emit({
        type: nextChar === "(" ? "Function" : "Identifier",
        name: text,
        pos: index
      });
      return;
    }

    throw new Error(`Invalid token "${text}" at index ${index}`);
  };

  for (let i = 0; i < expr.length; i++) {
    const char = expr[i];
    const next = expr[i + 1];

    // comments (never inside a string literal)
    if (!quote && char === "/" && next === "/") {
      flushCurrent(char, i);
      while (i < expr.length && expr[i] !== "\n") i++;
      continue;
    }

    if (!quote && char === "/" && next === "*") {
      flushCurrent(char, i);
      i += 2;
      while (i < expr.length && !(expr[i] === "*" && expr[i + 1] === "/")) i++;
      i++;
      continue;
    }

    // string
    if (`"'`.includes(char)) {
      if (!quote) {
        // Emit pending text so it does not leak into the string value.
        flushCurrent(char, i);
        quote = char;
        current += char;
      } else if (quote === char) {
        current += char;
        tokens.push({
          type: "String",
          value: current.slice(1, -1),
          pos: i
        });
        current = "";
        quote = "";
      } else {
        current += char;
      }
      continue;
    }

    if (quote) {
      if (char === "\\") {
        // Keep the escape sequence verbatim; the escaped character can never
        // terminate the string.
        current += char + (expr[++i] ?? "");
      } else {
        current += char;
      }
      continue;
    }

    // sign inside a scientific-notation exponent, e.g. "1e-3"
    if (
      (char === "+" || char === "-") &&
      next !== undefined &&
      /\d/.test(next) &&
      /^(\d+(\.\d+)?|\.\d+)e$/i.test(current)
    ) {
      current += char;
      continue;
    }

    // multi operators
    const twoChar = char + next;
    if (multiOps.includes(twoChar)) {
      flushCurrent(char, i);
      tokens.push({ type: "Operator", value: twoChar, pos: i });
      i++;
      continue;
    }

    if (char === "?") {
      // Flush first so "a?b" and "a ? b" yield the same tokens.
      flushCurrent(char, i);
      tokens.push({ type: "Ternary", value: "?" });
      continue;
    }

    // only treat ':' as ternary IF previous token was '?'
    if (char === ":") {
      flushCurrent(char, i);
      const prev = tokens[tokens.length - 1];

      if (prev && prev.type === "Ternary") {
        tokens.push({ type: "Ternary", value: ":" });
      } else {
        tokens.push({ type: "Colon" });
      }
      continue;
    }

    // decimal point inside a number
    if (char === "." && /\d/.test(current) && /\d/.test(next)) {
      current += char;
      continue;
    }

    if (char === ".") {
      flushCurrent(char, i);
      tokens.push({ type: "Dot", pos: i });
      continue;
    }

    // operators
    if (operators.includes(char)) {
      flushCurrent(char, i);

      const prev = tokens[tokens.length - 1];
      const isUnary = (char === "-" || char === "!") && isUnaryContext(prev);
      tokens.push({
        type: isUnary ? "UnaryOperator" : "Operator",
        value: char,
        pos: i
      });
      continue;
    }

    // parenthesis
    if (parentheses.includes(char)) {
      flushCurrent(char, i);
      tokens.push({ type: "Parenthesis", value: char, pos: i });
      continue;
    }

    // array
    if (char === "[") {
      flushCurrent(char, i);
      tokens.push({ type: "ArrayStart", pos: i });
      continue;
    }

    if (char === "]") {
      flushCurrent(char, i);
      tokens.push({ type: "ArrayEnd", pos: i });
      continue;
    }

    // block / object start
    if (char === "{") {
      flushCurrent(char, i);
      tokens.push({ type: "BlockStart", pos: i });
      continue;
    }

    // block / object end
    if (char === "}") {
      flushCurrent(char, i);
      tokens.push({ type: "BlockEnd", pos: i });
      continue;
    }

    // comma
    if (char === comma) {
      flushCurrent(char, i);
      tokens.push({ type: "Comma", pos: i });
      continue;
    }

    // semicolon
    if (char === semicolon) {
      flushCurrent(char, i);
      tokens.push({ type: "Semicolon", pos: i });
      continue;
    }

    // whitespace (spaces, tabs, newlines) separates tokens
    if (/\s/.test(char)) {
      flushCurrent(next, i);
      continue;
    }

    // build token
    current += char;
  }

  // Checked before the final flush: with an open quote, `current` holds a
  // partial string rather than a token.
  if (quote) throw new Error("Unclosed string literal");

  flushCurrent(null, expr.length - 1);

  // merge number + unit
  const merged = [];
  for (let i = 0; i < tokens.length; i++) {
    const t = tokens[i];
    const following = tokens[i + 1];

    if (t.type === "Number" && following?.type === "Unit") {
      merged.push({
        type: "NumberWithUnit",
        value: t.value,
        unit: following.value,
        pos: t.pos
      });
      i++;
      continue;
    }

    merged.push(t);
  }

  // implicit multiplication
  const endsOperand = (t) =>
    ["Number", "Identifier"].includes(t.type) ||
    (t.type === "Parenthesis" && t.value === ")") ||
    t.type === "ArrayEnd";
  const startsOperand = (t) =>
    ["Identifier", "Function"].includes(t.type) ||
    (t.type === "Parenthesis" && t.value === "(");

  const final = [];
  for (let i = 0; i < merged.length; i++) {
    const a = merged[i];
    const b = merged[i + 1];

    final.push(a);

    if (a && b && endsOperand(a) && startsOperand(b)) {
      final.push({ type: "Operator", value: "*", implicit: true });
    }
  }

  return final;
}