jpsx 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +242 -0
  2. package/dist/api/__tests__/compile.test.d.ts +2 -0
  3. package/dist/api/__tests__/compile.test.d.ts.map +1 -0
  4. package/dist/api/__tests__/compile.test.js +336 -0
  5. package/dist/api/__tests__/runtime.test.d.ts +2 -0
  6. package/dist/api/__tests__/runtime.test.d.ts.map +1 -0
  7. package/dist/api/__tests__/runtime.test.js +275 -0
  8. package/dist/api/advanced.d.ts +100 -0
  9. package/dist/api/advanced.d.ts.map +1 -0
  10. package/dist/api/advanced.js +192 -0
  11. package/dist/api/benchmark.d.ts +87 -0
  12. package/dist/api/benchmark.d.ts.map +1 -0
  13. package/dist/api/benchmark.js +147 -0
  14. package/dist/api/index.d.ts +88 -0
  15. package/dist/api/index.d.ts.map +1 -0
  16. package/dist/api/index.js +304 -0
  17. package/dist/ast/types.d.ts +141 -0
  18. package/dist/ast/types.d.ts.map +1 -0
  19. package/dist/ast/types.js +1 -0
  20. package/dist/cli/index.d.ts +3 -0
  21. package/dist/cli/index.d.ts.map +1 -0
  22. package/dist/cli/index.js +155 -0
  23. package/dist/cli.js +30 -0
  24. package/dist/generator/generator.d.ts +3 -0
  25. package/dist/generator/generator.d.ts.map +1 -0
  26. package/dist/generator/generator.js +175 -0
  27. package/dist/lexer/lexer.d.ts +3 -0
  28. package/dist/lexer/lexer.d.ts.map +1 -0
  29. package/dist/lexer/lexer.js +23 -0
  30. package/dist/lexer/tokenizer.d.ts +9 -0
  31. package/dist/lexer/tokenizer.d.ts.map +1 -0
  32. package/dist/lexer/tokenizer.js +240 -0
  33. package/dist/parser/grammar.d.ts +29 -0
  34. package/dist/parser/grammar.d.ts.map +1 -0
  35. package/dist/parser/grammar.js +312 -0
  36. package/dist/parser/parser.d.ts +4 -0
  37. package/dist/parser/parser.d.ts.map +1 -0
  38. package/dist/parser/parser.js +47 -0
  39. package/dist/runtime/index.d.ts +24 -0
  40. package/dist/runtime/index.d.ts.map +1 -0
  41. package/dist/runtime/index.js +108 -0
  42. package/dist/transformer/transformer.d.ts +3 -0
  43. package/dist/transformer/transformer.d.ts.map +1 -0
  44. package/dist/transformer/transformer.js +318 -0
  45. package/package.json +54 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":""}
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env node
2
import { Command } from "commander";
import chalk from "chalk";
import * as fs from "fs";
import * as path from "path";
import { execFileSync, execSync } from "child_process";
import { tokenize } from "../lexer/tokenizer.js";
import { parse } from "../parser/parser.js";
import { transform } from "../transformer/transformer.js";
import { generate } from "../generator/generator.js";
import { fileURLToPath } from "url";
12
// ES modules have no __filename/__dirname globals, so derive them from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Reads the version string from the package manifest so that `--version`
 * follows the published release instead of a hand-maintained literal.
 *
 * The manifest is looked up two directories above this module, matching the
 * dist/cli/index.js layout that getRuntimePath() also assumes.
 *
 * @returns {string} The manifest version, or "0.1.0" when the manifest cannot
 *   be read or has no string `version` field.
 */
function readPackageVersion() {
    const fallbackVersion = "0.1.0";
    try {
        const manifestPath = path.resolve(__dirname, "../../package.json");
        const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
        return typeof manifest.version === "string" ? manifest.version : fallbackVersion;
    }
    catch {
        // Deliberate best effort: a missing or malformed manifest must not stop the CLI from starting.
        return fallbackVersion;
    }
}

const program = new Command();
program
    .name("jps")
    .description("JPS (Just Python Script) Compiler CLI")
    .version(readPackageVersion());
19
/**
 * Computes where the bundled runtime module is expected to live.
 *
 * This module is assumed to sit at dist/cli/index.js, which places the
 * runtime one directory up, at dist/runtime/index.js.
 *
 * @returns {string} Absolute path of the runtime entry file (not checked for existence).
 */
function getRuntimePath() {
    const runtimeEntry = path.resolve(__dirname, "..", "runtime", "index.js");
    return runtimeEntry;
}
24
/**
 * Prints a small excerpt of `code` around an error position to stderr.
 *
 * Up to two lines before and one line after the error line are shown. The
 * error line is marked with a "> " gutter and followed by a caret line that
 * points at the offending column.
 *
 * @param {string} code - Full source text.
 * @param {number} line - 1-based line number of the error.
 * @param {number} col - 1-based column of the error; when it is not a positive
 *   integer the frame is printed without a caret.
 * @returns {void}
 */
function printCodeFrame(code, line, col) {
    const sourceLines = code.split("\n");
    const firstIndex = Math.max(0, line - 3);
    const endIndex = Math.min(sourceLines.length, line + 2);
    // Pad every line number to the widest one shown so the "|" separators line up.
    const numberWidth = String(endIndex).length;
    for (let index = firstIndex; index < endIndex; index++) {
        const lineNum = index + 1;
        const isErrorLine = lineNum === line;
        // Both gutters are two characters wide so marked and unmarked lines stay aligned.
        const gutter = isErrorLine ? "> " : "  ";
        const prefix = `${gutter}${String(lineNum).padStart(numberWidth)} | `;
        console.error(chalk.gray(prefix) + sourceLines[index]);
        if (isErrorLine && Number.isInteger(col) && col >= 1) {
            // The caret offset must include the whole prefix (gutter + number + separator).
            const pad = " ".repeat(prefix.length + (col - 1));
            console.error(chalk.red(`${pad}^`));
        }
    }
}
40
/**
 * Compiles one JPS source file to JavaScript and copies the runtime next to the output.
 *
 * The pipeline is tokenize -> parse -> transform -> generate. The result is
 * written to `outputFile` when given; otherwise the target is derived from
 * `inputFile` by replacing a trailing ".jps" with ".js", or by appending ".js"
 * when there is no such suffix (so the source file is never overwritten).
 * The runtime module is copied beside the output as "jps_runtime.js".
 *
 * Terminates the process with exit code 1 when the input file does not exist
 * or when any compilation stage throws.
 *
 * @param {string} inputFile - Path of the JPS source file.
 * @param {string} [outputFile] - Explicit output path.
 * @param {boolean} [quiet=false] - Suppress the success message and the missing-runtime warning.
 * @returns {string} Path of the written JavaScript file.
 */
function compileFile(inputFile, outputFile, quiet = false) {
    if (!fs.existsSync(inputFile)) {
        console.error(chalk.red(`Error: File '${inputFile}' not found.`));
        process.exit(1);
    }
    const code = fs.readFileSync(inputFile, "utf-8");
    try {
        const tokens = tokenize(code);
        const ast = parse(tokens);
        const transformed = transform(ast);
        const js = generate(transformed);
        // Without a ".jps" suffix a plain replace() would return the input path
        // unchanged and the compiled output would clobber the source file.
        const derivedTarget = /\.jps$/.test(inputFile)
            ? inputFile.replace(/\.jps$/, ".js")
            : `${inputFile}.js`;
        const target = outputFile || derivedTarget;
        fs.writeFileSync(target, js);
        // Copy the runtime: prefer the path next to this module, then the dist fallback.
        const runtimeDest = path.join(path.dirname(target), "jps_runtime.js");
        const runtimeCandidates = [
            getRuntimePath(),
            path.resolve(__dirname, "../../dist/runtime/index.js"),
        ];
        const runtimeSrc = runtimeCandidates.find((candidate) => fs.existsSync(candidate));
        if (runtimeSrc) {
            fs.copyFileSync(runtimeSrc, runtimeDest);
        }
        else if (!quiet) {
            // Possibly running from sources without a build; a valid install should have dist.
            console.warn(chalk.yellow("Warning: Runtime not found. Build the project first."));
        }
        if (!quiet) {
            console.log(chalk.green(`Compiled: ${inputFile} -> ${target}`));
        }
        return target;
    }
    catch (e) {
        console.error(chalk.red("Compilation Failed:"));
        if (e?.token) {
            // Parser errors carry the offending token. Accept both `col` and
            // `column`, since this package's own tokens use `column`.
            const { line, value } = e.token;
            const col = e.token.col ?? e.token.column;
            console.error(chalk.red(`Syntax Error at line ${line}, col ${col}: Unexpected token '${value}'`));
            printCodeFrame(code, line, col);
        }
        else if (e?.message) {
            console.error(chalk.red(e.message));
        }
        else {
            console.error(e);
        }
        process.exit(1);
    }
}
92
// `jps build <file> [-o <file>]`: one-shot compilation of a single source file.
program
    .command("build")
    .description("Compile a JPS file to JavaScript")
    .argument("<file>", "JPS source file")
    .option("-o, --out <file>", "Output file path")
    .action((file, { out }) => {
        compileFile(file, out);
    });
99
// `jps run <file>`: compile quietly, then execute the generated JavaScript.
program.command("run")
    .argument("<file>", "JPS source file")
    .description("Compile and run a JPS file")
    .action((file) => {
        const jsFile = compileFile(file, undefined, true);
        try {
            // Pass the script as an argument vector rather than through a shell
            // string, so paths containing quotes, spaces or `$` are not
            // re-interpreted. process.execPath is the Node binary running this CLI.
            execFileSync(process.execPath, [jsFile], { stdio: "inherit" });
        }
        catch (e) {
            // The child's output was already shown via the inherited stdio;
            // forward its exit code when it has one (it is null if a signal killed it).
            process.exit(typeof e?.status === "number" ? e.status : 1);
        }
    });
112
// `jps watch <file>`: compile once, then poll the file and recompile whenever it changes.
program
    .command("watch")
    .description("Watch file and recompile on changes")
    .argument("<file>", "JPS source file")
    .action((file) => {
        const pollOptions = { interval: 1000 };
        const onChange = () => {
            console.log(chalk.blue("Change detected. Recompiling..."));
            compileFile(file);
        };
        console.log(chalk.blue(`Watching ${file}...`));
        // Initial compile before the watcher is installed.
        compileFile(file);
        fs.watchFile(file, pollOptions, onChange);
    });
123
// `jps init [name]`: scaffold a project directory containing a sample main.jps.
program.command("init")
    .argument("[name]", "Project name")
    .description("Initialize a new JPS project")
    .action((name) => {
        const projectName = name || "jps-project";
        // `recursive` creates missing parent directories and is a no-op when
        // the directory already exists, so no separate existence check is needed.
        fs.mkdirSync(projectName, { recursive: true });
        const entryFile = path.join(projectName, "main.jps");
        if (fs.existsSync(entryFile)) {
            // Re-running init in an existing project must not wipe the user's code.
            console.warn(chalk.yellow(`Warning: '${entryFile}' already exists; leaving it untouched.`));
            return;
        }
        const sampleCode = `print("Hello JPS!")\n`;
        fs.writeFileSync(entryFile, sampleCode);
        console.log(chalk.green(`Initialize project '${projectName}' with main.jps`));
    });
135
// `jps doctor`: print basic environment facts and whether the runtime library can be located.
program
    .command("doctor")
    .description("Check JPS environment")
    .action(() => {
        console.log(chalk.bold("JPS Doctor"));
        console.log(`OS: ${process.platform}`);
        console.log(`Node Version: ${process.version}`);
        // Same two locations compileFile() tries: next to this module first, then the dist fallback.
        const candidates = [
            getRuntimePath(),
            path.resolve(__dirname, "../../dist/runtime/index.js"),
        ];
        const found = candidates.some((candidate) => fs.existsSync(candidate));
        const report = found
            ? chalk.green("Runtime Library: Found")
            : chalk.red("Runtime Library: MISSING");
        console.log(report);
    });

// Hand the process arguments to commander, which dispatches to the command registered above.
program.parse();
package/dist/cli.js ADDED
@@ -0,0 +1,30 @@
1
+ import { tokenize } from "./lexer/tokenizer.js";
2
+ import { parse } from "./parser/parser.js";
3
+ import { transform } from "./transformer/transformer.js";
4
+ import { generate } from "./generator/generator.js";
5
+ import { readFileSync, writeFileSync } from "node:fs";
6
+ import { dirname, extname, join } from "node:path";
7
/**
 * Compiles a single JPS file and writes the JavaScript next to it.
 *
 * The token stream is dumped to stdout as pretty-printed JSON before parsing,
 * and the output path is reported once the file has been written.
 *
 * @param {string} inputPath - Path of the JPS source file.
 * @returns {void}
 */
function build(inputPath) {
    const tokens = tokenize(readFileSync(inputPath, "utf-8"));
    console.log("Tokens:", JSON.stringify(tokens, null, 2));
    const output = generate(transform(parse(tokens)));
    // Same directory as the input, same base name, ".js" extension.
    const outputName = `${basenameWithoutExt(inputPath)}.js`;
    const outputPath = join(dirname(inputPath), outputName);
    writeFileSync(outputPath, output, "utf-8");
    console.log(`Wrote ${outputPath}`);
}
18
/**
 * Returns the final path segment of `path` with its extension removed.
 *
 * Directory components are always dropped, including when the path has no
 * extension, so the result can be joined onto a directory without repeating it.
 *
 * @param {string} path - File path using "/" or "\\" separators.
 * @returns {string} The base name without directories or extension.
 */
function basenameWithoutExt(path) {
    const ext = extname(path);
    const withoutExt = ext ? path.slice(0, -ext.length) : path;
    return withoutExt.split(/[\\/]/).pop();
}
25
// Entry point: the only accepted invocation is `build <file>`.
const cliArgs = process.argv.slice(2);
const isBuildInvocation = cliArgs.length === 2 && cliArgs[0] === "build";
if (!isBuildInvocation) {
    console.log("Usage: jps build <file.jps>");
    process.exit(1);
}
const [, sourcePath] = cliArgs;
build(sourcePath);
@@ -0,0 +1,3 @@
1
+ import { Program } from "../ast/types.js";
2
/**
 * Renders a program AST as JavaScript source text.
 *
 * @param program - Root AST node whose `body` statements are emitted in order.
 * @returns The generated source, beginning with the runtime import line.
 */
+ export declare function generate(program: Program): string;
3
+ //# sourceMappingURL=generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../src/generator/generator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAuE,MAAM,iBAAiB,CAAC;AA4E/G,wBAAgB,QAAQ,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,CAGjD"}
@@ -0,0 +1,175 @@
1
/**
 * Renders a list of statements at the given indent level, one per line.
 *
 * @param {object[]} statements - Statement nodes.
 * @param {number} indent - Indent level applied to each statement.
 * @returns {string} The rendered statements joined by newlines.
 */
function generateBlock(statements, indent) {
    return statements.map((stmt) => generateStatement(stmt, indent)).join("\n");
}

/**
 * Renders a FunctionDeclaration node as `<keyword><name>(<params>) { ... }`.
 *
 * @param {object} statement - Node with `name`, `params` and `body`.
 * @param {number} indent - Indent level of the declaration itself.
 * @param {string} keyword - "function " for plain functions, "" for class methods.
 * @returns {string} The rendered declaration.
 */
function generateCallable(statement, indent, keyword) {
    const pad = " ".repeat(indent);
    const params = statement.params.join(", ");
    const body = generateBlock(statement.body, indent + 1);
    return `${pad}${keyword}${statement.name}(${params}) {\n${body}\n${pad}}`;
}

/**
 * Renders one statement node as JavaScript source.
 *
 * @param {object} statement - AST statement node; dispatch is on `statement.type`.
 * @param {number} [indent=0] - Indent level; each level contributes one repetition of the pad string.
 * @returns {string} The rendered statement, or "" for an unrecognised node type.
 */
function generateStatement(statement, indent = 0) {
    const pad = " ".repeat(indent);
    switch (statement.type) {
        case "FunctionDeclaration":
            return generateCallable(statement, indent, "function ");
        case "ForStatement": {
            const body = generateBlock(statement.body, indent + 1);
            if (statement.collection) {
                // Iteration over a collection.
                const collection = generateExpression(statement.collection);
                return `${pad}for (const ${statement.iterator} of ${collection}) {\n${body}\n${pad}}`;
            }
            // Counting loop from `start` up to, but excluding, `end`.
            const start = generateExpression(statement.start);
            const end = generateExpression(statement.end);
            return `${pad}for (let ${statement.iterator} = ${start}; ${statement.iterator} < ${end}; ${statement.iterator}++) {\n${body}\n${pad}}`;
        }
        case "WhileStatement": {
            const test = generateExpression(statement.test);
            const body = generateBlock(statement.body, indent + 1);
            return `${pad}while (${test}) {\n${body}\n${pad}}`;
        }
        case "IfStatement": {
            const test = generateExpression(statement.test);
            const consequent = generateBlock(statement.consequent, indent + 1);
            let code = `${pad}if (${test}) {\n${consequent}\n${pad}}`;
            if (statement.alternate) {
                const alternate = generateBlock(statement.alternate, indent + 1);
                code += ` else {\n${alternate}\n${pad}}`;
            }
            return code;
        }
        case "ExpressionStatement": {
            const expression = statement.expression;
            // A plain `name = value` assignment is emitted as a `var` declaration.
            if (expression.type === "BinaryExpression" && expression.operator === "=" && expression.left.type === "Identifier") {
                return `${pad}var ${generateExpression(expression.left)} = ${generateExpression(expression.right)};`;
            }
            return `${pad}${generateExpression(expression)};`;
        }
        case "ReturnStatement": {
            if (statement.argument) {
                return `${pad}return ${generateExpression(statement.argument)};`;
            }
            return `${pad}return;`;
        }
        case "ClassDeclaration": {
            // Methods are emitted without the `function` keyword. This is done
            // per member instead of stripping "function " from the rendered
            // text, which also damaged identifiers, string literals and nested
            // function declarations that happened to contain that text.
            const classBody = statement.body
                .map((member) => (member.type === "FunctionDeclaration"
                    ? generateCallable(member, indent + 1, "")
                    : generateStatement(member, indent + 1)))
                .join("\n");
            const extendsClause = statement.superClass ? ` extends ${statement.superClass}` : "";
            return `${pad}class ${statement.name}${extendsClause} {\n${classBody}\n${pad}}`;
        }
        case "TryStatement": {
            const block = generateBlock(statement.block, indent + 1);
            const handler = generateBlock(statement.handler, indent + 1);
            return `${pad}try {\n${block}\n${pad}} catch (e) {\n${handler}\n${pad}}`;
        }
        case "ImportDeclaration": {
            const { source, specifiers } = statement;
            const specs = specifiers
                .map((s) => (s.local === s.imported ? s.local : `${s.imported} as ${s.local}`))
                .join(", ");
            return `${pad}import { ${specs} } from "${source}";`;
        }
        case "ExportDeclaration": {
            // The declaration is rendered at indent 0 because it follows `export` on the same line.
            return `${pad}export ${generateStatement(statement.declaration, 0)}`;
        }
        default:
            return "";
    }
}
74
/**
 * Produces the complete JavaScript module text for a program AST.
 *
 * The output starts with an import of the runtime helpers, followed by every
 * top-level statement separated by a blank line, and ends with a newline.
 *
 * @param {{ body: object[] }} program - Root AST node.
 * @returns {string} Generated JavaScript source.
 */
export function generate(program) {
    const runtimeImport = `import { print, len, range, sum, min, max, sorted, str, int, float, map, filter, list, __in__ } from "jps/runtime";\n\n`;
    const topLevel = program.body.map((node) => generateStatement(node));
    return `${runtimeImport}${topLevel.join("\n\n")}\n`;
}
78
/**
 * Renders one expression node as JavaScript source.
 *
 * @param {object} [expression] - AST expression node; dispatch is on `expression.type`.
 * @returns {string} The rendered expression, or "" for a missing node or an
 *   unrecognised node type.
 */
function generateExpression(expression) {
    if (!expression) {
        return "";
    }
    switch (expression.type) {
        case "BooleanLiteral":
            return expression.value ? "true" : "false";
        case "ArrayLiteral": {
            const elements = expression.elements.map((element) => generateExpression(element)).join(", ");
            return `[${elements}]`;
        }
        case "ObjectLiteral": {
            // JSON.stringify escapes quotes, backslashes and newlines in the
            // key, which plain interpolation between double quotes did not.
            const properties = expression.properties
                .map((p) => `${JSON.stringify(String(p.key))}: ${generateExpression(p.value)}`)
                .join(", ");
            return `{${properties}}`;
        }
        case "Identifier":
            return expression.name;
        case "NumberLiteral":
            return String(expression.value);
        case "StringLiteral":
            return JSON.stringify(expression.value);
        case "FStringLiteral": {
            // Splitting on a capturing group alternates literal text (even
            // indexes) with the contents of each `{...}` placeholder (odd indexes).
            const parts = expression.value.split(/\{([^}]+)\}/);
            const segments = parts.map((part, index) => {
                if (index % 2 === 0) {
                    return JSON.stringify(part);
                }
                // Placeholder text is passed through as-is apart from these
                // three Python string methods, which are renamed to their JS equivalents.
                const expr = part
                    .replace(/\.lower\(\)/g, ".toLowerCase()")
                    .replace(/\.upper\(\)/g, ".toUpperCase()")
                    .replace(/\.strip\(\)/g, ".trim()");
                return `(${expr})`;
            });
            return segments.join(" + ");
        }
        case "BinaryExpression": {
            const left = generateExpression(expression.left);
            const right = generateExpression(expression.right);
            let operator = expression.operator;
            // Map Python operator keywords to JS.
            if (operator === "and") {
                operator = "&&";
            }
            if (operator === "or") {
                operator = "||";
            }
            if (operator === "not") {
                operator = "!";
            }
            // `in` is emitted literally. Python `in` tests values while JS `in`
            // tests keys, so membership tests presumably need the runtime's
            // `__in__` helper — verify whether the transformer rewrites them.
            // NOTE(review): operands are emitted without parentheses, so any
            // grouping that exists only in the AST shape is not reproduced —
            // confirm how the parser/transformer represents `(a + b) * c`.
            return `${left} ${operator} ${right}`;
        }
        case "UnaryExpression": {
            const argument = generateExpression(expression.argument);
            // `not` is not a JS operator; every other operator is emitted unchanged.
            const operator = expression.operator === "not" ? "!" : expression.operator;
            return `${operator}(${argument})`; // parentheses keep the operand grouped
        }
        case "ConditionalExpression": {
            const test = generateExpression(expression.test);
            const consequent = generateExpression(expression.consequent);
            const alternate = generateExpression(expression.alternate);
            return `(${test} ? ${consequent} : ${alternate})`;
        }
        case "CallExpression": {
            const calleeCode = generateExpression(expression.callee);
            // An arrow function must be parenthesised before it can be called:
            // `(x) => x(1)` would call `x`, not the lambda.
            const callee = expression.callee && expression.callee.type === "LambdaExpression"
                ? `(${calleeCode})`
                : calleeCode;
            const args = expression.args.map((arg) => generateExpression(arg)).join(", ");
            return `${callee}(${args})`;
        }
        case "NewExpression": {
            const callee = generateExpression(expression.callee);
            const args = expression.args.map((arg) => generateExpression(arg)).join(", ");
            return `new ${callee}(${args})`;
        }
        case "ListComprehension": {
            const expr = generateExpression(expression.expression);
            const iterator = expression.iterator;
            // `start` is only a fallback for nodes without a `collection`.
            const collection = generateExpression(expression.collection || expression.start);
            const test = expression.test ? `if (${generateExpression(expression.test)}) ` : "";
            // Emitted as an immediately invoked arrow function that collects results into an array.
            return `(() => { const res = []; for (const ${iterator} of ${collection}) { ${test}res.push(${expr}); } return res; })()`;
        }
        case "MemberExpression": {
            const obj = generateExpression(expression.object);
            if (expression.computed) {
                return `${obj}[${generateExpression(expression.property)}]`;
            }
            return `${obj}.${expression.property.name}`;
        }
        case "LambdaExpression": {
            const params = expression.params.join(", ");
            const bodyCode = generateExpression(expression.body);
            // A bare `{...}` after `=>` is parsed as a block, so an object-literal body needs parentheses.
            const body = expression.body && expression.body.type === "ObjectLiteral"
                ? `(${bodyCode})`
                : bodyCode;
            return `(${params}) => ${body}`;
        }
        default:
            return "";
    }
}
@@ -0,0 +1,3 @@
1
+ import moo from 'moo';
2
/** Lexer instance built with moo from this package's token rules. */
+ export declare const lexer: moo.Lexer;
3
+ //# sourceMappingURL=lexer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lexer.d.ts","sourceRoot":"","sources":["../../src/lexer/lexer.ts"],"names":[],"mappings":"AAAA,OAAO,GAAG,MAAM,KAAK,CAAC;AAEtB,eAAO,MAAM,KAAK,WAqBhB,CAAC"}
@@ -0,0 +1,23 @@
1
+ import moo from 'moo';
2
/**
 * moo lexer for the core token set: whitespace, newlines, parentheses,
 * double-quoted strings, integers, keywords/identifiers, operators, commas
 * and colons.
 *
 * Reserved words are recognised through the `moo.keywords` transform on the
 * identifier rule rather than by a separate literal rule. A literal keyword
 * rule placed before `identifier` matches keyword prefixes, so a name such as
 * `format` would be split into the keyword `for` followed by `mat`. Reserved
 * words keep the token type "keyword".
 */
export const lexer = moo.compile({
    ws: /[ \t]+/,
    nl: { match: /\r?\n/, lineBreaks: true },
    lparen: '(',
    rparen: ')',
    // A double-quoted string on one line; `\"` and `\\` are the accepted escapes.
    string: /"(?:\\["\\]|[^\n"\\])*"/,
    number: /0|[1-9][0-9]*/,
    identifier: {
        match: /[a-zA-Z_][a-zA-Z0-9_]*/,
        type: moo.keywords({
            keyword: ['def', 'if', 'else', 'for', 'in', 'return', 'let', 'const', 'var', 'while'],
        }),
    },
    operator: ['+', '-', '*', '/', '<', '>', '==', '='],
    comma: ',',
    colon: ':',
});
@@ -0,0 +1,9 @@
1
/**
 * Every kind of token the tokenizer can emit. INDENT, DEDENT and NEWLINE are
 * layout tokens derived from line structure; the remaining kinds correspond
 * to keywords, literals, punctuation and operators.
 */
export type TokenType = "INDENT" | "DEDENT" | "NEWLINE" | "DEF" | "IF" | "ELSE" | "FOR" | "IN" | "RANGE" | "RETURN" | "CLASS" | "TRY" | "EXCEPT" | "LAMBDA" | "IMPORT" | "FROM" | "NOT" | "AND" | "OR" | "EXPORT" | "AS" | "IDENT" | "NUMBER" | "STRING" | "FSTRING" | "LPAREN" | "RPAREN" | "COLON" | "COMMA" | "OP" | "LBRACKET" | "RBRACKET" | "TRUE" | "FALSE" | "LBRACE" | "RBRACE" | "WHILE" | "DOT";
/** A single lexical token together with its source position. */
export type Token = {
    /** Kind of token. */
    type: TokenType;
    /** Source text carried by IDENT, NUMBER, STRING, FSTRING and OP tokens; other kinds omit it. */
    value?: string;
    /** 1-based line number. */
    line: number;
    /** 1-based column number. */
    column: number;
};
/**
 * Converts source text into a flat token stream that includes INDENT, DEDENT
 * and NEWLINE markers.
 *
 * @param source - Program text.
 * @returns The tokens in source order.
 */
export declare function tokenize(source: string): Token[];
9
+ //# sourceMappingURL=tokenizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/lexer/tokenizer.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,SAAS,GACjB,QAAQ,GACR,QAAQ,GACR,SAAS,GACT,KAAK,GACL,IAAI,GACJ,MAAM,GACN,KAAK,GACL,IAAI,GACJ,OAAO,GACP,QAAQ,GACR,KAAK,GACL,OAAO,GACP,KAAK,GACL,QAAQ,GACR,QAAQ,GACR,QAAQ,GACR,MAAM,GACN,KAAK,GACL,KAAK,GACL,IAAI,GACJ,QAAQ,GACR,IAAI,GACJ,OAAO,GACP,QAAQ,GACR,QAAQ,GACR,SAAS,GACT,QAAQ,GACR,QAAQ,GACR,OAAO,GACP,OAAO,GACP,IAAI,GACJ,UAAU,GACV,UAAU,GACV,MAAM,GACN,OAAO,GACP,QAAQ,GACR,QAAQ,GACR,OAAO,GACP,KAAK,CAAC;AAEV,MAAM,MAAM,KAAK,GAAG;IAClB,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAqCF,wBAAgB,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,EAAE,CAyOhD"}
@@ -0,0 +1,240 @@
1
+ // Keywords map directly to token types.
2
+ const keywords = {
3
+ def: "DEF",
4
+ if: "IF",
5
+ else: "ELSE",
6
+ for: "FOR",
7
+ in: "IN",
8
+ range: "RANGE",
9
+ return: "RETURN",
10
+ or: "OR",
11
+ and: "AND",
12
+ not: "NOT",
13
+ true: "TRUE",
14
+ false: "FALSE",
15
+ True: "TRUE",
16
+ False: "FALSE",
17
+ while: "WHILE",
18
+ class: "CLASS",
19
+ try: "TRY",
20
+ except: "EXCEPT",
21
+ lambda: "LAMBDA",
22
+ import: "IMPORT",
23
+ from: "FROM",
24
+ export: "EXPORT",
25
+ as: "AS",
26
+ };
27
+ // Supported operators for simple expressions.
28
+ const operators = ["+", "-", "*", "/", "<", ">", "==", "=", "%", "<=", ">=", "!=", "+=", "-=", "*=", "/="];
29
+ const isAlpha = (ch) => /[A-Za-z_]/.test(ch);
30
+ const isAlphaNumeric = (ch) => /[A-Za-z0-9_]/.test(ch);
31
+ const isDigit = (ch) => /[0-9]/.test(ch);
32
+ // Convert source code into a flat token stream with INDENT/DEDENT markers.
33
// Single-character punctuation and the token type each one produces.
const PUNCTUATION_TOKEN_TYPES = new Map([
    ["(", "LPAREN"],
    [")", "RPAREN"],
    [":", "COLON"],
    [",", "COMMA"],
    [".", "DOT"],
    ["[", "LBRACKET"],
    ["]", "RBRACKET"],
    ["{", "LBRACE"],
    ["}", "RBRACE"],
]);

/**
 * Reads the body of a double-quoted literal that must close on the same line.
 *
 * @param {string} lineText - Current source line.
 * @param {number} contentStart - Index of the first character after the opening quote.
 * @param {number} lineNumber - 1-based line number, used in the error message.
 * @returns {{ value: string, next: number }} The raw text between the quotes
 *   and the index just past the closing quote.
 * @throws {Error} When the line ends before a closing quote is found.
 */
function scanQuoted(lineText, contentStart, lineNumber) {
    const closingIndex = lineText.indexOf('"', contentStart);
    if (closingIndex === -1) {
        // contentStart is the 0-based index after the quote, i.e. the quote's 1-based column.
        throw new Error(`Unterminated string literal at ${lineNumber}:${contentStart}`);
    }
    return { value: lineText.slice(contentStart, closingIndex), next: closingIndex + 1 };
}

/**
 * Converts source code into a flat token stream with INDENT/DEDENT markers.
 *
 * Lines are processed one at a time. Blank lines and full-line `#` comments
 * produce no tokens. A change in leading-whitespace width relative to the
 * enclosing block produces INDENT or DEDENT tokens, and every tokenized line
 * ends with a NEWLINE token. Triple-quoted strings may span lines and yield a
 * single STRING token positioned at their opening delimiter. String contents
 * are taken verbatim; escape sequences are not interpreted.
 *
 * @param {string} source - Program text; CRLF line endings are normalised to LF.
 * @returns {Array<{type: string, value?: string, line: number, column: number}>}
 *   Tokens in source order, with 1-based line and column numbers.
 * @throws {Error} On an unexpected character, or on a string literal
 *   (single-line or triple-quoted) that is never closed.
 */
export function tokenize(source) {
    const tokens = [];
    const lines = source.replace(/\r\n/g, "\n").split("\n");
    const indentStack = [0];
    // The triple-quoted string currently open across lines, or null when there is none.
    let pending = null;
    for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
        const lineText = lines[lineIndex];
        const lineNumber = lineIndex + 1;
        let i = 0;
        if (pending !== null) {
            // --- Inside a multiline string: look for its closing delimiter ---
            const closingIndex = lineText.indexOf('"""');
            if (closingIndex === -1) {
                // Still open: the whole line belongs to the string and emits no tokens.
                pending.value += lineText + "\n";
                continue;
            }
            tokens.push({
                type: "STRING",
                value: pending.value + lineText.slice(0, closingIndex),
                line: pending.line,
                column: pending.column,
            });
            pending = null;
            i = closingIndex + 3; // Resume scanning after the closing delimiter.
        }
        else {
            // --- Start of a normal line ---
            const trimmed = lineText.trim();
            if (trimmed.length === 0 || trimmed.startsWith("#")) {
                continue; // Blank lines and full-line comments are ignored.
            }
            const indent = lineText.match(/^\s*/)[0].length;
            i = indent; // Scanning starts after the indentation.
            if (indent > indentStack[indentStack.length - 1]) {
                indentStack.push(indent);
                tokens.push({ type: "INDENT", line: lineNumber, column: 1 });
            }
            else {
                // One DEDENT per enclosing block that this line leaves.
                while (indent < indentStack[indentStack.length - 1]) {
                    indentStack.pop();
                    tokens.push({ type: "DEDENT", line: lineNumber, column: 1 });
                }
            }
        }
        // --- Tokenization loop ---
        while (i < lineText.length) {
            const char = lineText[i];
            if (char === " " || char === "\t") {
                i += 1;
                continue;
            }
            // F-string: `f` immediately followed by a double quote.
            if (char === "f" && lineText[i + 1] === '"') {
                const { value, next } = scanQuoted(lineText, i + 2, lineNumber);
                tokens.push({ type: "FSTRING", value, line: lineNumber, column: i + 1 });
                i = next;
                continue;
            }
            // Triple-quoted string; must be tested before the single-quote case.
            if (lineText.startsWith('"""', i)) {
                const contentStart = i + 3;
                const closingIndex = lineText.indexOf('"""', contentStart);
                if (closingIndex === -1) {
                    // Opens here and continues on later lines; no NEWLINE is emitted for this line.
                    pending = {
                        value: lineText.slice(contentStart) + "\n",
                        line: lineNumber,
                        column: i + 1,
                    };
                    break;
                }
                tokens.push({
                    type: "STRING",
                    value: lineText.slice(contentStart, closingIndex),
                    line: lineNumber,
                    column: i + 1,
                });
                i = closingIndex + 3;
                continue;
            }
            // Inline comment: ignore the rest of the line.
            if (char === "#") {
                break;
            }
            if (isAlpha(char)) {
                const start = i;
                i += 1;
                while (i < lineText.length && isAlphaNumeric(lineText[i])) {
                    i += 1;
                }
                const text = lineText.slice(start, i);
                // Own-property check: names such as "constructor" or "toString"
                // must not resolve to members inherited from Object.prototype.
                if (Object.prototype.hasOwnProperty.call(keywords, text)) {
                    tokens.push({ type: keywords[text], line: lineNumber, column: start + 1 });
                }
                else {
                    tokens.push({ type: "IDENT", value: text, line: lineNumber, column: start + 1 });
                }
                continue;
            }
            if (isDigit(char)) {
                const start = i;
                i += 1;
                while (i < lineText.length && isDigit(lineText[i])) {
                    i += 1;
                }
                // Optional fractional part.
                if (i < lineText.length && lineText[i] === ".") {
                    i += 1;
                    while (i < lineText.length && isDigit(lineText[i])) {
                        i += 1;
                    }
                }
                tokens.push({ type: "NUMBER", value: lineText.slice(start, i), line: lineNumber, column: start + 1 });
                continue;
            }
            if (char === '"') {
                const { value, next } = scanQuoted(lineText, i + 1, lineNumber);
                tokens.push({ type: "STRING", value, line: lineNumber, column: i + 1 });
                i = next;
                continue;
            }
            const punctuationType = PUNCTUATION_TOKEN_TYPES.get(char);
            if (punctuationType !== undefined) {
                tokens.push({ type: punctuationType, line: lineNumber, column: i + 1 });
                i += 1;
                continue;
            }
            // Two-character operators are tried first so "==" is not read as two "=".
            const twoChar = lineText.slice(i, i + 2);
            if (operators.includes(twoChar)) {
                tokens.push({ type: "OP", value: twoChar, line: lineNumber, column: i + 1 });
                i += 2;
                continue;
            }
            if (operators.includes(char)) {
                tokens.push({ type: "OP", value: char, line: lineNumber, column: i + 1 });
                i += 1;
                continue;
            }
            throw new Error(`Unexpected character '${char}' at ${lineNumber}:${i + 1}`);
        }
        if (pending !== null) {
            continue; // The line ended inside a multiline string.
        }
        tokens.push({ type: "NEWLINE", line: lineNumber, column: lineText.length + 1 });
    }
    if (pending !== null) {
        // Without this the string and everything after it would vanish silently.
        throw new Error(`Unterminated multiline string starting at ${pending.line}:${pending.column}`);
    }
    // Close every block still open at end of input.
    while (indentStack.length > 1) {
        indentStack.pop();
        tokens.push({ type: "DEDENT", line: lines.length, column: 1 });
    }
    return tokens;
}