@forwardimpact/libsyntheticgen 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/dsl/index.js +36 -0
- package/dsl/parser.js +728 -0
- package/dsl/tokenizer.js +282 -0
- package/engine/activity.js +956 -0
- package/engine/entities.js +144 -0
- package/engine/names.js +290 -0
- package/engine/prose-keys.js +182 -0
- package/engine/rng.js +43 -0
- package/engine/tier0.js +63 -0
- package/index.js +7 -0
- package/package.json +35 -0
- package/test/activity.test.js +322 -0
- package/test/faker.test.js +98 -0
- package/test/parser-dataset.test.js +142 -0
- package/test/parser.test.js +596 -0
- package/test/rng.test.js +236 -0
- package/test/sdv.test.js +67 -0
- package/test/synthea.test.js +95 -0
- package/test/tokenizer.test.js +266 -0
- package/tools/faker.js +83 -0
- package/tools/sdv.js +93 -0
- package/tools/sdv_generate.py +29 -0
- package/tools/synthea.js +126 -0
package/test/rng.test.js
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import { describe, test } from "node:test";
|
|
2
|
+
import assert from "node:assert";
|
|
3
|
+
import { createSeededRNG } from "../engine/rng.js";
|
|
4
|
+
|
|
5
|
+
describe("createSeededRNG", () => {
  describe("determinism", () => {
    test("same seed produces same sequence", () => {
      const genA = createSeededRNG(42);
      const genB = createSeededRNG(42);
      const first = [];
      const second = [];
      for (let k = 0; k < 10; k++) {
        first.push(genA.random());
        second.push(genB.random());
      }
      assert.deepStrictEqual(first, second);
    });

    test("different seeds produce different sequences", () => {
      const genA = createSeededRNG(42);
      const genB = createSeededRNG(99);
      const fromA = Array.from({ length: 5 }, () => genA.random());
      const fromB = Array.from({ length: 5 }, () => genB.random());
      assert.notDeepStrictEqual(fromA, fromB);
    });

    test("string seed is deterministic", () => {
      const genA = createSeededRNG("hello");
      const genB = createSeededRNG("hello");
      assert.strictEqual(genA.random(), genB.random());
    });
  });

  describe("random()", () => {
    test("returns values in [0, 1)", () => {
      const gen = createSeededRNG(42);
      for (let k = 0; k < 100; k++) {
        const sample = gen.random();
        assert.ok(sample >= 0, `Expected >= 0, got ${sample}`);
        assert.ok(sample < 1, `Expected < 1, got ${sample}`);
      }
    });

    test("returns different values on successive calls", () => {
      const gen = createSeededRNG(42);
      const first = gen.random();
      const second = gen.random();
      assert.notStrictEqual(first, second);
    });
  });

  describe("randomInt(min, max)", () => {
    test("returns integers within the specified range", () => {
      const gen = createSeededRNG(42);
      for (let k = 0; k < 100; k++) {
        const sample = gen.randomInt(1, 10);
        assert.ok(Number.isInteger(sample), `Expected integer, got ${sample}`);
        assert.ok(sample >= 1, `Expected >= 1, got ${sample}`);
        assert.ok(sample <= 10, `Expected <= 10, got ${sample}`);
      }
    });

    test("returns min when min equals max", () => {
      const gen = createSeededRNG(42);
      assert.strictEqual(gen.randomInt(5, 5), 5);
    });

    test("handles range of 0 to 0", () => {
      const gen = createSeededRNG(42);
      assert.strictEqual(gen.randomInt(0, 0), 0);
    });

    test("is deterministic for same seed", () => {
      const genA = createSeededRNG(42);
      const genB = createSeededRNG(42);
      for (let k = 0; k < 20; k++) {
        assert.strictEqual(genA.randomInt(0, 100), genB.randomInt(0, 100));
      }
    });
  });

  describe("pick(arr)", () => {
    test("returns an element from the array", () => {
      const gen = createSeededRNG(42);
      const pool = ["a", "b", "c", "d", "e"];
      for (let k = 0; k < 50; k++) {
        const chosen = gen.pick(pool);
        assert.ok(pool.includes(chosen), `Picked '${chosen}' not in array`);
      }
    });

    test("returns the only element for single-item arrays", () => {
      const gen = createSeededRNG(42);
      assert.strictEqual(gen.pick(["only"]), "only");
    });

    test("is deterministic for same seed", () => {
      const genA = createSeededRNG(42);
      const genB = createSeededRNG(42);
      const pool = [1, 2, 3, 4, 5];
      for (let k = 0; k < 10; k++) {
        assert.strictEqual(genA.pick(pool), genB.pick(pool));
      }
    });
  });

  describe("shuffle(arr)", () => {
    test("returns a new array with same elements", () => {
      const gen = createSeededRNG(42);
      const source = [1, 2, 3, 4, 5];
      const result = gen.shuffle(source);
      assert.strictEqual(result.length, source.length);
      // Order may differ; compare as multisets via sorted copies.
      assert.deepStrictEqual([...result].sort(), [...source].sort());
    });

    test("does not modify the original array", () => {
      const gen = createSeededRNG(42);
      const source = [1, 2, 3, 4, 5];
      const snapshot = [...source];
      gen.shuffle(source);
      assert.deepStrictEqual(source, snapshot);
    });

    test("is deterministic for same seed", () => {
      const genA = createSeededRNG(42);
      const genB = createSeededRNG(42);
      const source = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
      assert.deepStrictEqual(genA.shuffle(source), genB.shuffle(source));
    });

    test("handles single-element arrays", () => {
      const gen = createSeededRNG(42);
      assert.deepStrictEqual(gen.shuffle([1]), [1]);
    });

    test("handles empty arrays", () => {
      const gen = createSeededRNG(42);
      assert.deepStrictEqual(gen.shuffle([]), []);
    });
  });

  describe("weightedPick(weights)", () => {
    test("returns a valid index", () => {
      const gen = createSeededRNG(42);
      const weights = [10, 20, 30];
      for (let k = 0; k < 50; k++) {
        const chosen = gen.weightedPick(weights);
        assert.ok(
          chosen >= 0 && chosen < weights.length,
          `Index ${chosen} out of range`,
        );
      }
    });

    test("respects heavy weighting", () => {
      const gen = createSeededRNG(42);
      // Index 2 carries nearly all of the weight.
      const weights = [1, 1, 1000];
      const counts = [0, 0, 0];
      for (let k = 0; k < 200; k++) {
        counts[gen.weightedPick(weights)]++;
      }
      // The heavy index should win more often than the others combined.
      assert.ok(
        counts[2] > counts[0] + counts[1],
        `Expected index 2 to dominate: ${JSON.stringify(counts)}`,
      );
    });

    test("returns only valid index for single-weight array", () => {
      const gen = createSeededRNG(42);
      assert.strictEqual(gen.weightedPick([100]), 0);
    });

    test("is deterministic for same seed", () => {
      const genA = createSeededRNG(42);
      const genB = createSeededRNG(42);
      const weights = [10, 20, 30, 40];
      for (let k = 0; k < 20; k++) {
        assert.strictEqual(
          genA.weightedPick(weights),
          genB.weightedPick(weights),
        );
      }
    });
  });

  describe("gaussian(mean, std)", () => {
    test("produces values centered around the mean", () => {
      const gen = createSeededRNG(42);
      const mean = 50;
      const std = 10;
      const samples = Array.from({ length: 1000 }, () =>
        gen.gaussian(mean, std),
      );
      const avg = samples.reduce((sum, v) => sum + v, 0) / samples.length;
      // The sample mean should land close to the requested mean.
      assert.ok(
        Math.abs(avg - mean) < 3,
        `Average ${avg} too far from mean ${mean}`,
      );
    });

    test("produces reasonable spread", () => {
      const gen = createSeededRNG(42);
      const mean = 0;
      const std = 1;
      const samples = Array.from({ length: 1000 }, () =>
        gen.gaussian(mean, std),
      );
      // Nearly all samples should fall within three standard deviations.
      const inBand = samples.filter(
        (v) => v >= mean - 3 * std && v <= mean + 3 * std,
      );
      assert.ok(
        inBand.length > 990,
        `Expected >99% within 3 sigma, got ${inBand.length}/1000`,
      );
    });

    test("is deterministic for same seed", () => {
      const genA = createSeededRNG(42);
      const genB = createSeededRNG(42);
      for (let k = 0; k < 10; k++) {
        assert.strictEqual(genA.gaussian(0, 1), genB.gaussian(0, 1));
      }
    });

    test("respects custom mean and std", () => {
      const gen = createSeededRNG(42);
      const samples = Array.from({ length: 500 }, () => gen.gaussian(100, 5));
      const lo = Math.min(...samples);
      const hi = Math.max(...samples);
      // 500 draws should stay within roughly six sigma of the mean.
      assert.ok(lo > 70, `Min ${lo} too low for mean=100, std=5`);
      assert.ok(hi < 130, `Max ${hi} too high for mean=100, std=5`);
    });
  });
});
|
package/test/sdv.test.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { describe, test } from "node:test";
|
|
2
|
+
import assert from "node:assert";
|
|
3
|
+
import { SdvTool } from "../tools/sdv.js";
|
|
4
|
+
|
|
5
|
+
const logger = { info() {}, error() {} };
|
|
6
|
+
|
|
7
|
+
describe("SdvTool", () => {
  test("requires all dependencies", () => {
    // The constructor must fail fast when any injected dependency is missing.
    assert.throws(() => new SdvTool({}), /requires logger/);
    assert.throws(() => new SdvTool({ logger }), /requires execFileFn/);
    assert.throws(
      () => new SdvTool({ logger, execFileFn: async () => {} }),
      /requires fsFns/,
    );
  });

  test("checkAvailability throws when python/sdv missing", async () => {
    const tool = new SdvTool({
      logger,
      // Simulate a missing python interpreter / sdv install.
      execFileFn: async () => {
        throw new Error("not found");
      },
      fsFns: { writeFile: async () => {}, rm: async () => {} },
    });
    await assert.rejects(
      () => tool.checkAvailability(),
      /SDV requires Python 3/,
    );
  });

  test("parses subprocess output into datasets", async () => {
    // The subprocess emits one JSON object per line, one per generated table.
    const stdout = [
      JSON.stringify({ name: "orders", records: [{ id: 1, amount: 99.5 }] }),
      JSON.stringify({
        name: "items",
        records: [{ id: 2, product: "widget" }],
      }),
    ].join("\n");

    const tool = new SdvTool({
      logger,
      // Fixed: previously captured the exec args into an unused variable
      // (`_capturedConfig`) that was never asserted on — dead code removed.
      execFileFn: async () => ({ stdout }),
      fsFns: {
        writeFile: async () => {},
        rm: async () => {},
      },
    });

    const datasets = await tool.generate({
      name: "sales",
      metadata: "meta.json",
      data: { orders: "orders.csv", items: "items.csv" },
      rows: 100,
      seed: 42,
    });

    // One dataset per table, named with the run-name prefix.
    assert.strictEqual(datasets.length, 2);
    assert.strictEqual(datasets[0].name, "sales_orders");
    assert.strictEqual(datasets[0].records[0].amount, 99.5);
    assert.strictEqual(datasets[0].metadata.tool, "sdv");
    assert.strictEqual(datasets[1].name, "sales_items");
  });
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { describe, test } from "node:test";
|
|
2
|
+
import assert from "node:assert";
|
|
3
|
+
import { SyntheaTool } from "../tools/synthea.js";
|
|
4
|
+
|
|
5
|
+
const logger = { info() {}, error() {} };
|
|
6
|
+
|
|
7
|
+
describe("SyntheaTool", () => {
  test("requires all dependencies", () => {
    // Each partial options object should trip the next missing-dependency check.
    const cases = [
      [{}, /requires logger/],
      [{ logger }, /requires syntheaJar/],
      [{ logger, syntheaJar: "/path.jar" }, /requires execFileFn/],
      [
        { logger, syntheaJar: "/path.jar", execFileFn: async () => {} },
        /requires fsFns/,
      ],
    ];
    for (const [options, pattern] of cases) {
      assert.throws(() => new SyntheaTool(options), pattern);
    }
  });

  test("checkAvailability throws when java missing", async () => {
    const tool = new SyntheaTool({
      logger,
      syntheaJar: "/missing.jar",
      // Simulate a machine without a java executable.
      execFileFn: async () => {
        throw new Error("not found");
      },
      fsFns: { readFile: async () => Buffer.from("") },
    });
    await assert.rejects(
      () => tool.checkAvailability(),
      /Synthea requires Java/,
    );
  });

  test("passes correct args to java", async () => {
    let invocation;
    const bundle = {
      entry: [
        { resource: { resourceType: "Patient", id: "p1", name: "Alice" } },
        { resource: { resourceType: "Condition", id: "c1", code: "diabetes" } },
      ],
    };

    const tool = new SyntheaTool({
      logger,
      syntheaJar: "/synthea.jar",
      execFileFn: async (cmd, args) => {
        invocation = { cmd, args };
        return { stdout: "" };
      },
      fsFns: {
        // The jar read is an existence check; everything else is a FHIR bundle.
        readFile: async (path) =>
          path === "/synthea.jar" ? Buffer.from("") : JSON.stringify(bundle),
        readdir: async () => ["patient1.json"],
        mkdtemp: async () => "/tmp/synthea-abc",
        rm: async () => {},
      },
    });

    const datasets = await tool.generate({
      name: "test",
      population: 50,
      modules: ["diabetes"],
      seed: 42,
    });

    assert.strictEqual(invocation.cmd, "java");
    const expectedFlags = [
      "-jar",
      "/synthea.jar",
      "-p",
      "50",
      "-s",
      "42",
      "-m",
      "diabetes",
    ];
    for (const flag of expectedFlags) {
      assert.ok(invocation.args.includes(flag));
    }

    // Verify dataset flattening by resource type
    assert.strictEqual(datasets.length, 2);
    const sortedNames = datasets.map((d) => d.name).sort();
    assert.deepStrictEqual(sortedNames, ["test_condition", "test_patient"]);

    const patients = datasets.find((d) => d.name === "test_patient");
    assert.strictEqual(patients.records.length, 1);
    assert.strictEqual(patients.metadata.tool, "synthea");
    assert.strictEqual(patients.metadata.resourceType, "Patient");
  });
});
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import { describe, test } from "node:test";
|
|
2
|
+
import assert from "node:assert";
|
|
3
|
+
import { tokenize } from "../dsl/tokenizer.js";
|
|
4
|
+
|
|
5
|
+
describe("tokenize", () => {
  // Drop the trailing EOF token so tests can compare just the real tokens.
  const withoutEof = (ts) => ts.filter((tok) => tok.type !== "EOF");

  describe("basic tokens", () => {
    test("tokenizes keywords", () => {
      const ts = tokenize("universe domain industry seed");
      assert.deepStrictEqual(withoutEof(ts), [
        { type: "KEYWORD", value: "universe", line: 1 },
        { type: "KEYWORD", value: "domain", line: 1 },
        { type: "KEYWORD", value: "industry", line: 1 },
        { type: "KEYWORD", value: "seed", line: 1 },
      ]);
    });

    test("tokenizes all structural keywords", () => {
      const ts = withoutEof(
        tokenize("org department team people project framework"),
      );
      assert.strictEqual(ts.length, 6);
      for (const tok of ts) {
        assert.strictEqual(tok.type, "KEYWORD");
      }
    });

    test("tokenizes string literals", () => {
      const [first] = tokenize('"hello world"');
      assert.deepStrictEqual(first, {
        type: "STRING",
        value: "hello world",
        line: 1,
      });
    });

    test("tokenizes string with escape sequences", () => {
      const [first] = tokenize('"line1\\nline2\\ttab\\\\"');
      assert.strictEqual(first.value, "line1\nline2\ttab\\");
    });

    test("tokenizes integer numbers", () => {
      const [first] = tokenize("42");
      assert.deepStrictEqual(first, { type: "NUMBER", value: "42", line: 1 });
    });

    test("tokenizes decimal numbers", () => {
      const [first] = tokenize("3.14");
      assert.deepStrictEqual(first, { type: "NUMBER", value: "3.14", line: 1 });
    });

    test("tokenizes identifiers", () => {
      const [first, second] = tokenize("myVar another_one");
      assert.deepStrictEqual(first, { type: "IDENT", value: "myVar", line: 1 });
      assert.deepStrictEqual(second, {
        type: "IDENT",
        value: "another_one",
        line: 1,
      });
    });

    test("distinguishes keywords from identifiers", () => {
      const [kw, ident] = tokenize("universe myUniverse");
      assert.strictEqual(kw.type, "KEYWORD");
      assert.strictEqual(ident.type, "IDENT");
    });
  });

  describe("special tokens", () => {
    test("tokenizes @references", () => {
      const [first, second] = tokenize("@apollo @themis");
      assert.deepStrictEqual(first, {
        type: "AT_IDENT",
        value: "apollo",
        line: 1,
      });
      assert.deepStrictEqual(second, {
        type: "AT_IDENT",
        value: "themis",
        line: 1,
      });
    });

    test("tokenizes percentages", () => {
      const [first] = tokenize("50%");
      assert.deepStrictEqual(first, { type: "PERCENT", value: "50", line: 1 });
    });

    test("tokenizes dates in YYYY-MM format", () => {
      const [first] = tokenize("2024-01");
      assert.deepStrictEqual(first, {
        type: "DATE",
        value: "2024-01",
        line: 1,
      });
    });

    test("tokenizes multiple dates", () => {
      const [first, second] = tokenize("2024-01 2025-12");
      assert.strictEqual(first.type, "DATE");
      assert.strictEqual(first.value, "2024-01");
      assert.strictEqual(second.type, "DATE");
      assert.strictEqual(second.value, "2025-12");
    });
  });

  describe("comments", () => {
    test("skips single-line comments", () => {
      const ts = withoutEof(tokenize("universe // this is a comment\ndomain"));
      assert.strictEqual(ts.length, 2);
      assert.strictEqual(ts[0].value, "universe");
      assert.strictEqual(ts[1].value, "domain");
    });

    test("skips multi-line comments", () => {
      const ts = withoutEof(
        tokenize("universe /* multi\nline\ncomment */ domain"),
      );
      assert.strictEqual(ts.length, 2);
      assert.strictEqual(ts[0].value, "universe");
      assert.strictEqual(ts[1].value, "domain");
    });

    test("tracks line numbers through multi-line comments", () => {
      const ts = tokenize("universe\n/* line 2\nline 3 */\ndomain");
      const domainTok = ts.find((tok) => tok.value === "domain");
      assert.strictEqual(domainTok.line, 4);
    });
  });

  describe("braces and brackets", () => {
    test("tokenizes braces", () => {
      const [open, close] = tokenize("{ }");
      assert.strictEqual(open.type, "LBRACE");
      assert.strictEqual(close.type, "RBRACE");
    });

    test("tokenizes brackets", () => {
      const [open, close] = tokenize("[ ]");
      assert.strictEqual(open.type, "LBRACKET");
      assert.strictEqual(close.type, "RBRACKET");
    });

    test("tokenizes commas", () => {
      const [first] = tokenize(",");
      assert.strictEqual(first.type, "COMMA");
    });

    test("tokenizes mixed structural characters", () => {
      const types = withoutEof(tokenize("[1, 2, 3]")).map((tok) => tok.type);
      assert.deepStrictEqual(types, [
        "LBRACKET",
        "NUMBER",
        "COMMA",
        "NUMBER",
        "COMMA",
        "NUMBER",
        "RBRACKET",
      ]);
    });
  });

  describe("line tracking", () => {
    test("tracks line numbers across newlines", () => {
      const ts = tokenize("universe\ndomain\nindustry");
      assert.strictEqual(ts[0].line, 1);
      assert.strictEqual(ts[1].line, 2);
      assert.strictEqual(ts[2].line, 3);
    });

    test("ignores carriage returns for line counting", () => {
      const ts = tokenize("universe\r\ndomain");
      assert.strictEqual(ts[0].line, 1);
      assert.strictEqual(ts[1].line, 2);
    });
  });

  describe("edge cases", () => {
    test("returns only EOF for empty input", () => {
      const ts = tokenize("");
      assert.strictEqual(ts.length, 1);
      assert.strictEqual(ts[0].type, "EOF");
    });

    test("returns only EOF for whitespace-only input", () => {
      const ts = tokenize(" \t\n \n ");
      assert.strictEqual(ts.length, 1);
      assert.strictEqual(ts[0].type, "EOF");
    });

    test("throws on unknown characters", () => {
      assert.throws(() => tokenize("~"), /Unexpected character '~'/);
    });

    test("throws on unknown character with line number", () => {
      assert.throws(() => tokenize("\n\n~"), /at line 3/);
    });
  });

  describe("negative numbers", () => {
    test("tokenizes negative integers", () => {
      const [first] = tokenize("-5");
      assert.deepStrictEqual(first, { type: "NUMBER", value: "-5", line: 1 });
    });

    test("tokenizes negative decimals", () => {
      const [first] = tokenize("-3.14");
      assert.deepStrictEqual(first, {
        type: "NUMBER",
        value: "-3.14",
        line: 1,
      });
    });
  });

  describe("EOF token", () => {
    test("always ends with EOF", () => {
      const ts = tokenize("universe");
      assert.strictEqual(ts.at(-1).type, "EOF");
    });
  });

  describe("complex input", () => {
    test("tokenizes a minimal universe declaration", () => {
      const source = `universe test_co {
  domain "engineering"
  seed 42
}`;
      const types = tokenize(source).map((tok) => tok.type);
      assert.deepStrictEqual(types, [
        "KEYWORD", // universe
        "IDENT", // test_co
        "LBRACE",
        "KEYWORD", // domain
        "STRING", // "engineering"
        "KEYWORD", // seed
        "NUMBER", // 42
        "RBRACE",
        "EOF",
      ]);
    });
  });
});
|