@forwardimpact/libsyntheticgen 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js ADDED
@@ -0,0 +1,7 @@
1
+ export { DslParser, createDslParser } from "./dsl/index.js";
2
+ export { EntityGenerator, createEntityGenerator } from "./engine/tier0.js";
3
+ export { createSeededRNG } from "./engine/rng.js";
4
+ export { collectProseKeys } from "./engine/prose-keys.js";
5
+ export { FakerTool, createFakerTool } from "./tools/faker.js";
6
+ export { SyntheaTool, createSyntheaTool } from "./tools/synthea.js";
7
+ export { SdvTool, createSdvTool } from "./tools/sdv.js";
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "@forwardimpact/libsyntheticgen",
3
+ "version": "0.1.1",
4
+ "description": "DSL parsing and deterministic entity generation for synthetic data",
5
+ "license": "Apache-2.0",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/forwardimpact/monorepo",
9
+ "directory": "libraries/libsyntheticgen"
10
+ },
11
+ "type": "module",
12
+ "main": "index.js",
13
+ "exports": {
14
+ ".": "./index.js",
15
+ "./dsl": "./dsl/index.js",
16
+ "./engine": "./engine/tier0.js",
17
+ "./engine/entities": "./engine/entities.js",
18
+ "./engine/activity": "./engine/activity.js",
19
+ "./rng": "./engine/rng.js",
20
+ "./tools/faker": "./tools/faker.js",
21
+ "./tools/synthea": "./tools/synthea.js",
22
+ "./tools/sdv": "./tools/sdv.js"
23
+ },
24
+ "dependencies": {
25
+ "@faker-js/faker": "^10.4.0",
26
+ "@forwardimpact/libutil": "^0.1.61",
27
+ "seedrandom": "^3.0.5"
28
+ },
29
+ "engines": {
30
+ "node": ">=18.0.0"
31
+ },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ }
35
+ }
@@ -0,0 +1,322 @@
1
+ import { describe, test } from "node:test";
2
+ import assert from "node:assert";
3
+ import { tokenize } from "../dsl/tokenizer.js";
4
+ import { parse } from "../dsl/parser.js";
5
+ import { createSeededRNG } from "../engine/rng.js";
6
+ import { buildEntities } from "../engine/entities.js";
7
+ import { generateActivity } from "../engine/activity.js";
8
+
9
+ /**
10
+ * Test helper: runs DSL text through tokenize -> parse -> buildEntities -> generateActivity.
11
+ * @param {string} source - DSL text; the `seed` of the parsed AST is used to seed the RNG.
12
+ * @returns {object} `{ ast, orgs, departments, teams, people, projects, activity }`.
13
+ */
14
+ function generateFromDsl(source) {
15
+ const ast = parse(tokenize(source));
16
+ const rng = createSeededRNG(ast.seed); // the same RNG instance is handed to both generation steps below
17
+ const { orgs, departments, teams, people, projects } = buildEntities(
18
+ ast,
19
+ rng,
20
+ );
21
+ const activity = generateActivity(ast, rng, people, teams); // activity is derived from the people and teams built above
22
+ return { ast, orgs, departments, teams, people, projects, activity };
23
+ }
24
+
25
+ const MINI_UNIVERSE = `universe test {
26
+ domain "test.example"
27
+ seed 42
28
+
29
+ org hq { name "HQ" location "NY" }
30
+
31
+ department eng {
32
+ name "Engineering"
33
+ parent hq
34
+ headcount 10
35
+
36
+ team alpha {
37
+ name "Alpha Team"
38
+ size 5
39
+ manager @zeus
40
+ repos ["repo-a"]
41
+ }
42
+
43
+ team beta {
44
+ name "Beta Team"
45
+ size 5
46
+ manager @hera
47
+ repos ["repo-b"]
48
+ }
49
+ }
50
+
51
+ people {
52
+ count 10
53
+ names "greek_mythology"
54
+ distribution {
55
+ L1 40%
56
+ L2 30%
57
+ L3 20%
58
+ L4 10%
59
+ }
60
+ disciplines {
61
+ software_engineering 80%
62
+ data_engineering 20%
63
+ }
64
+ }
65
+
66
+ project proj_a {
67
+ name "Project Alpha"
68
+ type "platform"
69
+ teams [alpha, beta]
70
+ timeline_start 2024-06
71
+ timeline_end 2025-06
72
+ }
73
+
74
+ snapshots {
75
+ quarterly_from 2024-07
76
+ quarterly_to 2025-07
77
+ account_id "acct_test"
78
+ comments_per_snapshot 5
79
+ }
80
+
81
+ scenario pressure {
82
+ name "Release Pressure"
83
+ timerange_start 2024-07
84
+ timerange_end 2025-01
85
+
86
+ affect alpha {
87
+ github_commits "spike"
88
+ github_prs "elevated"
89
+ dx_drivers {
90
+ deep_work { trajectory "declining" magnitude -6 }
91
+ ease_of_release { trajectory "declining" magnitude -4 }
92
+ }
93
+ evidence_skills [architecture_design]
94
+ evidence_floor "working"
95
+ }
96
+ }
97
+
98
+ scenario improvement {
99
+ name "Culture Improvement"
100
+ timerange_start 2025-01
101
+ timerange_end 2025-06
102
+
103
+ affect beta {
104
+ github_commits "moderate"
105
+ github_prs "moderate"
106
+ dx_drivers {
107
+ learning_culture { trajectory "rising" magnitude 5 }
108
+ connectedness { trajectory "rising" magnitude 3 }
109
+ }
110
+ evidence_skills [team_collaboration]
111
+ evidence_floor "foundational"
112
+ }
113
+ }
114
+
115
+ framework {
116
+ proficiencies [awareness, foundational, working, practitioner, expert]
117
+ drivers {
118
+ deep_work {
119
+ name "Deep Work"
120
+ skills [architecture_design, data_integration]
121
+ behaviours []
122
+ }
123
+ ease_of_release {
124
+ name "Ease of Release"
125
+ skills [change_management, sre_practices]
126
+ behaviours []
127
+ }
128
+ learning_culture {
129
+ name "Learning Culture"
130
+ skills [mentoring, technical_writing]
131
+ behaviours []
132
+ }
133
+ connectedness {
134
+ name "Connectedness"
135
+ skills [team_collaboration, stakeholder_management]
136
+ behaviours []
137
+ }
138
+ }
139
+ }
140
+ }`;
141
+
142
+ describe("activity generation", () => {
143
+ describe("deriveInitiatives", () => {
144
+ test("generates initiatives from declining scenario drivers", () => {
145
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
146
+ const initiatives = activity.initiatives;
147
+ assert.ok(
148
+ initiatives.length > 0,
149
+ "should generate at least one initiative",
150
+ );
151
+
152
+ // Declining deep_work with magnitude -6 should produce priority 0
153
+ const deepWorkInit = initiatives.find(
154
+ (i) => i._driver_id === "deep_work",
155
+ );
156
+ assert.ok(deepWorkInit, "should have initiative for deep_work driver");
157
+ assert.strictEqual(deepWorkInit.priority, 0);
158
+ assert.ok(deepWorkInit.name.includes("Alpha Team"));
159
+ assert.ok(deepWorkInit.scorecard_id);
160
+ assert.ok(deepWorkInit.owner.email);
161
+ });
162
+
163
+ test("generates initiatives from rising scenario drivers", () => {
164
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
165
+ const risingInit = activity.initiatives.find(
166
+ (i) => i._driver_id === "learning_culture",
167
+ );
168
+ assert.ok(risingInit, "should have initiative for rising driver");
169
+ assert.ok(
170
+ risingInit.priority >= 3,
171
+ "rising initiatives should have lower priority",
172
+ );
173
+ assert.ok(risingInit.name.includes("Sustain"));
174
+ });
175
+
176
+ test("initiative has all required GetDX API fields", () => {
177
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
178
+ const init = activity.initiatives[0];
179
+ assert.ok(init.id);
180
+ assert.ok(init.name);
181
+ assert.ok(init.description);
182
+ assert.ok(init.scorecard_id);
183
+ assert.ok(init.scorecard_name);
184
+ assert.strictEqual(typeof init.priority, "number");
185
+ assert.strictEqual(typeof init.published, "boolean");
186
+ assert.ok(init.complete_by);
187
+ assert.strictEqual(typeof init.percentage_complete, "number");
188
+ assert.strictEqual(typeof init.passed_checks, "number");
189
+ assert.strictEqual(typeof init.total_checks, "number");
190
+ assert.strictEqual(typeof init.remaining_dev_days, "number");
191
+ assert.ok(init.owner.id);
192
+ assert.ok(init.owner.name);
193
+ assert.ok(init.owner.email);
194
+ assert.ok(Array.isArray(init.tags));
195
+ });
196
+ });
197
+
198
+ describe("deriveScorecards", () => {
199
+ test("generates a scorecard per initiative", () => {
200
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
201
+ const scorecardIds = new Set(activity.scorecards.map((s) => s.id));
202
+ for (const init of activity.initiatives) {
203
+ assert.ok(
204
+ scorecardIds.has(init.scorecard_id),
205
+ `initiative ${init.id} should reference valid scorecard`,
206
+ );
207
+ }
208
+ });
209
+
210
+ test("scorecard has checks derived from driver skills", () => {
211
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
212
+ const sc = activity.scorecards[0];
213
+ assert.ok(sc.checks.length > 0, "scorecard should have checks");
214
+ assert.ok(sc.levels.length === 3, "scorecard should have 3 levels");
215
+ assert.strictEqual(sc.type, "LEVEL");
216
+ });
217
+ });
218
+
219
+ describe("generateCommentKeys", () => {
220
+ test("generates comment keys for active snapshots", () => {
221
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
222
+ assert.ok(
223
+ activity.commentKeys.length > 0,
224
+ "should generate comment keys",
225
+ );
226
+ });
227
+
228
+ test("comment keys have required metadata", () => {
229
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
230
+ const ck = activity.commentKeys[0];
231
+ assert.ok(ck.snapshot_id);
232
+ assert.ok(ck.email);
233
+ assert.ok(ck.team_id);
234
+ assert.ok(ck.timestamp);
235
+ assert.ok(ck.driver_id);
236
+ assert.ok(ck.driver_name);
237
+ assert.ok(ck.trajectory);
238
+ assert.strictEqual(typeof ck.magnitude, "number");
239
+ assert.ok(ck.scenario_name);
240
+ assert.ok(ck.team_name);
241
+ });
242
+
243
+ test("declining drivers weighted higher in comment selection", () => {
244
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
245
+ // The first snapshot overlaps with the "pressure" scenario (declining)
246
+ const firstSnap = activity.snapshots[0];
247
+ const firstSnapComments = activity.commentKeys.filter(
248
+ (ck) => ck.snapshot_id === firstSnap.snapshot_id,
249
+ );
250
+ if (firstSnapComments.length > 0) { // NOTE(review): if no comment keys match the first snapshot, nothing is asserted and the test still passes
251
+ const decliningCount = firstSnapComments.filter(
252
+ (ck) => ck.trajectory === "declining",
253
+ ).length;
254
+ assert.ok(
255
+ decliningCount >= firstSnapComments.length * 0.5, // at least half of the first snapshot's comment keys must be "declining"
256
+ "declining comments should be weighted higher",
257
+ );
258
+ }
259
+ });
260
+ });
261
+
262
+ describe("generateRosterSnapshots", () => {
263
+ test("generates one roster snapshot per survey snapshot", () => {
264
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
265
+ assert.strictEqual(
266
+ activity.rosterSnapshots.length,
267
+ activity.snapshots.length,
268
+ );
269
+ });
270
+
271
+ test("first roster snapshot matches initial roster", () => {
272
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
273
+ const first = activity.rosterSnapshots[0];
274
+ assert.strictEqual(first.members, activity.roster.length);
275
+ assert.strictEqual(first.changes.length, 0);
276
+ });
277
+
278
+ test("subsequent roster snapshots have changes", () => { // NOTE(review): despite the title, only the second snapshot (index 1) is checked
279
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
280
+ if (activity.rosterSnapshots.length > 1) { // NOTE(review): with fewer than two roster snapshots nothing is asserted and the test still passes
281
+ const second = activity.rosterSnapshots[1];
282
+ assert.ok(
283
+ second.changes.length > 0,
284
+ "second snapshot should have roster changes",
285
+ );
286
+ }
287
+ });
288
+
289
+ test("roster snapshot entries have required fields", () => {
290
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
291
+ const entry = activity.rosterSnapshots[0].roster[0];
292
+ assert.ok(entry.email);
293
+ assert.ok(entry.name);
294
+ assert.ok(entry.discipline);
295
+ assert.ok(entry.level);
296
+ assert.ok(entry.team_id);
297
+ });
298
+ });
299
+
300
+ describe("deriveProjectTeams", () => {
301
+ test("generates project teams from DSL projects", () => {
302
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
303
+ assert.ok(
304
+ activity.projectTeams.length > 0,
305
+ "should generate project teams",
306
+ );
307
+ assert.strictEqual(activity.projectTeams[0].id, "proj_a");
308
+ });
309
+
310
+ test("project team members have allocation", () => {
311
+ const { activity } = generateFromDsl(MINI_UNIVERSE);
312
+ const pt = activity.projectTeams[0];
313
+ assert.ok(pt.members.length > 0);
314
+ for (const m of pt.members) {
315
+ assert.ok(m.email);
316
+ assert.ok(m.job);
317
+ assert.strictEqual(typeof m.allocation, "number");
318
+ assert.ok(m.allocation > 0 && m.allocation <= 1.0);
319
+ }
320
+ });
321
+ });
322
+ });
@@ -0,0 +1,98 @@
1
+ import { describe, test } from "node:test";
2
+ import assert from "node:assert";
3
+ import { FakerTool } from "../tools/faker.js";
4
+
5
+ const logger = { // no-op logger stub passed to the FakerTool constructor in the tests below
6
+ info() {}, // intentionally empty: log output is discarded
7
+ error() {}, // intentionally empty: log output is discarded
8
+ };
9
+
10
+ describe("FakerTool", () => {
11
+ test("requires logger", () => {
12
+ assert.throws(() => new FakerTool({}), /requires logger/);
13
+ });
14
+
15
+ test("checkAvailability returns true", async () => {
16
+ const tool = new FakerTool({ logger });
17
+ assert.strictEqual(await tool.checkAvailability(), true);
18
+ });
19
+
20
+ test("generates correct number of records", async () => {
21
+ const tool = new FakerTool({ logger });
22
+ const datasets = await tool.generate({
23
+ name: "test",
24
+ rows: 10,
25
+ fields: { id: "string.uuid", name: "person.fullName" },
26
+ seed: 42,
27
+ });
28
+ assert.strictEqual(datasets.length, 1);
29
+ assert.strictEqual(datasets[0].records.length, 10);
30
+ assert.strictEqual(datasets[0].name, "test");
31
+ assert.strictEqual(datasets[0].metadata.tool, "faker");
32
+ });
33
+
34
+ test("records have all specified fields", async () => {
35
+ const tool = new FakerTool({ logger });
36
+ const datasets = await tool.generate({
37
+ name: "test",
38
+ rows: 3,
39
+ fields: { id: "string.uuid", email: "internet.email" },
40
+ seed: 42,
41
+ });
42
+ for (const record of datasets[0].records) {
43
+ assert.ok("id" in record, "record should have id");
44
+ assert.ok("email" in record, "record should have email");
45
+ assert.ok(typeof record.id === "string");
46
+ assert.ok(typeof record.email === "string");
47
+ }
48
+ });
49
+
50
+ test("deterministic with same seed", async () => {
51
+ const tool = new FakerTool({ logger });
52
+ const config = {
53
+ name: "det",
54
+ rows: 5,
55
+ fields: { name: "person.fullName" },
56
+ seed: 99,
57
+ };
58
+ const a = await tool.generate(config);
59
+ const b = await tool.generate(config);
60
+ assert.deepStrictEqual(a[0].records, b[0].records);
61
+ });
62
+
63
+ test("different seeds produce different records", async () => {
64
+ const tool = new FakerTool({ logger });
65
+ const base = { name: "det", rows: 5, fields: { name: "person.fullName" } };
66
+ const a = await tool.generate({ ...base, seed: 1 });
67
+ const b = await tool.generate({ ...base, seed: 2 });
68
+ assert.notDeepStrictEqual(a[0].records, b[0].records);
69
+ });
70
+
71
+ test("throws on unknown provider", async () => {
72
+ const tool = new FakerTool({ logger });
73
+ await assert.rejects(
74
+ () =>
75
+ tool.generate({
76
+ name: "bad",
77
+ rows: 1,
78
+ fields: { x: "nonexistent.provider" },
79
+ seed: 42,
80
+ }),
81
+ /Unknown Faker provider/,
82
+ );
83
+ });
84
+
85
+ test("throws on non-function provider", async () => {
86
+ const tool = new FakerTool({ logger });
87
+ await assert.rejects(
88
+ () =>
89
+ tool.generate({
90
+ name: "bad",
91
+ rows: 1,
92
+ fields: { x: "string" },
93
+ seed: 42,
94
+ }),
95
+ /is not a function/,
96
+ );
97
+ });
98
+ });
@@ -0,0 +1,142 @@
1
+ import { describe, test } from "node:test";
2
+ import assert from "node:assert";
3
+ import { tokenize } from "../dsl/tokenizer.js";
4
+ import { parse } from "../dsl/parser.js";
5
+
6
+ function parseDsl(source) { // Test helper: tokenizes the DSL text and returns whatever parse() produces for that token stream
7
+ return parse(tokenize(source));
8
+ }
9
+
10
+ describe("dataset and output parsing", () => {
11
+ test("parses empty universe with no datasets", () => {
12
+ const ast = parseDsl("universe test {}");
13
+ assert.deepStrictEqual(ast.datasets, []);
14
+ assert.deepStrictEqual(ast.outputs, []);
15
+ });
16
+
17
+ test("parses faker dataset block", () => {
18
+ const ast = parseDsl(`universe test {
19
+ dataset researchers {
20
+ tool faker
21
+ rows 100
22
+ fields {
23
+ name "person.fullName"
24
+ email "internet.email"
25
+ }
26
+ }
27
+ }`);
28
+ assert.strictEqual(ast.datasets.length, 1);
29
+ const ds = ast.datasets[0];
30
+ assert.strictEqual(ds.id, "researchers");
31
+ assert.strictEqual(ds.tool, "faker");
32
+ assert.strictEqual(ds.config.rows, 100);
33
+ assert.deepStrictEqual(ds.config.fields, {
34
+ name: "person.fullName",
35
+ email: "internet.email",
36
+ });
37
+ });
38
+
39
+ test("parses synthea dataset block", () => {
40
+ const ast = parseDsl(`universe test {
41
+ dataset patients {
42
+ tool synthea
43
+ population 200
44
+ modules [diabetes, cardiovascular]
45
+ }
46
+ }`);
47
+ const ds = ast.datasets[0];
48
+ assert.strictEqual(ds.tool, "synthea");
49
+ assert.strictEqual(ds.config.population, 200);
50
+ assert.deepStrictEqual(ds.config.modules, ["diabetes", "cardiovascular"]);
51
+ });
52
+
53
+ test("parses sdv dataset block", () => {
54
+ const ast = parseDsl(`universe test {
55
+ dataset claims {
56
+ tool sdv
57
+ metadata "schemas/meta.json"
58
+ data {
59
+ claims "data/sample.csv"
60
+ }
61
+ rows 5000
62
+ }
63
+ }`);
64
+ const ds = ast.datasets[0];
65
+ assert.strictEqual(ds.tool, "sdv");
66
+ assert.strictEqual(ds.config.metadata, "schemas/meta.json");
67
+ assert.deepStrictEqual(ds.config.data, { claims: "data/sample.csv" });
68
+ assert.strictEqual(ds.config.rows, 5000);
69
+ });
70
+
71
+ test("parses output blocks", () => {
72
+ const ast = parseDsl(`universe test {
73
+ output patients json { path "out/patients.json" }
74
+ output claims sql { path "out/claims.sql" table "my_claims" }
75
+ }`);
76
+ assert.strictEqual(ast.outputs.length, 2);
77
+ assert.strictEqual(ast.outputs[0].dataset, "patients");
78
+ assert.strictEqual(ast.outputs[0].format, "json");
79
+ assert.strictEqual(ast.outputs[0].config.path, "out/patients.json");
80
+ assert.strictEqual(ast.outputs[1].format, "sql");
81
+ assert.strictEqual(ast.outputs[1].config.table, "my_claims");
82
+ });
83
+
84
+ test("parses all six output formats", () => {
85
+ const formats = ["json", "yaml", "csv", "markdown", "parquet", "sql"];
86
+ for (const fmt of formats) {
87
+ const ast = parseDsl(
88
+ `universe test { output ds ${fmt} { path "out/file" } }`,
89
+ );
90
+ assert.strictEqual(ast.outputs[0].format, fmt);
91
+ }
92
+ });
93
+
94
+ test("throws on unknown output format", () => {
95
+ assert.throws(
96
+ () => parseDsl(`universe test { output ds xlsx { path "out/file" } }`),
97
+ /Unknown output format 'xlsx'/,
98
+ );
99
+ });
100
+
101
+ test("throws on unknown keyword in dataset", () => {
102
+ assert.throws(
103
+ () => parseDsl(`universe test { dataset x { tool faker bogus 5 } }`),
104
+ /Unexpected 'bogus' in dataset/,
105
+ );
106
+ });
107
+
108
+ test("throws on unknown keyword in output", () => {
109
+ assert.throws(
110
+ () => parseDsl(`universe test { output ds json { path "x" bogus "y" } }`),
111
+ /Unexpected 'bogus' in output/,
112
+ );
113
+ });
114
+
115
+ test("parses mixed org and dataset blocks", () => {
116
+ const ast = parseDsl(`universe test {
117
+ domain "example.com"
118
+ seed 42
119
+ org hq { name "HQ" location "NYC" }
120
+ dataset researchers {
121
+ tool faker
122
+ rows 10
123
+ fields { name "person.fullName" }
124
+ }
125
+ output researchers json { path "out/r.json" }
126
+ }`);
127
+ assert.strictEqual(ast.orgs.length, 1);
128
+ assert.strictEqual(ast.datasets.length, 1);
129
+ assert.strictEqual(ast.outputs.length, 1);
130
+ assert.strictEqual(ast.domain, "example.com");
131
+ });
132
+
133
+ test("parses multiple dataset blocks", () => {
134
+ const ast = parseDsl(`universe test {
135
+ dataset a { tool faker rows 5 fields { x "string.uuid" } }
136
+ dataset b { tool faker rows 10 fields { y "person.firstName" } }
137
+ }`);
138
+ assert.strictEqual(ast.datasets.length, 2);
139
+ assert.strictEqual(ast.datasets[0].id, "a");
140
+ assert.strictEqual(ast.datasets[1].id, "b");
141
+ });
142
+ });