@forwardimpact/libsyntheticgen 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dsl/parser.js +25 -1
- package/engine/entities.js +29 -9
- package/engine/prose-keys.js +13 -1
- package/engine/tier0.js +1 -0
- package/index.js +5 -0
- package/package.json +3 -1
- package/vocabulary.js +65 -0
package/dsl/parser.js
CHANGED
|
@@ -173,6 +173,15 @@ export function parse(tokens) {
|
|
|
173
173
|
people.disciplines[disc] = pct;
|
|
174
174
|
}
|
|
175
175
|
expect("RBRACE");
|
|
176
|
+
} else if (kw.value === "archetypes") {
|
|
177
|
+
expect("LBRACE");
|
|
178
|
+
people.archetypes = {};
|
|
179
|
+
while (peek().type !== "RBRACE") {
|
|
180
|
+
const archetype = parseStringOrIdent();
|
|
181
|
+
const pct = parseNumberValue();
|
|
182
|
+
people.archetypes[archetype] = pct;
|
|
183
|
+
}
|
|
184
|
+
expect("RBRACE");
|
|
176
185
|
} else
|
|
177
186
|
throw new Error(
|
|
178
187
|
`Unexpected '${kw.value}' in people at line ${kw.line}`,
|
|
@@ -199,6 +208,10 @@ export function parse(tokens) {
|
|
|
199
208
|
else if (kw.value === "prose_topic")
|
|
200
209
|
proj.prose_topic = parseStringValue();
|
|
201
210
|
else if (kw.value === "prose_tone") proj.prose_tone = parseStringValue();
|
|
211
|
+
else if (kw.value === "milestones") proj.milestones = parseArray();
|
|
212
|
+
else if (kw.value === "risks") proj.risks = parseArray();
|
|
213
|
+
else if (kw.value === "technical_choices")
|
|
214
|
+
proj.technical_choices = parseArray();
|
|
202
215
|
else
|
|
203
216
|
throw new Error(
|
|
204
217
|
`Unexpected '${kw.value}' in project at line ${kw.line}`,
|
|
@@ -263,6 +276,8 @@ export function parse(tokens) {
|
|
|
263
276
|
while (peek().type !== "RBRACE") {
|
|
264
277
|
const kw = advance();
|
|
265
278
|
if (kw.value === "name") scenario.name = parseStringValue();
|
|
279
|
+
else if (kw.value === "narrative")
|
|
280
|
+
scenario.narrative = parseStringValue();
|
|
266
281
|
else if (kw.value === "timerange_start")
|
|
267
282
|
scenario.timerange_start = parseDateValue();
|
|
268
283
|
else if (kw.value === "timerange_end")
|
|
@@ -634,7 +649,16 @@ export function parse(tokens) {
|
|
|
634
649
|
content.briefings_per_persona = parseNumberValue();
|
|
635
650
|
else if (kw.value === "notes_per_persona")
|
|
636
651
|
content.notes_per_persona = parseNumberValue();
|
|
637
|
-
else
|
|
652
|
+
else if (kw.value === "blog_topics") {
|
|
653
|
+
expect("LBRACE");
|
|
654
|
+
content.blog_topics = {};
|
|
655
|
+
while (peek().type !== "RBRACE") {
|
|
656
|
+
const topic = parseStringOrIdent();
|
|
657
|
+
const pct = parseNumberValue();
|
|
658
|
+
content.blog_topics[topic] = pct;
|
|
659
|
+
}
|
|
660
|
+
expect("RBRACE");
|
|
661
|
+
} else
|
|
638
662
|
throw new Error(
|
|
639
663
|
`Unexpected '${kw.value}' in content at line ${kw.line}`,
|
|
640
664
|
);
|
package/engine/entities.js
CHANGED
|
@@ -15,39 +15,42 @@ import {
|
|
|
15
15
|
* Build all entities from AST and RNG.
|
|
16
16
|
* @param {import('../dsl/parser.js').UniverseAST} ast
|
|
17
17
|
* @param {import('./rng.js').SeededRNG} rng
|
|
18
|
+
* @param {object} [logger] - Logger instance for warnings
|
|
18
19
|
* @returns {{ orgs: object[], departments: object[], teams: object[], people: object[], projects: object[] }}
|
|
19
20
|
*/
|
|
20
|
-
export function buildEntities(ast, rng) {
|
|
21
|
+
export function buildEntities(ast, rng, logger) {
|
|
21
22
|
const domain = ast.domain;
|
|
22
23
|
const orgs = ast.orgs.map((o) => ({
|
|
23
24
|
...o,
|
|
24
|
-
iri: `https://${domain}/org/${o.id}`,
|
|
25
|
+
iri: `https://${domain}/id/org/${o.id}`,
|
|
25
26
|
}));
|
|
26
27
|
const departments = ast.departments.map((d) => ({
|
|
27
28
|
...d,
|
|
28
|
-
iri: `https://${domain}/department/${d.id}`,
|
|
29
|
+
iri: `https://${domain}/id/department/${d.id}`,
|
|
29
30
|
}));
|
|
30
31
|
const teams = ast.teams.map((t) => ({
|
|
31
32
|
...t,
|
|
32
33
|
repos: t.repos || [],
|
|
33
|
-
iri: `https://${domain}/team/${t.id}`,
|
|
34
|
+
iri: `https://${domain}/id/team/${t.id}`,
|
|
34
35
|
getdx_team_id: `gdx_team_${t.id}`,
|
|
35
36
|
}));
|
|
36
|
-
const people = generatePeople(ast, rng, teams, domain);
|
|
37
|
+
const people = generatePeople(ast, rng, teams, domain, logger);
|
|
37
38
|
const projects = ast.projects.map((p) => ({
|
|
38
39
|
...p,
|
|
39
40
|
teams: p.teams || [],
|
|
40
41
|
phase: p.phase || null,
|
|
41
42
|
prose_topic: p.prose_topic || null,
|
|
42
43
|
prose_tone: p.prose_tone || null,
|
|
43
|
-
iri: `https://${domain}/project/${p.id}`,
|
|
44
|
+
iri: `https://${domain}/id/project/${p.id}`,
|
|
44
45
|
}));
|
|
45
46
|
|
|
46
47
|
return { orgs, departments, teams, people, projects };
|
|
47
48
|
}
|
|
48
49
|
|
|
49
|
-
function generatePeople(ast, rng, teams, domain) {
|
|
50
|
-
const { count, distribution, disciplines } = ast.people;
|
|
50
|
+
function generatePeople(ast, rng, teams, domain, logger) {
|
|
51
|
+
const { count, distribution, disciplines, archetypes } = ast.people;
|
|
52
|
+
const archetypeKeys = archetypes ? Object.keys(archetypes) : [];
|
|
53
|
+
const archetypeWeights = archetypes ? Object.values(archetypes) : [];
|
|
51
54
|
const people = [];
|
|
52
55
|
const usedNames = new Set();
|
|
53
56
|
|
|
@@ -71,6 +74,9 @@ function generatePeople(ast, rng, teams, domain) {
|
|
|
71
74
|
for (const team of teams) {
|
|
72
75
|
if (!team.manager) continue;
|
|
73
76
|
const name = managerAssignments.get(team.id);
|
|
77
|
+
const archetype = archetypeKeys.length
|
|
78
|
+
? archetypeKeys[rng.weightedPick(archetypeWeights)]
|
|
79
|
+
: "steady_contributor";
|
|
74
80
|
people.push(
|
|
75
81
|
makePerson(
|
|
76
82
|
name,
|
|
@@ -80,6 +86,8 @@ function generatePeople(ast, rng, teams, domain) {
|
|
|
80
86
|
domain,
|
|
81
87
|
true,
|
|
82
88
|
null,
|
|
89
|
+
undefined,
|
|
90
|
+
archetype,
|
|
83
91
|
),
|
|
84
92
|
);
|
|
85
93
|
}
|
|
@@ -92,6 +100,9 @@ function generatePeople(ast, rng, teams, domain) {
|
|
|
92
100
|
const disc = discKeys[rng.weightedPick(discWeights)];
|
|
93
101
|
const team = rng.pick(teams);
|
|
94
102
|
const mgr = people.find((p) => p.is_manager && p.team_id === team.id);
|
|
103
|
+
const archetype = archetypeKeys.length
|
|
104
|
+
? archetypeKeys[rng.weightedPick(archetypeWeights)]
|
|
105
|
+
: "steady_contributor";
|
|
95
106
|
people.push(
|
|
96
107
|
makePerson(
|
|
97
108
|
name,
|
|
@@ -102,10 +113,17 @@ function generatePeople(ast, rng, teams, domain) {
|
|
|
102
113
|
false,
|
|
103
114
|
mgr?.email || null,
|
|
104
115
|
`2023-${pad2(rng.randomInt(1, 12))}-${pad2(rng.randomInt(1, 28))}`,
|
|
116
|
+
archetype,
|
|
105
117
|
),
|
|
106
118
|
);
|
|
107
119
|
}
|
|
108
120
|
|
|
121
|
+
if (people.length < count && logger) {
|
|
122
|
+
logger.warn(
|
|
123
|
+
`People shortfall: requested ${count}, generated ${people.length} (name pool exhausted)`,
|
|
124
|
+
);
|
|
125
|
+
}
|
|
126
|
+
|
|
109
127
|
return people;
|
|
110
128
|
}
|
|
111
129
|
|
|
@@ -118,6 +136,7 @@ function makePerson(
|
|
|
118
136
|
isManager,
|
|
119
137
|
managerEmail,
|
|
120
138
|
hireDate = "2023-01-15",
|
|
139
|
+
archetype = "steady_contributor",
|
|
121
140
|
) {
|
|
122
141
|
const id = name.toLowerCase().replace(/\s+/g, "-");
|
|
123
142
|
return {
|
|
@@ -134,7 +153,8 @@ function makePerson(
|
|
|
134
153
|
is_manager: isManager,
|
|
135
154
|
manager_email: managerEmail,
|
|
136
155
|
hire_date: hireDate,
|
|
137
|
-
|
|
156
|
+
archetype,
|
|
157
|
+
iri: `https://${domain}/id/person/${id}`,
|
|
138
158
|
};
|
|
139
159
|
}
|
|
140
160
|
|
package/engine/prose-keys.js
CHANGED
|
@@ -11,13 +11,15 @@
|
|
|
11
11
|
*/
|
|
12
12
|
export function collectProseKeys(entities) {
|
|
13
13
|
const keys = new Map();
|
|
14
|
+
const orgName = entities.orgs[0]?.name || "BioNova";
|
|
14
15
|
|
|
15
16
|
// Organization README prose
|
|
16
17
|
keys.set("org_readme", {
|
|
17
|
-
topic: `${
|
|
18
|
+
topic: `${orgName} company overview`,
|
|
18
19
|
tone: "corporate, informative",
|
|
19
20
|
length: "3-4 paragraphs",
|
|
20
21
|
domain: entities.domain,
|
|
22
|
+
orgName,
|
|
21
23
|
});
|
|
22
24
|
|
|
23
25
|
// Project descriptions
|
|
@@ -28,6 +30,7 @@ export function collectProseKeys(entities) {
|
|
|
28
30
|
tone: proj.prose_tone || "technical",
|
|
29
31
|
length: "2-3 paragraphs",
|
|
30
32
|
domain: entities.domain,
|
|
33
|
+
orgName,
|
|
31
34
|
});
|
|
32
35
|
}
|
|
33
36
|
}
|
|
@@ -41,6 +44,7 @@ export function collectProseKeys(entities) {
|
|
|
41
44
|
tone: "technical, informative",
|
|
42
45
|
length: "6-8 paragraphs",
|
|
43
46
|
domain: entities.domain,
|
|
47
|
+
orgName,
|
|
44
48
|
});
|
|
45
49
|
}
|
|
46
50
|
|
|
@@ -51,6 +55,7 @@ export function collectProseKeys(entities) {
|
|
|
51
55
|
tone: "conversational, technical",
|
|
52
56
|
length: "4-5 paragraphs",
|
|
53
57
|
domain: entities.domain,
|
|
58
|
+
orgName,
|
|
54
59
|
});
|
|
55
60
|
}
|
|
56
61
|
|
|
@@ -61,6 +66,7 @@ export function collectProseKeys(entities) {
|
|
|
61
66
|
tone: "helpful, concise",
|
|
62
67
|
length: "1 paragraph",
|
|
63
68
|
domain: entities.domain,
|
|
69
|
+
orgName,
|
|
64
70
|
});
|
|
65
71
|
}
|
|
66
72
|
|
|
@@ -71,6 +77,7 @@ export function collectProseKeys(entities) {
|
|
|
71
77
|
tone: "instructional",
|
|
72
78
|
length: "5-6 paragraphs",
|
|
73
79
|
domain: entities.domain,
|
|
80
|
+
orgName,
|
|
74
81
|
});
|
|
75
82
|
}
|
|
76
83
|
|
|
@@ -82,6 +89,7 @@ export function collectProseKeys(entities) {
|
|
|
82
89
|
length: "1-2 sentences",
|
|
83
90
|
maxTokens: 100,
|
|
84
91
|
domain: entities.domain,
|
|
92
|
+
orgName,
|
|
85
93
|
});
|
|
86
94
|
}
|
|
87
95
|
|
|
@@ -93,6 +101,7 @@ export function collectProseKeys(entities) {
|
|
|
93
101
|
length: "1-2 sentences",
|
|
94
102
|
maxTokens: 80,
|
|
95
103
|
domain: entities.domain,
|
|
104
|
+
orgName,
|
|
96
105
|
});
|
|
97
106
|
}
|
|
98
107
|
}
|
|
@@ -111,6 +120,7 @@ export function collectProseKeys(entities) {
|
|
|
111
120
|
tone: "professional, concise",
|
|
112
121
|
length: "2-3 paragraphs",
|
|
113
122
|
domain: entities.domain,
|
|
123
|
+
orgName,
|
|
114
124
|
role: `${persona.level} ${persona.discipline}`,
|
|
115
125
|
});
|
|
116
126
|
}
|
|
@@ -122,6 +132,7 @@ export function collectProseKeys(entities) {
|
|
|
122
132
|
tone: "personal, technical",
|
|
123
133
|
length: "1-2 paragraphs",
|
|
124
134
|
domain: entities.domain,
|
|
135
|
+
orgName,
|
|
125
136
|
role: `${persona.level} ${persona.discipline}`,
|
|
126
137
|
});
|
|
127
138
|
}
|
|
@@ -141,6 +152,7 @@ export function collectProseKeys(entities) {
|
|
|
141
152
|
length: "1-2 sentences",
|
|
142
153
|
maxTokens: 80,
|
|
143
154
|
domain: entities.domain,
|
|
155
|
+
orgName,
|
|
144
156
|
role: `${ck.person_level} ${ck.person_discipline.replace(/_/g, " ")} on the ${ck.team_name}`,
|
|
145
157
|
scenario: ck.scenario_name,
|
|
146
158
|
driver: ck.driver_name,
|
package/engine/tier0.js
CHANGED
package/index.js
CHANGED
|
@@ -2,6 +2,11 @@ export { DslParser, createDslParser } from "./dsl/index.js";
|
|
|
2
2
|
export { EntityGenerator, createEntityGenerator } from "./engine/tier0.js";
|
|
3
3
|
export { createSeededRNG } from "./engine/rng.js";
|
|
4
4
|
export { collectProseKeys } from "./engine/prose-keys.js";
|
|
5
|
+
export {
|
|
6
|
+
PROFICIENCY_LEVELS,
|
|
7
|
+
MATURITY_LEVELS,
|
|
8
|
+
STAGE_NAMES,
|
|
9
|
+
} from "./vocabulary.js";
|
|
5
10
|
export { FakerTool, createFakerTool } from "./tools/faker.js";
|
|
6
11
|
export { SyntheaTool, createSyntheaTool } from "./tools/synthea.js";
|
|
7
12
|
export { SdvTool, createSdvTool } from "./tools/sdv.js";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forwardimpact/libsyntheticgen",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "DSL parsing and deterministic entity generation for synthetic data",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"repository": {
|
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
"./engine": "./engine/tier0.js",
|
|
17
17
|
"./engine/entities": "./engine/entities.js",
|
|
18
18
|
"./engine/activity": "./engine/activity.js",
|
|
19
|
+
"./vocabulary": "./vocabulary.js",
|
|
20
|
+
"./vocabulary.js": "./vocabulary.js",
|
|
19
21
|
"./rng": "./engine/rng.js",
|
|
20
22
|
"./tools/faker": "./tools/faker.js",
|
|
21
23
|
"./tools/synthea": "./tools/synthea.js",
|
package/vocabulary.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared vocabulary constants for synthetic data generation.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for proficiency levels, maturity levels,
|
|
5
|
+
* and stage names used across libsyntheticgen, libsyntheticprose,
|
|
6
|
+
* and libsyntheticrender.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/** @type {string[]} */
|
|
10
|
+
export const PROFICIENCY_LEVELS = [
|
|
11
|
+
"awareness",
|
|
12
|
+
"foundational",
|
|
13
|
+
"working",
|
|
14
|
+
"practitioner",
|
|
15
|
+
"expert",
|
|
16
|
+
];
|
|
17
|
+
|
|
18
|
+
/** @type {string[]} */
|
|
19
|
+
export const MATURITY_LEVELS = [
|
|
20
|
+
"emerging",
|
|
21
|
+
"developing",
|
|
22
|
+
"practicing",
|
|
23
|
+
"role_modeling",
|
|
24
|
+
"exemplifying",
|
|
25
|
+
];
|
|
26
|
+
|
|
27
|
+
/** @type {string[]} */
|
|
28
|
+
export const STAGE_NAMES = [
|
|
29
|
+
"specify",
|
|
30
|
+
"plan",
|
|
31
|
+
"scaffold",
|
|
32
|
+
"code",
|
|
33
|
+
"review",
|
|
34
|
+
"deploy",
|
|
35
|
+
];
|
|
36
|
+
|
|
37
|
+
/** @type {Record<string, number>} */
|
|
38
|
+
const PROFICIENCY_INDEX = Object.fromEntries(
|
|
39
|
+
PROFICIENCY_LEVELS.map((p, i) => [p, i]),
|
|
40
|
+
);
|
|
41
|
+
|
|
42
|
+
/** @type {Record<string, number>} */
|
|
43
|
+
const ARCHETYPE_OFFSET = {
|
|
44
|
+
high_performer: 1,
|
|
45
|
+
steady_contributor: 0,
|
|
46
|
+
new_hire: -1,
|
|
47
|
+
struggling: -2,
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Adjust a proficiency level based on a person's archetype.
|
|
52
|
+
* @param {string} expected - Base proficiency level
|
|
53
|
+
* @param {string} archetype - Person archetype
|
|
54
|
+
* @returns {string} Adjusted proficiency level
|
|
55
|
+
*/
|
|
56
|
+
export function adjustProficiency(expected, archetype) {
|
|
57
|
+
const base = PROFICIENCY_INDEX[expected];
|
|
58
|
+
if (base === undefined) return expected;
|
|
59
|
+
const offset = ARCHETYPE_OFFSET[archetype] || 0;
|
|
60
|
+
const clamped = Math.max(
|
|
61
|
+
0,
|
|
62
|
+
Math.min(PROFICIENCY_LEVELS.length - 1, base + offset),
|
|
63
|
+
);
|
|
64
|
+
return PROFICIENCY_LEVELS[clamped];
|
|
65
|
+
}
|