@forwardimpact/libsyntheticgen 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/dsl/index.js +36 -0
- package/dsl/parser.js +728 -0
- package/dsl/tokenizer.js +282 -0
- package/engine/activity.js +956 -0
- package/engine/entities.js +144 -0
- package/engine/names.js +290 -0
- package/engine/prose-keys.js +182 -0
- package/engine/rng.js +43 -0
- package/engine/tier0.js +63 -0
- package/index.js +7 -0
- package/package.json +35 -0
- package/test/activity.test.js +322 -0
- package/test/faker.test.js +98 -0
- package/test/parser-dataset.test.js +142 -0
- package/test/parser.test.js +596 -0
- package/test/rng.test.js +236 -0
- package/test/sdv.test.js +67 -0
- package/test/synthea.test.js +95 -0
- package/test/tokenizer.test.js +266 -0
- package/tools/faker.js +83 -0
- package/tools/sdv.js +93 -0
- package/tools/sdv_generate.py +29 -0
- package/tools/synthea.js +126 -0
package/dsl/tokenizer.js
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DSL Tokenizer — converts universe DSL source to token stream.
|
|
3
|
+
*
|
|
4
|
+
* Token types:
|
|
5
|
+
* KEYWORD - reserved words (universe, department, team, etc.)
|
|
6
|
+
* IDENT - identifiers (variable / entity names)
|
|
7
|
+
* STRING - double-quoted string literals
|
|
8
|
+
* NUMBER - integer or decimal numbers
|
|
9
|
+
* PERCENT - number followed by %
|
|
10
|
+
* DATE - YYYY-MM format
|
|
11
|
+
* AT_IDENT - @name references
|
|
12
|
+
* LBRACE - {
|
|
13
|
+
* RBRACE - }
|
|
14
|
+
* LBRACKET - [
|
|
15
|
+
* RBRACKET - ]
|
|
16
|
+
* COMMA - ,
|
|
17
|
+
* EOF - end of input
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
 * Reserved words of the DSL. An identifier-shaped word that appears in this
 * set is emitted as a KEYWORD token; any other word is emitted as IDENT.
 */
const KEYWORDS = new Set([
  "universe",
  "domain",
  "industry",
  "seed",
  "org",
  "department",
  "team",
  "name",
  "location",
  "parent",
  "headcount",
  "size",
  "manager",
  "repos",
  "people",
  "count",
  "names",
  "distribution",
  "disciplines",
  "project",
  "type",
  "phase",
  "teams",
  "timeline_start",
  "timeline_end",
  "prose_topic",
  "prose_tone",
  "snapshots",
  "quarterly_from",
  "quarterly_to",
  "account_id",
  "scenario",
  "timerange_start",
  "timerange_end",
  "affect",
  "github_commits",
  "github_prs",
  "dx_drivers",
  "trajectory",
  "magnitude",
  "evidence_skills",
  "evidence_floor",
  "framework",
  "proficiencies",
  "maturities",
  "capabilities",
  "levels",
  "behaviours",
  "drivers",
  "tracks",
  "stages",
  "skills",
  "title",
  "rank",
  "experience",
  "roleTitle",
  "specialization",
  "isProfessional",
  "core",
  "supporting",
  "broad",
  "validTracks",
  "content",
  "articles",
  "article_topics",
  "blogs",
  "faqs",
  "howtos",
  "howto_topics",
  "reviews",
  "comments",
  "courses",
  "events",
  "personas",
  "persona_levels",
  "briefings_per_persona",
  "notes_per_persona",
  "comments_per_snapshot",
  // Dataset and output blocks
  "dataset",
  "tool",
  "population",
  "modules",
  "metadata",
  "data",
  "rows",
  "fields",
  "output",
  "table",
  "path",
  "json",
  "yaml",
  "csv",
  "markdown",
  "parquet",
  "sql",
]);

/** Matches a complete DATE token: four digits, a hyphen, two digits. */
const DATE_RE = /^\d{4}-\d{2}$/;

/** Matches exactly four digits — the year part that may start a DATE. */
const YEAR_RE = /^\d{4}$/;

/** First character of a NUMBER / PERCENT / DATE token. */
const NUMBER_START_RE = /[\d-]/;

/** Characters accepted in the body of a number (digits and dots). */
const NUMBER_BODY_RE = /[\d.]/;

/** A single decimal digit. */
const DIGIT_RE = /\d/;

/** First character of an identifier or keyword. */
const WORD_START_RE = /[a-zA-Z_]/;

/** Characters that may continue an identifier, keyword or @reference name. */
const WORD_CHAR_RE = /[a-zA-Z0-9_]/;

/** Single-character punctuation and the token type each one produces. */
const PUNCTUATION = new Map([
  ["{", "LBRACE"],
  ["}", "RBRACE"],
  ["[", "LBRACKET"],
  ["]", "RBRACKET"],
  [",", "COMMA"],
]);

/**
 * @typedef {{ type: string, value: string, line: number }} Token
 */

/**
 * Tokenize DSL source into a token stream.
 *
 * The scanner walks the source one character at a time, skipping whitespace
 * and `//` / `/* ... *\/` comments, and always terminates the stream with an
 * EOF token. Every token carries the 1-based line on which it starts.
 *
 * Unterminated strings and block comments are accepted leniently: they simply
 * run to the end of the input.
 *
 * @param {string} source - DSL source text.
 * @returns {Token[]} Tokens in source order, ending with an EOF token.
 * @throws {Error} When a character cannot start any token.
 */
export function tokenize(source) {
  const tokens = [];
  let i = 0;
  let line = 1;

  while (i < source.length) {
    const ch = source[i];

    // Skip whitespace
    if (ch === " " || ch === "\t" || ch === "\r") {
      i++;
      continue;
    }

    // Newline
    if (ch === "\n") {
      line++;
      i++;
      continue;
    }

    // Single-line comment: stop at the newline so the branch above counts it.
    if (ch === "/" && source[i + 1] === "/") {
      while (i < source.length && source[i] !== "\n") i++;
      continue;
    }

    // Multi-line comment
    if (ch === "/" && source[i + 1] === "*") {
      const close = source.indexOf("*/", i + 2);
      const bodyEnd = close === -1 ? source.length : close;
      // Count every newline in the comment body, including one in the very
      // last position of an unterminated comment, so later line numbers
      // (and the EOF line) stay accurate.
      for (let j = i + 2; j < bodyEnd; j++) {
        if (source[j] === "\n") line++;
      }
      i = close === -1 ? source.length : close + 2;
      continue;
    }

    // String literal
    if (ch === '"') {
      // The token reports the line of its opening quote.
      const startLine = line;
      i++;
      let str = "";
      while (i < source.length && source[i] !== '"') {
        if (source[i] === "\\" && i + 1 < source.length) {
          // Escape sequence: \n and \t are translated, any other escaped
          // character (including \" and \\) is taken literally.
          i++;
          if (source[i] === "n") str += "\n";
          else if (source[i] === "t") str += "\t";
          else str += source[i];
        } else {
          str += source[i];
        }
        // A raw newline inside the literal still advances the line counter;
        // otherwise every following token would report a line that is too low.
        if (source[i] === "\n") line++;
        i++;
      }
      i++; // closing quote
      tokens.push({ type: "STRING", value: str, line: startLine });
      continue;
    }

    // Braces, brackets and comma
    const punctuationType = PUNCTUATION.get(ch);
    if (punctuationType !== undefined) {
      tokens.push({ type: punctuationType, value: ch, line });
      i++;
      continue;
    }

    // @identifier — the token value excludes the leading "@".
    if (ch === "@") {
      i++;
      let name = "";
      while (i < source.length && WORD_CHAR_RE.test(source[i])) {
        name += source[i];
        i++;
      }
      tokens.push({ type: "AT_IDENT", value: name, line });
      continue;
    }

    // Number, percent, date, or negative number
    if (NUMBER_START_RE.test(ch)) {
      let num = "";
      if (ch === "-") {
        num += "-";
        i++;
      }
      while (i < source.length && NUMBER_BODY_RE.test(source[i])) {
        num += source[i];
        i++;
      }
      // NOTE(review): a "-" with no digits after it is emitted as NUMBER "-",
      // and multiple dots ("1.2.3") are not rejected here — confirm whether
      // the parser is expected to validate these.

      // Check for date (YYYY-MM)
      if (source[i] === "-" && YEAR_RE.test(num)) {
        num += "-";
        i++;
        while (i < source.length && DIGIT_RE.test(source[i])) {
          num += source[i];
          i++;
        }
        if (DATE_RE.test(num)) {
          tokens.push({ type: "DATE", value: num, line });
          continue;
        }
        // Not exactly two month digits: the accumulated text (e.g. "2024-1")
        // falls through and is emitted as PERCENT or NUMBER below.
      }

      // Check for percent — the token value excludes the "%" sign.
      if (source[i] === "%") {
        tokens.push({ type: "PERCENT", value: num, line });
        i++;
        continue;
      }

      tokens.push({ type: "NUMBER", value: num, line });
      continue;
    }

    // Identifier or keyword
    if (WORD_START_RE.test(ch)) {
      let word = "";
      while (i < source.length && WORD_CHAR_RE.test(source[i])) {
        word += source[i];
        i++;
      }
      const type = KEYWORDS.has(word) ? "KEYWORD" : "IDENT";
      tokens.push({ type, value: word, line });
      continue;
    }

    throw new Error(`Unexpected character '${source[i]}' at line ${line}`);
  }

  tokens.push({ type: "EOF", value: "", line });
  return tokens;
}
|