@forwardimpact/libuniverse 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,11 @@
3
3
  // fit-universe CLI — run with --help for usage.
4
4
 
5
5
  import { resolve, join, dirname } from "path";
6
- import { mkdir, writeFile } from "fs/promises";
6
+ import { mkdir, writeFile, readFile, readdir, mkdtemp, rm } from "fs/promises";
7
7
  import { fileURLToPath } from "url";
8
+ import { execFile } from "child_process";
9
+ import { promisify } from "util";
10
+ import { tmpdir } from "os";
8
11
  import { format } from "prettier";
9
12
  import { createScriptConfig } from "@forwardimpact/libconfig";
10
13
  import { createLogger } from "@forwardimpact/libtelemetry";
@@ -14,6 +17,9 @@ import { TemplateLoader } from "@forwardimpact/libtemplate/loader";
14
17
  import {
15
18
  createDslParser,
16
19
  createEntityGenerator,
20
+ FakerTool,
21
+ SyntheaTool,
22
+ SdvTool,
17
23
  } from "@forwardimpact/libsyntheticgen";
18
24
  import {
19
25
  ProseEngine,
@@ -46,7 +52,11 @@ async function main() {
46
52
  SUPABASE_SERVICE_ROLE_KEY: null,
47
53
  });
48
54
 
49
- const mode = args.cached ? "cached" : args.generate ? "generate" : "no-prose";
55
+ const mode = args.noProse
56
+ ? "no-prose"
57
+ : args.generate
58
+ ? "generate"
59
+ : "cached";
50
60
 
51
61
  let llmApi = null;
52
62
  if (mode === "generate") {
@@ -100,6 +110,42 @@ async function main() {
100
110
  const validator = new ContentValidator(logger);
101
111
  const formatter = new ContentFormatter(format, logger);
102
112
 
113
+ const execFileFn = promisify(execFile);
114
+
115
+ /**
116
+ * Create a tool instance by name.
117
+ * @param {string} name
118
+ * @param {object} deps
119
+ * @returns {object}
120
+ */
121
+ function toolFactory(name, deps) {
122
+ switch (name) {
123
+ case "faker":
124
+ return new FakerTool({ logger: deps.logger });
125
+ case "synthea":
126
+ return new SyntheaTool({
127
+ logger: deps.logger,
128
+ syntheaJar:
129
+ process.env.SYNTHEA_JAR || "synthea-with-dependencies.jar",
130
+ execFileFn,
131
+ fsFns: {
132
+ readFile,
133
+ readdir,
134
+ mkdtemp: (prefix) => mkdtemp(join(tmpdir(), prefix)),
135
+ rm,
136
+ },
137
+ });
138
+ case "sdv":
139
+ return new SdvTool({
140
+ logger: deps.logger,
141
+ execFileFn,
142
+ fsFns: { writeFile, rm },
143
+ });
144
+ default:
145
+ throw new Error(`Unknown tool: ${name}`);
146
+ }
147
+ }
148
+
103
149
  const pipeline = new Pipeline({
104
150
  dslParser,
105
151
  entityGenerator,
@@ -108,19 +154,13 @@ async function main() {
108
154
  renderer,
109
155
  validator,
110
156
  formatter,
157
+ toolFactory,
111
158
  logger,
112
159
  });
113
160
 
114
161
  const result = await pipeline.run({
115
162
  universePath:
116
- args.universe ||
117
- join(
118
- dirname(
119
- fileURLToPath(import.meta.resolve("@forwardimpact/libsyntheticgen")),
120
- ),
121
- "data",
122
- "default.dsl",
123
- ),
163
+ args.universe || join(monorepoRoot, "examples", "universe.dsl"),
124
164
  only: args.only || null,
125
165
  schemaDir,
126
166
  });
@@ -154,26 +194,19 @@ async function main() {
154
194
  }
155
195
  } else if (!args.dryRun) {
156
196
  for (const [storagePath, content] of result.rawDocuments) {
157
- const fullPath = join(
158
- monorepoRoot,
159
- "examples/activity/raw",
160
- storagePath,
161
- );
197
+ const fullPath = join(monorepoRoot, "data/activity/raw", storagePath);
162
198
  await mkdir(dirname(fullPath), { recursive: true });
163
199
  await writeFile(fullPath, content);
164
200
  }
165
201
  console.log(
166
- `${result.rawDocuments.size} raw documents written to examples/activity/raw/`,
202
+ `${result.rawDocuments.size} raw documents written to data/activity/raw/`,
167
203
  );
168
204
  }
169
205
 
170
206
  // Write evidence directly (no raw source system for evidence)
171
207
  const evidence = result.entities.activity?.evidence;
172
208
  if (evidence && !args.dryRun && !args.load) {
173
- const evidencePath = join(
174
- monorepoRoot,
175
- "examples/activity/evidence.json",
176
- );
209
+ const evidencePath = join(monorepoRoot, "data/activity/evidence.json");
177
210
  await mkdir(dirname(evidencePath), { recursive: true });
178
211
  const formatted = await formatContent(
179
212
  evidencePath,
@@ -203,6 +236,16 @@ async function main() {
203
236
  console.log(` ${icon} ${check.name}`);
204
237
  }
205
238
 
239
+ // Prose cache stats
240
+ const { hits, generated, misses } = result.stats.prose;
241
+ const proseTotal = hits + generated + misses;
242
+ if (proseTotal > 0) {
243
+ const rate = Math.round((hits / proseTotal) * 100);
244
+ console.log(
245
+ `\nProse: ${hits} hits, ${generated} generated, ${misses} misses (${rate}% hit rate)`,
246
+ );
247
+ }
248
+
206
249
  if (!result.validation.passed) {
207
250
  console.error(`\n${result.validation.failures} validation failures`);
208
251
  process.exit(1);
@@ -217,7 +260,7 @@ function parseArgs(argv) {
217
260
  const args = {};
218
261
  for (const arg of argv) {
219
262
  if (arg === "--help" || arg === "-h") args.help = true;
220
- else if (arg === "--cached") args.cached = true;
263
+ else if (arg === "--no-prose") args.noProse = true;
221
264
  else if (arg === "--generate") args.generate = true;
222
265
  else if (arg === "--strict") args.strict = true;
223
266
  else if (arg === "--dry-run") args.dryRun = true;
@@ -235,9 +278,9 @@ Usage:
235
278
  npx fit-universe [options]
236
279
 
237
280
  Options:
238
- --generate Generate prose via LLM (requires LLM_TOKEN)
239
- --cached Use cached prose from .prose-cache.json
240
- --strict Fail on cache miss (use with --cached)
281
+ --generate Generate prose via LLM and update cache (requires LLM_TOKEN)
282
+ --no-prose Skip prose entirely (structural scaffolding only)
283
+ --strict Fail on cache miss (use with default cached mode)
241
284
  --dry-run Show what would be written without writing
242
285
  --load Load raw documents to Supabase Storage
243
286
  --only=<type> Render only one content type (html|pathway|raw|markdown)
@@ -245,20 +288,21 @@ Options:
245
288
  -h, --help Show this help message
246
289
 
247
290
  Prose modes:
248
- (default) Structural generation only, no LLM calls
249
- --cached Read prose from .prose-cache.json
250
- --generate Call LLM to generate prose, write to cache
291
+ (default) Use cached prose from .prose-cache.json
292
+ --generate Call LLM to generate prose and update the cache
293
+ --no-prose No prose produces minimal structural data only
251
294
 
252
295
  Content types:
253
- html Organizational articles, guides, FAQs (examples/organizational)
254
- pathway YAML framework files (examples/pathway)
255
- raw Roster, GitHub events, evidence (examples/activity)
256
- markdown Briefings, notes, KB content (examples/personal)
296
+ html Organizational articles, guides, FAQs (data/knowledge)
297
+ pathway YAML framework files (data/pathway)
298
+ raw Roster, GitHub events, evidence (data/activity)
299
+ markdown Briefings, notes, KB content (data/personal)
257
300
 
258
301
  Examples:
259
- npx fit-universe # Structural only
260
- npx fit-universe --generate # Full generation with LLM prose
261
- npx fit-universe --cached --strict # Cached prose, fail on miss
302
+ npx fit-universe # Cached prose (default)
303
+ npx fit-universe --generate # Generate new prose via LLM
304
+ npx fit-universe --strict # Cached prose, fail on miss
305
+ npx fit-universe --no-prose # Structural only, no prose
262
306
  npx fit-universe --only=pathway # Generate pathway data only
263
307
  npx fit-universe --universe=custom.dsl # Use custom DSL file
264
308
  `);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libuniverse",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Synthetic data universe DSL and generation engine",
5
5
  "license": "Apache-2.0",
6
6
  "repository": {
@@ -27,8 +27,8 @@
27
27
  "@forwardimpact/libsyntheticrender": "^0.1.0",
28
28
  "@forwardimpact/libtelemetry": "^0.1.23",
29
29
  "@forwardimpact/libtemplate": "^0.2.0",
30
- "@supabase/supabase-js": "^2.0.0",
31
- "prettier": "^3.7.4"
30
+ "@supabase/supabase-js": "^2.100.1",
31
+ "prettier": "^3.8.1"
32
32
  },
33
33
  "engines": {
34
34
  "node": ">=18.0.0"
package/pipeline.js CHANGED
@@ -6,7 +6,11 @@
6
6
 
7
7
  import { readFile } from "fs/promises";
8
8
  import { join } from "path";
9
- import { validateLinks, validateHTML } from "@forwardimpact/libsyntheticrender";
9
+ import {
10
+ validateLinks,
11
+ validateHTML,
12
+ renderDataset,
13
+ } from "@forwardimpact/libsyntheticrender";
10
14
  import { collectProseKeys } from "@forwardimpact/libsyntheticgen";
11
15
  import { loadSchemas } from "@forwardimpact/libsyntheticprose/pathway";
12
16
 
@@ -24,6 +28,7 @@ export class Pipeline {
24
28
  * @param {import('@forwardimpact/libsyntheticrender').Renderer} deps.renderer - Renderer
25
29
  * @param {import('@forwardimpact/libsyntheticrender').ContentValidator} deps.validator - Content validator
26
30
  * @param {import('@forwardimpact/libsyntheticrender').ContentFormatter} deps.formatter - Content formatter
31
+ * @param {Function} [deps.toolFactory] - (toolName, deps) => tool instance
27
32
  * @param {object} deps.logger - Logger instance
28
33
  */
29
34
  constructor({
@@ -34,6 +39,7 @@ export class Pipeline {
34
39
  renderer,
35
40
  validator,
36
41
  formatter,
42
+ toolFactory,
37
43
  logger,
38
44
  }) {
39
45
  if (!dslParser) throw new Error("dslParser is required");
@@ -52,6 +58,7 @@ export class Pipeline {
52
58
  this.renderer = renderer;
53
59
  this.validator = validator;
54
60
  this.formatter = formatter;
61
+ this.toolFactory = toolFactory || null;
55
62
  this.logger = logger;
56
63
  }
57
64
 
@@ -62,7 +69,7 @@ export class Pipeline {
62
69
  * @param {string} options.universePath - Path to the universe.dsl file
63
70
  * @param {string} [options.only=null] - Render only a specific content type
64
71
  * @param {string|null} [options.schemaDir=null] - Path to JSON schema directory
65
- * @returns {Promise<{files: Map<string,string>, rawDocuments: Map<string,string>, entities: object, validation: object}>}
72
+ * @returns {Promise<{files: Map<string,string>, rawDocuments: Map<string,string>, entities: object, validation: object, stats: {prose: {hits: number, misses: number, generated: number}, files: number, rawDocuments: number}}>}
66
73
  */
67
74
  async run(options) {
68
75
  const { universePath, only = null, schemaDir = null } = options;
@@ -73,27 +80,36 @@ export class Pipeline {
73
80
  const source = await readFile(universePath, "utf-8");
74
81
  const ast = this.dslParser.parse(source);
75
82
 
76
- // 2. Generate entity graph (Tier 0)
77
- log.info("pipeline", "Generating entity graph");
78
- const entities = this.entityGenerator.generate(ast);
79
-
80
- // 3. Prose generation (Tier 1/2)
81
- const proseKeys = collectProseKeys(entities);
83
+ // 2–4. Org-and-pathway generation (only when org blocks are present)
84
+ const hasOrgBlocks = ast.people !== null;
85
+ let entities = { domain: ast.domain, industry: ast.industry };
82
86
  const prose = new Map();
83
- const totalKeys = proseKeys.size;
84
- let keyIndex = 0;
85
- if (this.proseEngine.mode !== "no-prose") {
86
- log.info(
87
- "pipeline",
88
- `Generating prose (${this.proseEngine.mode} mode, ${totalKeys} keys)`,
89
- );
90
- }
91
- for (const [key, context] of proseKeys) {
92
- keyIndex++;
93
- const result = await this.proseEngine.generateProse(key, context);
94
- if (result) prose.set(key, result);
87
+
88
+ if (hasOrgBlocks) {
89
+ // 2. Generate entity graph (Tier 0)
90
+ log.info("pipeline", "Generating entity graph");
91
+ entities = this.entityGenerator.generate(ast);
92
+
93
+ // 3. Prose generation (Tier 1/2)
94
+ const proseKeys = collectProseKeys(entities);
95
+ const totalKeys = proseKeys.size;
96
+ let keyIndex = 0;
95
97
  if (this.proseEngine.mode !== "no-prose") {
96
- log.info("prose", `[${keyIndex}/${totalKeys}] ${key}`);
98
+ log.info(
99
+ "pipeline",
100
+ `Generating prose (${this.proseEngine.mode} mode, ${totalKeys} keys)`,
101
+ );
102
+ }
103
+ for (const [key, context] of proseKeys) {
104
+ keyIndex++;
105
+ const result = await this.proseEngine.generateProse(key, context);
106
+ if (result) prose.set(key, result);
107
+ if (this.proseEngine.mode !== "no-prose") {
108
+ log.info("prose", `[${keyIndex}/${totalKeys}] ${key}`);
109
+ if (keyIndex % 25 === 0) {
110
+ this.proseEngine.saveCache();
111
+ }
112
+ }
97
113
  }
98
114
  }
99
115
 
@@ -102,7 +118,7 @@ export class Pipeline {
102
118
  const rawDocuments = new Map();
103
119
  let htmlLinked = null;
104
120
 
105
- const shouldRender = (type) => !only || only === type;
121
+ const shouldRender = (type) => hasOrgBlocks && (!only || only === type);
106
122
 
107
123
  if (shouldRender("html")) {
108
124
  log.info("render", "Rendering HTML (Pass 1: deterministic skeleton)");
@@ -122,22 +138,27 @@ export class Pipeline {
122
138
  entities.domain,
123
139
  );
124
140
  for (const [name, content] of enriched) {
125
- files.set(join("examples/organizational", name), content);
141
+ files.set(join("data/knowledge", name), content);
126
142
  }
127
143
  } else {
128
144
  for (const [name, content] of htmlFiles) {
129
- files.set(join("examples/organizational", name), content);
145
+ files.set(join("data/knowledge", name), content);
130
146
  }
131
147
  }
132
148
 
133
149
  files.set(
134
- "examples/organizational/README.md",
150
+ "data/knowledge/README.md",
135
151
  this.renderer.renderReadme(entities, prose),
136
152
  );
137
153
  files.set(
138
- "examples/organizational/ONTOLOGY.md",
154
+ "data/knowledge/ONTOLOGY.md",
139
155
  this.renderer.renderOntology(entities),
140
156
  );
157
+
158
+ const htmlCount = [...files.keys()].filter((p) =>
159
+ p.startsWith("data/knowledge/"),
160
+ ).length;
161
+ log.info("render", `HTML: ${htmlCount} files`);
141
162
  }
142
163
 
143
164
  if (shouldRender("pathway")) {
@@ -156,8 +177,9 @@ export class Pipeline {
156
177
  });
157
178
  const pathwayFiles = this.renderer.renderPathway(pathwayData);
158
179
  for (const [name, content] of pathwayFiles) {
159
- files.set(`examples/pathway/${name}`, content);
180
+ files.set(`data/pathway/${name}`, content);
160
181
  }
182
+ log.info("render", `Pathway: ${pathwayFiles.size} files`);
161
183
  }
162
184
  }
163
185
 
@@ -170,28 +192,88 @@ export class Pipeline {
170
192
 
171
193
  const activityFiles = this.renderer.renderActivity(entities);
172
194
  for (const [name, content] of activityFiles) {
173
- files.set(join("examples/activity", name), content);
195
+ files.set(join("data/activity", name), content);
174
196
  }
197
+ log.info(
198
+ "render",
199
+ `Raw: ${raw.size} documents, ${activityFiles.size} activity files`,
200
+ );
175
201
  }
176
202
 
177
203
  if (shouldRender("markdown")) {
178
204
  log.info("render", "Rendering markdown");
179
205
  const md = this.renderer.renderMarkdown(entities, prose);
180
206
  for (const [name, content] of md) {
181
- files.set(join("examples/personal", name), content);
207
+ files.set(join("data/personal", name), content);
208
+ }
209
+ log.info("render", `Markdown: ${md.size} files`);
210
+ }
211
+
212
+ // Dataset tool execution and output rendering
213
+ if (ast.datasets.length > 0 && this.toolFactory) {
214
+ log.info("pipeline", `Generating ${ast.datasets.length} dataset(s)`);
215
+ const datasets = new Map();
216
+ for (const ds of ast.datasets) {
217
+ const tool = this.toolFactory(ds.tool, { logger: log });
218
+ try {
219
+ await tool.checkAvailability();
220
+ } catch (err) {
221
+ log.info(
222
+ "pipeline",
223
+ `Skipping dataset '${ds.id}': ${ds.tool} not available (${err.message})`,
224
+ );
225
+ continue;
226
+ }
227
+ const results = await tool.generate({
228
+ ...ds.config,
229
+ seed: ast.seed,
230
+ name: ds.id,
231
+ });
232
+ for (const dataset of results) {
233
+ datasets.set(dataset.name, dataset);
234
+ }
235
+ }
236
+
237
+ log.info("pipeline", `Rendering ${ast.outputs.length} dataset output(s)`);
238
+ for (const out of ast.outputs) {
239
+ const dataset = datasets.get(out.dataset);
240
+ if (!dataset) {
241
+ log.info(
242
+ "pipeline",
243
+ `Skipping output '${out.dataset}': dataset not generated`,
244
+ );
245
+ continue;
246
+ }
247
+ const rendered = await renderDataset(dataset, out.format, out.config);
248
+ for (const [path, content] of rendered) {
249
+ files.set(path, content);
250
+ }
182
251
  }
183
252
  }
184
253
 
185
254
  // Save prose cache after all generation
186
- this.proseEngine.saveCache();
255
+ if (hasOrgBlocks) {
256
+ this.proseEngine.saveCache();
257
+ }
187
258
 
188
259
  // 5. Format outputs with Prettier
189
260
  log.info("format", "Formatting output files with Prettier");
190
261
  const formattedFiles = await this.formatter.format(files);
191
262
  const formattedRawDocuments = await this.formatter.format(rawDocuments);
263
+ log.info(
264
+ "format",
265
+ `Formatted ${formattedFiles.size} files, ${formattedRawDocuments.size} raw documents`,
266
+ );
192
267
 
193
268
  // 6. Validate
194
- const validation = this.validator.validate(entities);
269
+ const validation = hasOrgBlocks
270
+ ? this.validator.validate(entities)
271
+ : { checks: [], failures: 0, passed: true };
272
+
273
+ log.info(
274
+ "validate",
275
+ `${validation.checks.length} checks, ${validation.failures} failures`,
276
+ );
195
277
 
196
278
  if (htmlLinked) {
197
279
  const linkValidation = validateLinks(htmlLinked, entities.domain);
@@ -210,10 +292,7 @@ export class Pipeline {
210
292
 
211
293
  const orgFiles = new Map();
212
294
  for (const [path, content] of formattedFiles) {
213
- if (
214
- path.startsWith("examples/organizational/") &&
215
- path.endsWith(".html")
216
- ) {
295
+ if (path.startsWith("data/knowledge/") && path.endsWith(".html")) {
217
296
  orgFiles.set(path, content);
218
297
  }
219
298
  }
@@ -235,6 +314,11 @@ export class Pipeline {
235
314
  rawDocuments: formattedRawDocuments,
236
315
  entities,
237
316
  validation,
317
+ stats: {
318
+ prose: this.proseEngine.stats,
319
+ files: formattedFiles.size,
320
+ rawDocuments: formattedRawDocuments.size,
321
+ },
238
322
  };
239
323
  }
240
324
  }