npm - @forwardimpact/libuniverse - Versions diffs - 0.1.0 → 0.1.2 - Mend

@forwardimpact/libuniverse 0.1.0 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (4) hide show

package/bin/fit-universe.js CHANGED Viewed

@@ -3,8 +3,11 @@
 // fit-universe CLI — run with --help for usage.
 import { resolve, join, dirname } from "path";
-import { mkdir, writeFile } from "fs/promises";
+import { mkdir, writeFile, readFile, readdir, mkdtemp, rm } from "fs/promises";
 import { fileURLToPath } from "url";
+import { execFile } from "child_process";
+import { promisify } from "util";
+import { tmpdir } from "os";
 import { format } from "prettier";
 import { createScriptConfig } from "@forwardimpact/libconfig";
 import { createLogger } from "@forwardimpact/libtelemetry";
@@ -14,6 +17,9 @@ import { TemplateLoader } from "@forwardimpact/libtemplate/loader";
 import {
   createDslParser,
   createEntityGenerator,
+  FakerTool,
+  SyntheaTool,
+  SdvTool,
 } from "@forwardimpact/libsyntheticgen";
 import {
   ProseEngine,
@@ -46,7 +52,11 @@ async function main() {
     SUPABASE_SERVICE_ROLE_KEY: null,
   });
-  const mode = args.cached ? "cached" : args.generate ? "generate" : "no-prose";
+  const mode = args.noProse
+    ? "no-prose"
+    : args.generate
+      ? "generate"
+      : "cached";
   let llmApi = null;
   if (mode === "generate") {
@@ -100,6 +110,42 @@ async function main() {
   const validator = new ContentValidator(logger);
   const formatter = new ContentFormatter(format, logger);
+  const execFileFn = promisify(execFile);
+  /**
+   * Create a tool instance by name.
+   * @param {string} name
+   * @param {object} deps
+   * @returns {object}
+   */
+  function toolFactory(name, deps) {
+    switch (name) {
+      case "faker":
+        return new FakerTool({ logger: deps.logger });
+      case "synthea":
+        return new SyntheaTool({
+          logger: deps.logger,
+          syntheaJar:
+            process.env.SYNTHEA_JAR || "synthea-with-dependencies.jar",
+          execFileFn,
+          fsFns: {
+            readFile,
+            readdir,
+            mkdtemp: (prefix) => mkdtemp(join(tmpdir(), prefix)),
+            rm,
+          },
+        });
+      case "sdv":
+        return new SdvTool({
+          logger: deps.logger,
+          execFileFn,
+          fsFns: { writeFile, rm },
+        });
+      default:
+        throw new Error(`Unknown tool: ${name}`);
+    }
+  }
   const pipeline = new Pipeline({
     dslParser,
     entityGenerator,
@@ -108,19 +154,13 @@ async function main() {
     renderer,
     validator,
     formatter,
+    toolFactory,
     logger,
   });
   const result = await pipeline.run({
     universePath:
-      args.universe ||
-      join(
-        dirname(
-          fileURLToPath(import.meta.resolve("@forwardimpact/libsyntheticgen")),
-        ),
-        "data",
-        "default.dsl",
-      ),
+      args.universe || join(monorepoRoot, "examples", "universe.dsl"),
     only: args.only || null,
     schemaDir,
   });
@@ -154,26 +194,19 @@ async function main() {
       }
     } else if (!args.dryRun) {
       for (const [storagePath, content] of result.rawDocuments) {
-        const fullPath = join(
-          monorepoRoot,
-          "examples/activity/raw",
-          storagePath,
-        );
+        const fullPath = join(monorepoRoot, "data/activity/raw", storagePath);
         await mkdir(dirname(fullPath), { recursive: true });
         await writeFile(fullPath, content);
       }
       console.log(
-        `${result.rawDocuments.size} raw documents written to examples/activity/raw/`,
+        `${result.rawDocuments.size} raw documents written to data/activity/raw/`,
       );
     }
     // Write evidence directly (no raw source system for evidence)
     const evidence = result.entities.activity?.evidence;
     if (evidence && !args.dryRun && !args.load) {
-      const evidencePath = join(
-        monorepoRoot,
-        "examples/activity/evidence.json",
-      );
+      const evidencePath = join(monorepoRoot, "data/activity/evidence.json");
       await mkdir(dirname(evidencePath), { recursive: true });
       const formatted = await formatContent(
         evidencePath,
@@ -203,6 +236,16 @@ async function main() {
     console.log(`  ${icon} ${check.name}`);
   }
+  // Prose cache stats
+  const { hits, generated, misses } = result.stats.prose;
+  const proseTotal = hits + generated + misses;
+  if (proseTotal > 0) {
+    const rate = Math.round((hits / proseTotal) * 100);
+    console.log(
+      `\nProse: ${hits} hits, ${generated} generated, ${misses} misses (${rate}% hit rate)`,
+    );
+  }
   if (!result.validation.passed) {
     console.error(`\n${result.validation.failures} validation failures`);
     process.exit(1);
@@ -217,7 +260,7 @@ function parseArgs(argv) {
   const args = {};
   for (const arg of argv) {
     if (arg === "--help" || arg === "-h") args.help = true;
-    else if (arg === "--cached") args.cached = true;
+    else if (arg === "--no-prose") args.noProse = true;
     else if (arg === "--generate") args.generate = true;
     else if (arg === "--strict") args.strict = true;
     else if (arg === "--dry-run") args.dryRun = true;
@@ -235,9 +278,9 @@ Usage:
   npx fit-universe [options]
 Options:
-  --generate          Generate prose via LLM (requires LLM_TOKEN)
-  --cached            Use cached prose from .prose-cache.json
-  --strict            Fail on cache miss (use with --cached)
+  --generate          Generate prose via LLM and update cache (requires LLM_TOKEN)
+  --no-prose          Skip prose entirely (structural scaffolding only)
+  --strict            Fail on cache miss (use with default cached mode)
   --dry-run           Show what would be written without writing
   --load              Load raw documents to Supabase Storage
   --only=<type>       Render only one content type (html|pathway|raw|markdown)
@@ -245,20 +288,21 @@ Options:
   -h, --help          Show this help message
 Prose modes:
-  (default)           Structural generation only, no LLM calls
-  --cached            Read prose from .prose-cache.json
-  --generate          Call LLM to generate prose, write to cache
+  (default)           Use cached prose from .prose-cache.json
+  --generate          Call LLM to generate prose and update the cache
+  --no-prose          No prose — produces minimal structural data only
 Content types:
-  html                Organizational articles, guides, FAQs (examples/organizational)
-  pathway             YAML framework files (examples/pathway)
-  raw                 Roster, GitHub events, evidence (examples/activity)
-  markdown            Briefings, notes, KB content (examples/personal)
+  html                Organizational articles, guides, FAQs (data/knowledge)
+  pathway             YAML framework files (data/pathway)
+  raw                 Roster, GitHub events, evidence (data/activity)
+  markdown            Briefings, notes, KB content (data/personal)
 Examples:
-  npx fit-universe                           # Structural only
-  npx fit-universe --generate                # Full generation with LLM prose
-  npx fit-universe --cached --strict         # Cached prose, fail on miss
+  npx fit-universe                           # Cached prose (default)
+  npx fit-universe --generate                # Generate new prose via LLM
+  npx fit-universe --strict                  # Cached prose, fail on miss
+  npx fit-universe --no-prose                # Structural only, no prose
   npx fit-universe --only=pathway            # Generate pathway data only
   npx fit-universe --universe=custom.dsl     # Use custom DSL file
 `);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libuniverse",
-  "version": "0.1.0",
+  "version": "0.1.2",
   "description": "Synthetic data universe DSL and generation engine",
   "license": "Apache-2.0",
   "repository": {
@@ -27,8 +27,8 @@
     "@forwardimpact/libsyntheticrender": "^0.1.0",
     "@forwardimpact/libtelemetry": "^0.1.23",
     "@forwardimpact/libtemplate": "^0.2.0",
-    "@supabase/supabase-js": "^2.0.0",
-    "prettier": "^3.7.4"
+    "@supabase/supabase-js": "^2.100.1",
+    "prettier": "^3.8.1"
   },
   "engines": {
     "node": ">=18.0.0"

package/pipeline.js CHANGED Viewed

@@ -6,7 +6,11 @@
 import { readFile } from "fs/promises";
 import { join } from "path";
-import { validateLinks, validateHTML } from "@forwardimpact/libsyntheticrender";
+import {
+  validateLinks,
+  validateHTML,
+  renderDataset,
+} from "@forwardimpact/libsyntheticrender";
 import { collectProseKeys } from "@forwardimpact/libsyntheticgen";
 import { loadSchemas } from "@forwardimpact/libsyntheticprose/pathway";
@@ -24,6 +28,7 @@ export class Pipeline {
    * @param {import('@forwardimpact/libsyntheticrender').Renderer} deps.renderer - Renderer
    * @param {import('@forwardimpact/libsyntheticrender').ContentValidator} deps.validator - Content validator
    * @param {import('@forwardimpact/libsyntheticrender').ContentFormatter} deps.formatter - Content formatter
+   * @param {Function} [deps.toolFactory] - (toolName, deps) => tool instance
    * @param {object} deps.logger - Logger instance
    */
   constructor({
@@ -34,6 +39,7 @@ export class Pipeline {
     renderer,
     validator,
     formatter,
+    toolFactory,
     logger,
   }) {
     if (!dslParser) throw new Error("dslParser is required");
@@ -52,6 +58,7 @@ export class Pipeline {
     this.renderer = renderer;
     this.validator = validator;
     this.formatter = formatter;
+    this.toolFactory = toolFactory || null;
     this.logger = logger;
   }
@@ -62,7 +69,7 @@ export class Pipeline {
    * @param {string} options.universePath - Path to the universe.dsl file
    * @param {string} [options.only=null] - Render only a specific content type
    * @param {string|null} [options.schemaDir=null] - Path to JSON schema directory
-   * @returns {Promise<{files: Map<string,string>, rawDocuments: Map<string,string>, entities: object, validation: object}>}
+   * @returns {Promise<{files: Map<string,string>, rawDocuments: Map<string,string>, entities: object, validation: object, stats: {prose: {hits: number, misses: number, generated: number}, files: number, rawDocuments: number}}>}
    */
   async run(options) {
     const { universePath, only = null, schemaDir = null } = options;
@@ -73,27 +80,36 @@ export class Pipeline {
     const source = await readFile(universePath, "utf-8");
     const ast = this.dslParser.parse(source);
-    // 2. Generate entity graph (Tier 0)
-    log.info("pipeline", "Generating entity graph");
-    const entities = this.entityGenerator.generate(ast);
-    // 3. Prose generation (Tier 1/2)
-    const proseKeys = collectProseKeys(entities);
+    // 2–4. Org-and-pathway generation (only when org blocks are present)
+    const hasOrgBlocks = ast.people !== null;
+    let entities = { domain: ast.domain, industry: ast.industry };
     const prose = new Map();
-    const totalKeys = proseKeys.size;
-    let keyIndex = 0;
-    if (this.proseEngine.mode !== "no-prose") {
-      log.info(
-        "pipeline",
-        `Generating prose (${this.proseEngine.mode} mode, ${totalKeys} keys)`,
-      );
-    }
-    for (const [key, context] of proseKeys) {
-      keyIndex++;
-      const result = await this.proseEngine.generateProse(key, context);
-      if (result) prose.set(key, result);
+    if (hasOrgBlocks) {
+      // 2. Generate entity graph (Tier 0)
+      log.info("pipeline", "Generating entity graph");
+      entities = this.entityGenerator.generate(ast);
+      // 3. Prose generation (Tier 1/2)
+      const proseKeys = collectProseKeys(entities);
+      const totalKeys = proseKeys.size;
+      let keyIndex = 0;
       if (this.proseEngine.mode !== "no-prose") {
-        log.info("prose", `[${keyIndex}/${totalKeys}] ${key}`);
+        log.info(
+          "pipeline",
+          `Generating prose (${this.proseEngine.mode} mode, ${totalKeys} keys)`,
+        );
+      }
+      for (const [key, context] of proseKeys) {
+        keyIndex++;
+        const result = await this.proseEngine.generateProse(key, context);
+        if (result) prose.set(key, result);
+        if (this.proseEngine.mode !== "no-prose") {
+          log.info("prose", `[${keyIndex}/${totalKeys}] ${key}`);
+          if (keyIndex % 25 === 0) {
+            this.proseEngine.saveCache();
+          }
+        }
       }
     }
@@ -102,7 +118,7 @@ export class Pipeline {
     const rawDocuments = new Map();
     let htmlLinked = null;
-    const shouldRender = (type) => !only || only === type;
+    const shouldRender = (type) => hasOrgBlocks && (!only || only === type);
     if (shouldRender("html")) {
       log.info("render", "Rendering HTML (Pass 1: deterministic skeleton)");
@@ -122,22 +138,27 @@ export class Pipeline {
           entities.domain,
         );
         for (const [name, content] of enriched) {
-          files.set(join("examples/organizational", name), content);
+          files.set(join("data/knowledge", name), content);
         }
       } else {
         for (const [name, content] of htmlFiles) {
-          files.set(join("examples/organizational", name), content);
+          files.set(join("data/knowledge", name), content);
         }
       }
       files.set(
-        "examples/organizational/README.md",
+        "data/knowledge/README.md",
         this.renderer.renderReadme(entities, prose),
       );
       files.set(
-        "examples/organizational/ONTOLOGY.md",
+        "data/knowledge/ONTOLOGY.md",
         this.renderer.renderOntology(entities),
       );
+      const htmlCount = [...files.keys()].filter((p) =>
+        p.startsWith("data/knowledge/"),
+      ).length;
+      log.info("render", `HTML: ${htmlCount} files`);
     }
     if (shouldRender("pathway")) {
@@ -156,8 +177,9 @@ export class Pipeline {
         });
         const pathwayFiles = this.renderer.renderPathway(pathwayData);
         for (const [name, content] of pathwayFiles) {
-          files.set(`examples/pathway/${name}`, content);
+          files.set(`data/pathway/${name}`, content);
         }
+        log.info("render", `Pathway: ${pathwayFiles.size} files`);
       }
     }
@@ -170,28 +192,88 @@ export class Pipeline {
       const activityFiles = this.renderer.renderActivity(entities);
       for (const [name, content] of activityFiles) {
-        files.set(join("examples/activity", name), content);
+        files.set(join("data/activity", name), content);
       }
+      log.info(
+        "render",
+        `Raw: ${raw.size} documents, ${activityFiles.size} activity files`,
+      );
     }
     if (shouldRender("markdown")) {
       log.info("render", "Rendering markdown");
       const md = this.renderer.renderMarkdown(entities, prose);
       for (const [name, content] of md) {
-        files.set(join("examples/personal", name), content);
+        files.set(join("data/personal", name), content);
+      }
+      log.info("render", `Markdown: ${md.size} files`);
+    }
+    // Dataset tool execution and output rendering
+    if (ast.datasets.length > 0 && this.toolFactory) {
+      log.info("pipeline", `Generating ${ast.datasets.length} dataset(s)`);
+      const datasets = new Map();
+      for (const ds of ast.datasets) {
+        const tool = this.toolFactory(ds.tool, { logger: log });
+        try {
+          await tool.checkAvailability();
+        } catch (err) {
+          log.info(
+            "pipeline",
+            `Skipping dataset '${ds.id}': ${ds.tool} not available (${err.message})`,
+          );
+          continue;
+        }
+        const results = await tool.generate({
+          ...ds.config,
+          seed: ast.seed,
+          name: ds.id,
+        });
+        for (const dataset of results) {
+          datasets.set(dataset.name, dataset);
+        }
+      }
+      log.info("pipeline", `Rendering ${ast.outputs.length} dataset output(s)`);
+      for (const out of ast.outputs) {
+        const dataset = datasets.get(out.dataset);
+        if (!dataset) {
+          log.info(
+            "pipeline",
+            `Skipping output '${out.dataset}': dataset not generated`,
+          );
+          continue;
+        }
+        const rendered = await renderDataset(dataset, out.format, out.config);
+        for (const [path, content] of rendered) {
+          files.set(path, content);
+        }
       }
     }
     // Save prose cache after all generation
-    this.proseEngine.saveCache();
+    if (hasOrgBlocks) {
+      this.proseEngine.saveCache();
+    }
     // 5. Format outputs with Prettier
     log.info("format", "Formatting output files with Prettier");
     const formattedFiles = await this.formatter.format(files);
     const formattedRawDocuments = await this.formatter.format(rawDocuments);
+    log.info(
+      "format",
+      `Formatted ${formattedFiles.size} files, ${formattedRawDocuments.size} raw documents`,
+    );
     // 6. Validate
-    const validation = this.validator.validate(entities);
+    const validation = hasOrgBlocks
+      ? this.validator.validate(entities)
+      : { checks: [], failures: 0, passed: true };
+    log.info(
+      "validate",
+      `${validation.checks.length} checks, ${validation.failures} failures`,
+    );
     if (htmlLinked) {
       const linkValidation = validateLinks(htmlLinked, entities.domain);
@@ -210,10 +292,7 @@ export class Pipeline {
       const orgFiles = new Map();
       for (const [path, content] of formattedFiles) {
-        if (
-          path.startsWith("examples/organizational/") &&
-          path.endsWith(".html")
-        ) {
+        if (path.startsWith("data/knowledge/") && path.endsWith(".html")) {
           orgFiles.set(path, content);
         }
       }
@@ -235,6 +314,11 @@ export class Pipeline {
       rawDocuments: formattedRawDocuments,
       entities,
       validation,
+      stats: {
+        prose: this.proseEngine.stats,
+        files: formattedFiles.size,
+        rawDocuments: formattedRawDocuments.size,
+      },
     };
   }
 }