@forwardimpact/libuniverse 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.prose-cache.json +138 -2
- package/bin/fit-universe.js +78 -34
- package/package.json +3 -3
- package/pipeline.js +119 -35
package/bin/fit-universe.js
CHANGED
|
@@ -3,8 +3,11 @@
|
|
|
3
3
|
// fit-universe CLI — run with --help for usage.
|
|
4
4
|
|
|
5
5
|
import { resolve, join, dirname } from "path";
|
|
6
|
-
import { mkdir, writeFile } from "fs/promises";
|
|
6
|
+
import { mkdir, writeFile, readFile, readdir, mkdtemp, rm } from "fs/promises";
|
|
7
7
|
import { fileURLToPath } from "url";
|
|
8
|
+
import { execFile } from "child_process";
|
|
9
|
+
import { promisify } from "util";
|
|
10
|
+
import { tmpdir } from "os";
|
|
8
11
|
import { format } from "prettier";
|
|
9
12
|
import { createScriptConfig } from "@forwardimpact/libconfig";
|
|
10
13
|
import { createLogger } from "@forwardimpact/libtelemetry";
|
|
@@ -14,6 +17,9 @@ import { TemplateLoader } from "@forwardimpact/libtemplate/loader";
|
|
|
14
17
|
import {
|
|
15
18
|
createDslParser,
|
|
16
19
|
createEntityGenerator,
|
|
20
|
+
FakerTool,
|
|
21
|
+
SyntheaTool,
|
|
22
|
+
SdvTool,
|
|
17
23
|
} from "@forwardimpact/libsyntheticgen";
|
|
18
24
|
import {
|
|
19
25
|
ProseEngine,
|
|
@@ -46,7 +52,11 @@ async function main() {
|
|
|
46
52
|
SUPABASE_SERVICE_ROLE_KEY: null,
|
|
47
53
|
});
|
|
48
54
|
|
|
49
|
-
const mode = args.
|
|
55
|
+
const mode = args.noProse
|
|
56
|
+
? "no-prose"
|
|
57
|
+
: args.generate
|
|
58
|
+
? "generate"
|
|
59
|
+
: "cached";
|
|
50
60
|
|
|
51
61
|
let llmApi = null;
|
|
52
62
|
if (mode === "generate") {
|
|
@@ -100,6 +110,42 @@ async function main() {
|
|
|
100
110
|
const validator = new ContentValidator(logger);
|
|
101
111
|
const formatter = new ContentFormatter(format, logger);
|
|
102
112
|
|
|
113
|
+
const execFileFn = promisify(execFile);
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Create a tool instance by name.
|
|
117
|
+
* @param {string} name
|
|
118
|
+
* @param {object} deps
|
|
119
|
+
* @returns {object}
|
|
120
|
+
*/
|
|
121
|
+
function toolFactory(name, deps) {
|
|
122
|
+
switch (name) {
|
|
123
|
+
case "faker":
|
|
124
|
+
return new FakerTool({ logger: deps.logger });
|
|
125
|
+
case "synthea":
|
|
126
|
+
return new SyntheaTool({
|
|
127
|
+
logger: deps.logger,
|
|
128
|
+
syntheaJar:
|
|
129
|
+
process.env.SYNTHEA_JAR || "synthea-with-dependencies.jar",
|
|
130
|
+
execFileFn,
|
|
131
|
+
fsFns: {
|
|
132
|
+
readFile,
|
|
133
|
+
readdir,
|
|
134
|
+
mkdtemp: (prefix) => mkdtemp(join(tmpdir(), prefix)),
|
|
135
|
+
rm,
|
|
136
|
+
},
|
|
137
|
+
});
|
|
138
|
+
case "sdv":
|
|
139
|
+
return new SdvTool({
|
|
140
|
+
logger: deps.logger,
|
|
141
|
+
execFileFn,
|
|
142
|
+
fsFns: { writeFile, rm },
|
|
143
|
+
});
|
|
144
|
+
default:
|
|
145
|
+
throw new Error(`Unknown tool: ${name}`);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
103
149
|
const pipeline = new Pipeline({
|
|
104
150
|
dslParser,
|
|
105
151
|
entityGenerator,
|
|
@@ -108,19 +154,13 @@ async function main() {
|
|
|
108
154
|
renderer,
|
|
109
155
|
validator,
|
|
110
156
|
formatter,
|
|
157
|
+
toolFactory,
|
|
111
158
|
logger,
|
|
112
159
|
});
|
|
113
160
|
|
|
114
161
|
const result = await pipeline.run({
|
|
115
162
|
universePath:
|
|
116
|
-
args.universe ||
|
|
117
|
-
join(
|
|
118
|
-
dirname(
|
|
119
|
-
fileURLToPath(import.meta.resolve("@forwardimpact/libsyntheticgen")),
|
|
120
|
-
),
|
|
121
|
-
"data",
|
|
122
|
-
"default.dsl",
|
|
123
|
-
),
|
|
163
|
+
args.universe || join(monorepoRoot, "examples", "universe.dsl"),
|
|
124
164
|
only: args.only || null,
|
|
125
165
|
schemaDir,
|
|
126
166
|
});
|
|
@@ -154,26 +194,19 @@ async function main() {
|
|
|
154
194
|
}
|
|
155
195
|
} else if (!args.dryRun) {
|
|
156
196
|
for (const [storagePath, content] of result.rawDocuments) {
|
|
157
|
-
const fullPath = join(
|
|
158
|
-
monorepoRoot,
|
|
159
|
-
"examples/activity/raw",
|
|
160
|
-
storagePath,
|
|
161
|
-
);
|
|
197
|
+
const fullPath = join(monorepoRoot, "data/activity/raw", storagePath);
|
|
162
198
|
await mkdir(dirname(fullPath), { recursive: true });
|
|
163
199
|
await writeFile(fullPath, content);
|
|
164
200
|
}
|
|
165
201
|
console.log(
|
|
166
|
-
`${result.rawDocuments.size} raw documents written to examples/activity/raw/`,
|
|
202
|
+
`${result.rawDocuments.size} raw documents written to data/activity/raw/`,
|
|
167
203
|
);
|
|
168
204
|
}
|
|
169
205
|
|
|
170
206
|
// Write evidence directly (no raw source system for evidence)
|
|
171
207
|
const evidence = result.entities.activity?.evidence;
|
|
172
208
|
if (evidence && !args.dryRun && !args.load) {
|
|
173
|
-
const evidencePath = join(
|
|
174
|
-
monorepoRoot,
|
|
175
|
-
"examples/activity/evidence.json",
|
|
176
|
-
);
|
|
209
|
+
const evidencePath = join(monorepoRoot, "data/activity/evidence.json");
|
|
177
210
|
await mkdir(dirname(evidencePath), { recursive: true });
|
|
178
211
|
const formatted = await formatContent(
|
|
179
212
|
evidencePath,
|
|
@@ -203,6 +236,16 @@ async function main() {
|
|
|
203
236
|
console.log(` ${icon} ${check.name}`);
|
|
204
237
|
}
|
|
205
238
|
|
|
239
|
+
// Prose cache stats
|
|
240
|
+
const { hits, generated, misses } = result.stats.prose;
|
|
241
|
+
const proseTotal = hits + generated + misses;
|
|
242
|
+
if (proseTotal > 0) {
|
|
243
|
+
const rate = Math.round((hits / proseTotal) * 100);
|
|
244
|
+
console.log(
|
|
245
|
+
`\nProse: ${hits} hits, ${generated} generated, ${misses} misses (${rate}% hit rate)`,
|
|
246
|
+
);
|
|
247
|
+
}
|
|
248
|
+
|
|
206
249
|
if (!result.validation.passed) {
|
|
207
250
|
console.error(`\n${result.validation.failures} validation failures`);
|
|
208
251
|
process.exit(1);
|
|
@@ -217,7 +260,7 @@ function parseArgs(argv) {
|
|
|
217
260
|
const args = {};
|
|
218
261
|
for (const arg of argv) {
|
|
219
262
|
if (arg === "--help" || arg === "-h") args.help = true;
|
|
220
|
-
else if (arg === "--
|
|
263
|
+
else if (arg === "--no-prose") args.noProse = true;
|
|
221
264
|
else if (arg === "--generate") args.generate = true;
|
|
222
265
|
else if (arg === "--strict") args.strict = true;
|
|
223
266
|
else if (arg === "--dry-run") args.dryRun = true;
|
|
@@ -235,9 +278,9 @@ Usage:
|
|
|
235
278
|
npx fit-universe [options]
|
|
236
279
|
|
|
237
280
|
Options:
|
|
238
|
-
--generate Generate prose via LLM (requires LLM_TOKEN)
|
|
239
|
-
--
|
|
240
|
-
--strict Fail on cache miss (use with
|
|
281
|
+
--generate Generate prose via LLM and update cache (requires LLM_TOKEN)
|
|
282
|
+
--no-prose Skip prose entirely (structural scaffolding only)
|
|
283
|
+
--strict Fail on cache miss (use with default cached mode)
|
|
241
284
|
--dry-run Show what would be written without writing
|
|
242
285
|
--load Load raw documents to Supabase Storage
|
|
243
286
|
--only=<type> Render only one content type (html|pathway|raw|markdown)
|
|
@@ -245,20 +288,21 @@ Options:
|
|
|
245
288
|
-h, --help Show this help message
|
|
246
289
|
|
|
247
290
|
Prose modes:
|
|
248
|
-
(default)
|
|
249
|
-
--
|
|
250
|
-
--
|
|
291
|
+
(default) Use cached prose from .prose-cache.json
|
|
292
|
+
--generate Call LLM to generate prose and update the cache
|
|
293
|
+
--no-prose No prose — produces minimal structural data only
|
|
251
294
|
|
|
252
295
|
Content types:
|
|
253
|
-
html Organizational articles, guides, FAQs (
|
|
254
|
-
pathway YAML framework files (
|
|
255
|
-
raw Roster, GitHub events, evidence (
|
|
256
|
-
markdown Briefings, notes, KB content (
|
|
296
|
+
html Organizational articles, guides, FAQs (data/knowledge)
|
|
297
|
+
pathway YAML framework files (data/pathway)
|
|
298
|
+
raw Roster, GitHub events, evidence (data/activity)
|
|
299
|
+
markdown Briefings, notes, KB content (data/personal)
|
|
257
300
|
|
|
258
301
|
Examples:
|
|
259
|
-
npx fit-universe #
|
|
260
|
-
npx fit-universe --generate #
|
|
261
|
-
npx fit-universe --
|
|
302
|
+
npx fit-universe # Cached prose (default)
|
|
303
|
+
npx fit-universe --generate # Generate new prose via LLM
|
|
304
|
+
npx fit-universe --strict # Cached prose, fail on miss
|
|
305
|
+
npx fit-universe --no-prose # Structural only, no prose
|
|
262
306
|
npx fit-universe --only=pathway # Generate pathway data only
|
|
263
307
|
npx fit-universe --universe=custom.dsl # Use custom DSL file
|
|
264
308
|
`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forwardimpact/libuniverse",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Synthetic data universe DSL and generation engine",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"repository": {
|
|
@@ -27,8 +27,8 @@
|
|
|
27
27
|
"@forwardimpact/libsyntheticrender": "^0.1.0",
|
|
28
28
|
"@forwardimpact/libtelemetry": "^0.1.23",
|
|
29
29
|
"@forwardimpact/libtemplate": "^0.2.0",
|
|
30
|
-
"@supabase/supabase-js": "^2.
|
|
31
|
-
"prettier": "^3.
|
|
30
|
+
"@supabase/supabase-js": "^2.100.1",
|
|
31
|
+
"prettier": "^3.8.1"
|
|
32
32
|
},
|
|
33
33
|
"engines": {
|
|
34
34
|
"node": ">=18.0.0"
|
package/pipeline.js
CHANGED
|
@@ -6,7 +6,11 @@
|
|
|
6
6
|
|
|
7
7
|
import { readFile } from "fs/promises";
|
|
8
8
|
import { join } from "path";
|
|
9
|
-
import {
|
|
9
|
+
import {
|
|
10
|
+
validateLinks,
|
|
11
|
+
validateHTML,
|
|
12
|
+
renderDataset,
|
|
13
|
+
} from "@forwardimpact/libsyntheticrender";
|
|
10
14
|
import { collectProseKeys } from "@forwardimpact/libsyntheticgen";
|
|
11
15
|
import { loadSchemas } from "@forwardimpact/libsyntheticprose/pathway";
|
|
12
16
|
|
|
@@ -24,6 +28,7 @@ export class Pipeline {
|
|
|
24
28
|
* @param {import('@forwardimpact/libsyntheticrender').Renderer} deps.renderer - Renderer
|
|
25
29
|
* @param {import('@forwardimpact/libsyntheticrender').ContentValidator} deps.validator - Content validator
|
|
26
30
|
* @param {import('@forwardimpact/libsyntheticrender').ContentFormatter} deps.formatter - Content formatter
|
|
31
|
+
* @param {Function} [deps.toolFactory] - (toolName, deps) => tool instance
|
|
27
32
|
* @param {object} deps.logger - Logger instance
|
|
28
33
|
*/
|
|
29
34
|
constructor({
|
|
@@ -34,6 +39,7 @@ export class Pipeline {
|
|
|
34
39
|
renderer,
|
|
35
40
|
validator,
|
|
36
41
|
formatter,
|
|
42
|
+
toolFactory,
|
|
37
43
|
logger,
|
|
38
44
|
}) {
|
|
39
45
|
if (!dslParser) throw new Error("dslParser is required");
|
|
@@ -52,6 +58,7 @@ export class Pipeline {
|
|
|
52
58
|
this.renderer = renderer;
|
|
53
59
|
this.validator = validator;
|
|
54
60
|
this.formatter = formatter;
|
|
61
|
+
this.toolFactory = toolFactory || null;
|
|
55
62
|
this.logger = logger;
|
|
56
63
|
}
|
|
57
64
|
|
|
@@ -62,7 +69,7 @@ export class Pipeline {
|
|
|
62
69
|
* @param {string} options.universePath - Path to the universe.dsl file
|
|
63
70
|
* @param {string} [options.only=null] - Render only a specific content type
|
|
64
71
|
* @param {string|null} [options.schemaDir=null] - Path to JSON schema directory
|
|
65
|
-
* @returns {Promise<{files: Map<string,string>, rawDocuments: Map<string,string>, entities: object, validation: object}>}
|
|
72
|
+
* @returns {Promise<{files: Map<string,string>, rawDocuments: Map<string,string>, entities: object, validation: object, stats: {prose: {hits: number, misses: number, generated: number}, files: number, rawDocuments: number}}>}
|
|
66
73
|
*/
|
|
67
74
|
async run(options) {
|
|
68
75
|
const { universePath, only = null, schemaDir = null } = options;
|
|
@@ -73,27 +80,36 @@ export class Pipeline {
|
|
|
73
80
|
const source = await readFile(universePath, "utf-8");
|
|
74
81
|
const ast = this.dslParser.parse(source);
|
|
75
82
|
|
|
76
|
-
// 2.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
// 3. Prose generation (Tier 1/2)
|
|
81
|
-
const proseKeys = collectProseKeys(entities);
|
|
83
|
+
// 2–4. Org-and-pathway generation (only when org blocks are present)
|
|
84
|
+
const hasOrgBlocks = ast.people !== null;
|
|
85
|
+
let entities = { domain: ast.domain, industry: ast.industry };
|
|
82
86
|
const prose = new Map();
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
log.info(
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
keyIndex
|
|
93
|
-
const result = await this.proseEngine.generateProse(key, context);
|
|
94
|
-
if (result) prose.set(key, result);
|
|
87
|
+
|
|
88
|
+
if (hasOrgBlocks) {
|
|
89
|
+
// 2. Generate entity graph (Tier 0)
|
|
90
|
+
log.info("pipeline", "Generating entity graph");
|
|
91
|
+
entities = this.entityGenerator.generate(ast);
|
|
92
|
+
|
|
93
|
+
// 3. Prose generation (Tier 1/2)
|
|
94
|
+
const proseKeys = collectProseKeys(entities);
|
|
95
|
+
const totalKeys = proseKeys.size;
|
|
96
|
+
let keyIndex = 0;
|
|
95
97
|
if (this.proseEngine.mode !== "no-prose") {
|
|
96
|
-
log.info(
|
|
98
|
+
log.info(
|
|
99
|
+
"pipeline",
|
|
100
|
+
`Generating prose (${this.proseEngine.mode} mode, ${totalKeys} keys)`,
|
|
101
|
+
);
|
|
102
|
+
}
|
|
103
|
+
for (const [key, context] of proseKeys) {
|
|
104
|
+
keyIndex++;
|
|
105
|
+
const result = await this.proseEngine.generateProse(key, context);
|
|
106
|
+
if (result) prose.set(key, result);
|
|
107
|
+
if (this.proseEngine.mode !== "no-prose") {
|
|
108
|
+
log.info("prose", `[${keyIndex}/${totalKeys}] ${key}`);
|
|
109
|
+
if (keyIndex % 25 === 0) {
|
|
110
|
+
this.proseEngine.saveCache();
|
|
111
|
+
}
|
|
112
|
+
}
|
|
97
113
|
}
|
|
98
114
|
}
|
|
99
115
|
|
|
@@ -102,7 +118,7 @@ export class Pipeline {
|
|
|
102
118
|
const rawDocuments = new Map();
|
|
103
119
|
let htmlLinked = null;
|
|
104
120
|
|
|
105
|
-
const shouldRender = (type) => !only || only === type;
|
|
121
|
+
const shouldRender = (type) => hasOrgBlocks && (!only || only === type);
|
|
106
122
|
|
|
107
123
|
if (shouldRender("html")) {
|
|
108
124
|
log.info("render", "Rendering HTML (Pass 1: deterministic skeleton)");
|
|
@@ -122,22 +138,27 @@ export class Pipeline {
|
|
|
122
138
|
entities.domain,
|
|
123
139
|
);
|
|
124
140
|
for (const [name, content] of enriched) {
|
|
125
|
-
files.set(join("examples/organizational", name), content);
|
|
141
|
+
files.set(join("data/knowledge", name), content);
|
|
126
142
|
}
|
|
127
143
|
} else {
|
|
128
144
|
for (const [name, content] of htmlFiles) {
|
|
129
|
-
files.set(join("examples/organizational", name), content);
|
|
145
|
+
files.set(join("data/knowledge", name), content);
|
|
130
146
|
}
|
|
131
147
|
}
|
|
132
148
|
|
|
133
149
|
files.set(
|
|
134
|
-
"examples/organizational/README.md",
|
|
150
|
+
"data/knowledge/README.md",
|
|
135
151
|
this.renderer.renderReadme(entities, prose),
|
|
136
152
|
);
|
|
137
153
|
files.set(
|
|
138
|
-
"examples/organizational/ONTOLOGY.md",
|
|
154
|
+
"data/knowledge/ONTOLOGY.md",
|
|
139
155
|
this.renderer.renderOntology(entities),
|
|
140
156
|
);
|
|
157
|
+
|
|
158
|
+
const htmlCount = [...files.keys()].filter((p) =>
|
|
159
|
+
p.startsWith("data/knowledge/"),
|
|
160
|
+
).length;
|
|
161
|
+
log.info("render", `HTML: ${htmlCount} files`);
|
|
141
162
|
}
|
|
142
163
|
|
|
143
164
|
if (shouldRender("pathway")) {
|
|
@@ -156,8 +177,9 @@ export class Pipeline {
|
|
|
156
177
|
});
|
|
157
178
|
const pathwayFiles = this.renderer.renderPathway(pathwayData);
|
|
158
179
|
for (const [name, content] of pathwayFiles) {
|
|
159
|
-
files.set(`
|
|
180
|
+
files.set(`data/pathway/${name}`, content);
|
|
160
181
|
}
|
|
182
|
+
log.info("render", `Pathway: ${pathwayFiles.size} files`);
|
|
161
183
|
}
|
|
162
184
|
}
|
|
163
185
|
|
|
@@ -170,28 +192,88 @@ export class Pipeline {
|
|
|
170
192
|
|
|
171
193
|
const activityFiles = this.renderer.renderActivity(entities);
|
|
172
194
|
for (const [name, content] of activityFiles) {
|
|
173
|
-
files.set(join("examples/activity", name), content);
|
|
195
|
+
files.set(join("data/activity", name), content);
|
|
174
196
|
}
|
|
197
|
+
log.info(
|
|
198
|
+
"render",
|
|
199
|
+
`Raw: ${raw.size} documents, ${activityFiles.size} activity files`,
|
|
200
|
+
);
|
|
175
201
|
}
|
|
176
202
|
|
|
177
203
|
if (shouldRender("markdown")) {
|
|
178
204
|
log.info("render", "Rendering markdown");
|
|
179
205
|
const md = this.renderer.renderMarkdown(entities, prose);
|
|
180
206
|
for (const [name, content] of md) {
|
|
181
|
-
files.set(join("
|
|
207
|
+
files.set(join("data/personal", name), content);
|
|
208
|
+
}
|
|
209
|
+
log.info("render", `Markdown: ${md.size} files`);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// Dataset tool execution and output rendering
|
|
213
|
+
if (ast.datasets.length > 0 && this.toolFactory) {
|
|
214
|
+
log.info("pipeline", `Generating ${ast.datasets.length} dataset(s)`);
|
|
215
|
+
const datasets = new Map();
|
|
216
|
+
for (const ds of ast.datasets) {
|
|
217
|
+
const tool = this.toolFactory(ds.tool, { logger: log });
|
|
218
|
+
try {
|
|
219
|
+
await tool.checkAvailability();
|
|
220
|
+
} catch (err) {
|
|
221
|
+
log.info(
|
|
222
|
+
"pipeline",
|
|
223
|
+
`Skipping dataset '${ds.id}': ${ds.tool} not available (${err.message})`,
|
|
224
|
+
);
|
|
225
|
+
continue;
|
|
226
|
+
}
|
|
227
|
+
const results = await tool.generate({
|
|
228
|
+
...ds.config,
|
|
229
|
+
seed: ast.seed,
|
|
230
|
+
name: ds.id,
|
|
231
|
+
});
|
|
232
|
+
for (const dataset of results) {
|
|
233
|
+
datasets.set(dataset.name, dataset);
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
log.info("pipeline", `Rendering ${ast.outputs.length} dataset output(s)`);
|
|
238
|
+
for (const out of ast.outputs) {
|
|
239
|
+
const dataset = datasets.get(out.dataset);
|
|
240
|
+
if (!dataset) {
|
|
241
|
+
log.info(
|
|
242
|
+
"pipeline",
|
|
243
|
+
`Skipping output '${out.dataset}': dataset not generated`,
|
|
244
|
+
);
|
|
245
|
+
continue;
|
|
246
|
+
}
|
|
247
|
+
const rendered = await renderDataset(dataset, out.format, out.config);
|
|
248
|
+
for (const [path, content] of rendered) {
|
|
249
|
+
files.set(path, content);
|
|
250
|
+
}
|
|
182
251
|
}
|
|
183
252
|
}
|
|
184
253
|
|
|
185
254
|
// Save prose cache after all generation
|
|
186
|
-
|
|
255
|
+
if (hasOrgBlocks) {
|
|
256
|
+
this.proseEngine.saveCache();
|
|
257
|
+
}
|
|
187
258
|
|
|
188
259
|
// 5. Format outputs with Prettier
|
|
189
260
|
log.info("format", "Formatting output files with Prettier");
|
|
190
261
|
const formattedFiles = await this.formatter.format(files);
|
|
191
262
|
const formattedRawDocuments = await this.formatter.format(rawDocuments);
|
|
263
|
+
log.info(
|
|
264
|
+
"format",
|
|
265
|
+
`Formatted ${formattedFiles.size} files, ${formattedRawDocuments.size} raw documents`,
|
|
266
|
+
);
|
|
192
267
|
|
|
193
268
|
// 6. Validate
|
|
194
|
-
const validation = this.validator.validate(entities);
|
|
269
|
+
const validation = hasOrgBlocks
|
|
270
|
+
? this.validator.validate(entities)
|
|
271
|
+
: { checks: [], failures: 0, passed: true };
|
|
272
|
+
|
|
273
|
+
log.info(
|
|
274
|
+
"validate",
|
|
275
|
+
`${validation.checks.length} checks, ${validation.failures} failures`,
|
|
276
|
+
);
|
|
195
277
|
|
|
196
278
|
if (htmlLinked) {
|
|
197
279
|
const linkValidation = validateLinks(htmlLinked, entities.domain);
|
|
@@ -210,10 +292,7 @@ export class Pipeline {
|
|
|
210
292
|
|
|
211
293
|
const orgFiles = new Map();
|
|
212
294
|
for (const [path, content] of formattedFiles) {
|
|
213
|
-
if (
|
|
214
|
-
path.startsWith("examples/organizational/") &&
|
|
215
|
-
path.endsWith(".html")
|
|
216
|
-
) {
|
|
295
|
+
if (path.startsWith("data/knowledge/") && path.endsWith(".html")) {
|
|
217
296
|
orgFiles.set(path, content);
|
|
218
297
|
}
|
|
219
298
|
}
|
|
@@ -235,6 +314,11 @@ export class Pipeline {
|
|
|
235
314
|
rawDocuments: formattedRawDocuments,
|
|
236
315
|
entities,
|
|
237
316
|
validation,
|
|
317
|
+
stats: {
|
|
318
|
+
prose: this.proseEngine.stats,
|
|
319
|
+
files: formattedFiles.size,
|
|
320
|
+
rawDocuments: formattedRawDocuments.size,
|
|
321
|
+
},
|
|
238
322
|
};
|
|
239
323
|
}
|
|
240
324
|
}
|