@forwardimpact/libsyntheticrender 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/format.js +84 -0
- package/index.js +7 -0
- package/package.json +37 -0
- package/render/dataset-renderers.js +187 -0
- package/render/enricher.js +384 -0
- package/render/html.js +458 -0
- package/render/industry-data.js +434 -0
- package/render/link-assigner.js +350 -0
- package/render/markdown.js +126 -0
- package/render/pathway.js +124 -0
- package/render/raw.js +465 -0
- package/render/renderer.js +122 -0
- package/render/validate-links.js +329 -0
- package/templates/article.html +31 -0
- package/templates/blog-post.html +34 -0
- package/templates/blog.html +27 -0
- package/templates/briefing.md +20 -0
- package/templates/comments.html +15 -0
- package/templates/courses.html +37 -0
- package/templates/departments.html +29 -0
- package/templates/drugs.html +23 -0
- package/templates/events.html +38 -0
- package/templates/faq.html +22 -0
- package/templates/howto.html +8 -0
- package/templates/leadership.html +8 -0
- package/templates/ontology.md +34 -0
- package/templates/page.html +12 -0
- package/templates/platforms.html +23 -0
- package/templates/project-note.md +19 -0
- package/templates/projects.html +42 -0
- package/templates/readme.md +28 -0
- package/templates/reviews.html +19 -0
- package/templates/roles.html +11 -0
- package/templates/skill-reflection.md +18 -0
- package/templates/weekly.md +18 -0
- package/test/dataset-renderers.test.js +214 -0
- package/test/validate.test.js +396 -0
- package/validate.js +535 -0
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enricher — Pass 2 LLM enrichment of prose blocks.
|
|
3
|
+
*
|
|
4
|
+
* Finds all elements with `data-enrich` attributes in Pass 1 HTML,
|
|
5
|
+
* calls ProseEngine to generate rich prose with inline microdata,
|
|
6
|
+
* and replaces placeholder content.
|
|
7
|
+
*
|
|
8
|
+
* @module libsyntheticrender/render/enricher
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
 * Global pattern locating Pass 1 placeholder blocks of the form
 * `<div ... data-enrich="key"> ... </div>`.
 *
 * Capture groups:
 *   1 - the full opening `<div ...>` tag
 *   2 - the value of the `data-enrich` attribute (the enrich key)
 *   3 - the inner content, matched lazily up to the first `</div>`
 *   4 - the closing `</div>` tag
 */
const ENRICH_PATTERN = new RegExp(
  [
    '(<div[^>]+data-enrich="([^"]+)"[^>]*>)', // groups 1 and 2
    "([\\s\\S]*?)", // group 3
    "(<\\/div>)", // group 4
  ].join(""),
  "g",
);
|
|
17
|
+
|
|
18
|
+
/**
 * Convert linked entities into mention targets of a single Schema.org type.
 * A missing (null/undefined) list is treated as empty.
 * @param {object[] | null | undefined} items - Entities exposing `name` and `iri`
 * @param {string} type - Schema.org type assigned to every mention
 * @param {number} [limit] - Maximum number of entities to include (default: all)
 * @returns {{ type: string, name: string, iri: string }[]}
 */
function toMentions(items, type, limit = Infinity) {
  return (items || [])
    .slice(0, limit)
    .map(({ name, iri }) => ({ type, name, iri }));
}

/**
 * Build entity context for enrichment from linked entities.
 *
 * The enrich key has the form `<type>_<id>`; everything after the first
 * underscore is the id, so ids may themselves contain underscores. Projects,
 * platforms, drugs and courses are looked up by `id`, articles by `topic`,
 * and events / blog posts by 1-based position in their collection.
 *
 * Every link list and single link on the resolved entity is optional: an
 * absent one contributes no mentions instead of throwing.
 *
 * @param {string} enrichKey
 * @param {object} linked - LinkedEntities from link-assigner
 * @returns {{ entityType: string, entityName: string, mentionTargets: object[] } | null}
 *   The context, or null when the type is unknown or the entity is not found.
 */
function buildEnrichContext(enrichKey, linked) {
  const [type, ...rest] = enrichKey.split("_");
  const id = rest.join("_");

  switch (type) {
    case "project": {
      const proj = linked.projects.find((p) => p.id === id);
      if (!proj) return null;
      return {
        entityType: "Project",
        entityName: proj.name,
        mentionTargets: [
          ...toMentions(proj.drugLinks, "Drug"),
          ...toMentions(proj.platformLinks, "SoftwareApplication"),
          ...toMentions(proj.members, "Person", 3),
        ],
      };
    }
    case "platform": {
      const plat = linked.platforms.find((p) => p.id === id);
      if (!plat) return null;
      // Dependencies may be listed as bare ids or as objects carrying an id;
      // ones that do not resolve to a known platform are dropped.
      const depPlatforms = (plat.dependencies || [])
        .map((dep) => {
          const depId = typeof dep === "string" ? dep : dep.id;
          return linked.platforms.find((p) => p.id === depId);
        })
        .filter(Boolean);
      return {
        entityType: "SoftwareApplication",
        entityName: plat.name,
        mentionTargets: [
          ...toMentions(depPlatforms, "SoftwareApplication"),
          ...toMentions(plat.projectLinks, "Project", 2),
          ...toMentions(plat.drugLinks, "Drug", 2),
        ],
      };
    }
    case "drug": {
      const drug = linked.drugs.find((d) => d.id === id);
      if (!drug) return null;
      const parent = drug.parentDrug
        ? linked.drugs.find((d) => d.id === drug.parentDrug)
        : undefined;
      return {
        entityType: "Drug",
        entityName: drug.name,
        mentionTargets: [
          ...toMentions(drug.projectLinks, "Project", 2),
          ...toMentions(drug.platformLinks, "SoftwareApplication", 2),
          ...toMentions([parent].filter(Boolean), "Drug"),
        ],
      };
    }
    case "course": {
      const course = linked.courses.find((c) => c.id === id);
      if (!course) return null;
      return {
        entityType: "Course",
        entityName: course.title,
        mentionTargets: [
          ...toMentions(
            [course.platformLink].filter(Boolean),
            "SoftwareApplication",
          ),
          ...toMentions([course.drugLink].filter(Boolean), "Drug"),
          ...toMentions(course.attendees, "Person", 2),
        ],
      };
    }
    case "event": {
      // Event keys carry a 1-based position rather than an id.
      const event = linked.events[Number.parseInt(id, 10) - 1];
      if (!event) return null;
      return {
        entityType: "Event",
        entityName: event.title,
        mentionTargets: [
          ...toMentions([event.organizer].filter(Boolean), "Person"),
          ...toMentions(event.aboutProjects, "Project", 2),
          ...toMentions(event.aboutDrugs, "Drug", 2),
          ...toMentions(event.attendees, "Person", 2),
        ],
      };
    }
    case "blog": {
      // Blog keys carry a 1-based position rather than an id.
      const post = linked.blogPosts[Number.parseInt(id, 10) - 1];
      if (!post) return null;
      return {
        entityType: "BlogPosting",
        entityName: post.headline,
        mentionTargets: [
          ...toMentions(post.aboutDrugs, "Drug", 2),
          ...toMentions(post.aboutPlatforms, "SoftwareApplication", 2),
          ...toMentions(post.mentionsPeople, "Person", 3),
        ],
      };
    }
    case "article": {
      const article = linked.articles?.find((a) => a.topic === id);
      if (!article) return null;
      return {
        entityType: "ScholarlyArticle",
        entityName: article.title,
        mentionTargets: [
          ...toMentions(article.drugLinks, "Drug", 2),
          ...toMentions(article.platformLinks, "SoftwareApplication", 2),
          ...toMentions(article.projectLinks, "Project", 2),
          ...toMentions(article.authorLinks, "Person", 2),
        ],
      };
    }
    default:
      return null;
  }
}
|
|
217
|
+
|
|
218
|
+
/**
 * Build the LLM prompt for enriching a prose block.
 *
 * The system message fixes the output format and IRI rules; the user message
 * carries the entity, the current placeholder text and the list of entities
 * to mention. The "itemid must start with https://<domain>/id/" constraint is
 * only added when a domain is supplied, so a missing domain never produces a
 * bogus "https://undefined/id/" rule.
 *
 * @param {object} ctx - Context from buildEnrichContext
 * @param {string} placeholder - Current placeholder text
 * @param {string} [domain] - Universe domain for IRI constraint
 * @returns {object[]} Messages array for ProseEngine.generateStructured
 */
function buildEnrichMessages(ctx, placeholder, domain) {
  const mentionList = ctx.mentionTargets
    .map((m) => `- ${m.type}: ${m.name} (${m.iri})`)
    .join("\n");

  const iriRules = [
    "Only use the exact IRIs provided.",
    "Do not invent new IRIs.",
  ];
  if (domain) {
    iriRules.push(
      `All itemid values must start with "https://${domain}/id/".`,
    );
  }

  const system = [
    "You are a technical writer producing HTML content with Schema.org microdata for a pharmaceutical company knowledge base.",
    "Output only the inner HTML content — no wrapper tags, no markdown fences.",
    "Write 300-500 words of detailed, rich prose across multiple paragraphs. Mention entities using inline Schema.org microdata spans.",
    iriRules.join(" "),
  ].join("\n");

  // The {{type}}/{{iri}}/{{name}} markers are literal text shown to the model
  // as a pattern; they are not substituted here.
  const user = `Rewrite this text block for a ${ctx.entityType} document about "${ctx.entityName}".

Current text: "${placeholder}"

Write 300-500 words of detailed prose across 3-5 paragraphs. Naturally mention these entities using Schema.org microdata:

${mentionList}

Use this pattern for inline mentions:
<span itemprop="mentions" itemscope itemtype="https://schema.org/{{type}}" itemid="{{iri}}"><span itemprop="name">{{name}}</span></span>

Output only the HTML content for the block — no wrapper tags.`;

  return [
    { role: "system", content: system },
    { role: "user", content: user },
  ];
}
|
|
253
|
+
|
|
254
|
+
/**
 * Remove microdata identifiers that point outside the universe domain.
 *
 * An `itemid` is kept only when it starts with `https://<domain>/id/`.
 * Otherwise the attribute is dropped; when it sits directly next to an
 * `itemscope itemtype="..."` pair (in either order) that pair is dropped too.
 *
 * @param {string} html
 * @param {string} domain
 * @returns {string}
 */
function stripOffDomainIris(html, domain) {
  const allowedPrefix = `https://${domain}/id/`;

  // Shared replacer: keep on-domain matches verbatim, otherwise keep only the
  // part of the tag that precedes the matched attributes.
  const keepOrDrop = (whole, tagHead, iri) =>
    iri.startsWith(allowedPrefix) ? whole : tagHead;

  // Order matters: the two combined forms run before the bare itemid form.
  const passes = [
    /(<[^>]*?)\s+itemscope\s+itemtype="[^"]*"\s+itemid="([^"]*)"/g,
    /(<[^>]*?)\s+itemid="([^"]*)"\s+itemscope\s+itemtype="[^"]*"/g,
    /(<[^>]*?)\s+itemid="([^"]*)"/g,
  ];

  return passes.reduce(
    (text, pattern) => text.replace(pattern, keepOrDrop),
    html,
  );
}
|
|
282
|
+
|
|
283
|
+
/**
 * Append closing tags for elements the LLM-generated prose left open.
 *
 * Scans the markup once, keeping a list of currently open element names
 * (lower-cased). Void elements and self-closing tags are ignored. A closing
 * tag removes the most recent open element of the same name; closing tags
 * with no open counterpart are left untouched. Whatever is still open at the
 * end is closed innermost-first by appending to the input.
 *
 * @param {string} html
 * @returns {string}
 */
function balanceTags(html) {
  const voidElements = new Set([
    "area",
    "base",
    "br",
    "col",
    "embed",
    "hr",
    "img",
    "input",
    "link",
    "meta",
    "source",
    "track",
    "wbr",
  ]);
  const tagPattern = /<\/?([a-zA-Z][a-zA-Z0-9]*)\b[^>]*\/?>/g;
  const openElements = [];

  for (
    let hit = tagPattern.exec(html);
    hit !== null;
    hit = tagPattern.exec(html)
  ) {
    const [token, rawName] = hit;
    const name = rawName.toLowerCase();

    // Elements that never take a closing tag do not affect the balance.
    if (voidElements.has(name) || token.endsWith("/>")) continue;

    if (!token.startsWith("</")) {
      openElements.push(name);
      continue;
    }

    const position = openElements.lastIndexOf(name);
    if (position !== -1) openElements.splice(position, 1);
  }

  const closers = openElements
    .reverse()
    .map((name) => `</${name}>`)
    .join("");
  return html + closers;
}
|
|
325
|
+
|
|
326
|
+
/**
 * Enrich all prose blocks in HTML documents via LLM.
 *
 * For every `data-enrich` block found by ENRICH_PATTERN, the entity context
 * is resolved, the prose engine is asked (one call at a time) for replacement
 * content, and the result is sanitised (off-domain IRIs stripped when a
 * domain is given, unclosed tags balanced) before being spliced in. The
 * `data-enrich` attribute is removed from enriched blocks. Blocks whose key
 * cannot be resolved, or for which the engine returns nothing, are left as
 * they were.
 *
 * Each document is reassembled from match offsets rather than with
 * `String.prototype.replace(string, string)`, so `$`-sequences in generated
 * prose (`$$`, `$&`, `` $` ``, `$'`) are inserted literally and a block is
 * always replaced at its own position.
 *
 * @param {Map<string, string>} htmlFiles - filename → HTML content from Pass 1
 * @param {object} linked - LinkedEntities from link-assigner
 * @param {import('../engine/prose.js').ProseEngine} proseEngine
 * @param {string} domain - Universe domain
 * @param {object} logger - Logger instance
 * @returns {Promise<Map<string, string>>} filename → enriched HTML content
 * @throws {Error} When no logger is supplied
 */
export async function enrichDocuments(
  htmlFiles,
  linked,
  proseEngine,
  domain,
  logger,
) {
  if (!logger) throw new Error("logger is required");
  const enriched = new Map();
  let totalBlocks = 0;
  let enrichedBlocks = 0;

  for (const [filename, html] of htmlFiles) {
    const matches = [...html.matchAll(ENRICH_PATTERN)];
    totalBlocks += matches.length;

    // Output is built as: untouched text up to a block, the enriched block,
    // and so on; `cursor` marks how much of the source has been emitted.
    const pieces = [];
    let cursor = 0;

    for (const match of matches) {
      const [whole, openTag, enrichKey, content, closeTag] = match;
      const ctx = buildEnrichContext(enrichKey, linked);
      if (!ctx) continue;

      const placeholder = content.replace(/<[^>]+>/g, "").trim();
      const messages = buildEnrichMessages(ctx, placeholder, domain);
      let prose = await proseEngine.generateStructured(
        `enrich_${enrichKey}`,
        messages,
      );
      if (!prose) continue;

      if (domain) prose = stripOffDomainIris(prose, domain);
      prose = balanceTags(prose);
      const cleanOpen = openTag.replace(/\s*data-enrich="[^"]*"/, "");

      pieces.push(
        html.slice(cursor, match.index),
        `${cleanOpen}\n ${prose}\n ${closeTag}`,
      );
      cursor = match.index + whole.length;
      enrichedBlocks++;
    }

    pieces.push(html.slice(cursor));
    enriched.set(filename, pieces.join(""));
  }

  logger.info(
    "enricher",
    `Enriched ${enrichedBlocks}/${totalBlocks} prose blocks`,
  );
  return enriched;
}
|