portable-agent-layer 0.41.1 → 0.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,409 @@
1
+ /**
2
+ * Knowledge ingest — merge extracted entities into the markdown store.
3
+ *
4
+ * Called by the `pal cli knowledge ingest` subcommand (which the `entities`
5
+ * skill drives). Accepts the canonical entity extraction JSON shape and:
6
+ *
7
+ * 1. Upserts each person and company as a markdown file (Phase 1 store).
8
+ * 2. Preserves all rich fields (role, social, context, industry, etc.) as
9
+ * frontmatter — old behavior dropped these.
10
+ * 3. Auto-creates a `part-of` related edge when a person record carries a
11
+ * `company` field (and stub-creates the company if missing).
12
+ * 4. Appends a per-source log section to the body, fingerprinted with the
13
+ * sourceId so re-ingesting the same source is idempotent.
14
+ *
15
+ * Merge rule: a non-null value in the new payload updates the entity;
16
+ * null/undefined leaves the prior value intact. Arrays (socials, tags) are
17
+ * unioned, not overwritten.
18
+ */
19
+
20
+ import {
21
+ type Entity,
22
+ type EntityFrontmatter,
23
+ list,
24
+ load,
25
+ type Related,
26
+ save,
27
+ slugify,
28
+ } from "./lib";
29
+
30
+ // --- Public input shape -----------------------------------------------------
31
+
/**
 * One extracted person, as supplied in the ingest payload.
 *
 * Only `name` is required. For the optional known fields, `null` and
 * `undefined` both mean "no information in this source".
 */
export interface PersonInput {
  /** Display name; slugified to derive the entity slug and stored as the title. */
  name: string;
  /** Stored as the `role` frontmatter field and echoed in the source log. */
  role?: string | null;
  /** Job title; stored under the `position` frontmatter field. */
  title?: string | null;
  /** Employer name; when set, a `part-of` edge to that company is added. */
  company?: string | null;
  /** Social handles keyed by network; persisted as `key:value` strings. */
  social?: Record<string, string | null> | null;
  /** Free-text snippet appended to the per-source log section of the body. */
  context?: string | null;
  /** Stored as the `importance` frontmatter field and echoed in the source log. */
  importance?: "primary" | "secondary" | "minor" | null;
  /** Additional extraction fields are accepted; this module does not read them. */
  [extra: string]: unknown;
}
42
+
/**
 * One extracted company, as supplied in the ingest payload.
 *
 * Only `name` is required. For the optional known fields, `null` and
 * `undefined` both mean "no information in this source".
 */
export interface CompanyInput {
  /** Display name; stored as the title and used for the slug when no domain is given. */
  name: string;
  /** Web domain; when non-blank it is used instead of `name` to derive the slug. */
  domain?: string | null;
  /** Industry description; stored verbatim and also split into `topic:` tags. */
  industry?: string | null;
  /** Free-text snippet appended to the per-source log section of the body. */
  context?: string | null;
  /** Stored as the `mentioned_as` frontmatter field and echoed in the source log. */
  mentioned_as?: string | null;
  /** Stored as the `sentiment` frontmatter field and echoed in the source log. */
  sentiment?: "positive" | "neutral" | "negative" | "mixed" | null;
  /** Additional extraction fields are accepted; this module does not read them. */
  [extra: string]: unknown;
}
52
+
/**
 * Payload accepted by `ingestEntities`.
 *
 * Both lists are optional; a missing list is treated as empty.
 */
export interface IngestInput {
  /** People to upsert. */
  people?: PersonInput[];
  /** Companies to upsert; these are processed before people. */
  companies?: CompanyInput[];
}
57
+
/**
 * Summary returned by `ingestEntities`: one entry per processed input record.
 *
 * `created` is true when no entity existed at that slug before the call.
 */
export interface IngestResult {
  /** One entry per person in the payload, in payload order. */
  people: { slug: string; created: boolean }[];
  /** One entry per company in the payload, in payload order. */
  companies: { slug: string; created: boolean }[];
}
62
+
63
+ // --- Constants --------------------------------------------------------------
64
+
/** Opening text of the HTML comment that fingerprints a source-log section. */
const SOURCE_MARKER_PREFIX = "<!-- src:";
/** Closing text of the HTML comment that fingerprints a source-log section. */
const SOURCE_MARKER_SUFFIX = " -->";
67
+
68
+ // --- Field merge ------------------------------------------------------------
69
+
/**
 * Choose the value to keep for a scalar frontmatter field.
 *
 * The incoming value wins only when it carries information: `null`,
 * `undefined`, and the empty string all leave the prior value in place.
 * Empty strings count as "absent" so that merging agrees with entity
 * creation (which uses truthiness checks) and with `mergeSocials` /
 * `appendSourceLog` (which skip zero-length strings). Without this, a blank
 * field in a later extraction would erase a value learned earlier.
 *
 * @param prior - Value currently stored, if any.
 * @param next - Value from the new payload.
 * @returns `next` when it is a usable value, otherwise `prior`.
 */
function mergeScalar<T>(prior: T | undefined, next: T | null | undefined): T | undefined {
  if (next === null || next === undefined) return prior;
  // A blank string is "no information", not an instruction to clear the field.
  if (typeof next === "string" && next.length === 0) return prior;
  return next;
}
75
+
/**
 * Concatenate `prior` and `next`, skipping any item of `next` that is already
 * present. Order is first-seen: all of `prior` (kept as-is, including any
 * duplicates it already holds), then the new items of `next`.
 *
 * @returns A fresh array; neither input is mutated.
 */
function mergeStringArray(
  prior: string[] | undefined,
  next: string[] | undefined
): string[] {
  const merged: string[] = Array.from(prior ?? []);
  const known = new Set<string>(merged);
  for (const candidate of next ?? []) {
    if (known.has(candidate)) continue;
    known.add(candidate);
    merged.push(candidate);
  }
  return merged;
}
91
+
/**
 * Combine two social-handle maps into one.
 *
 * Entries are copied from `prior` first and from `next` second, so a
 * non-empty string in `next` replaces the prior value for the same key.
 * Anything that is not a non-empty string (null, "", numbers, ...) is
 * ignored on both sides. `prior` is accepted as `unknown` and is only read
 * when it is a non-null object.
 */
function mergeSocials(
  prior: unknown,
  next: Record<string, string | null> | null | undefined
): Record<string, string> {
  const merged: Record<string, string> = {};

  // Copy every usable (non-empty string) entry of `source` into `merged`.
  const absorb = (source: Record<string, unknown>): void => {
    for (const key of Object.keys(source)) {
      const value = source[key];
      if (typeof value !== "string" || value.length === 0) continue;
      merged[key] = value;
    }
  };

  if (prior && typeof prior === "object") {
    absorb(prior as Record<string, unknown>);
  }
  absorb(next ?? {});
  return merged;
}
108
+
/**
 * Return `list` with `rel` appended, unless an edge with the same slug and
 * type is already in it — in that case the original array is returned
 * unchanged. The input array is never mutated.
 */
function addRelated(list: Related[], rel: Related): Related[] {
  const alreadyLinked = list.some(
    (edge) => edge.slug === rel.slug && edge.type === rel.type
  );
  return alreadyLinked ? list : [...list, rel];
}
116
+
/**
 * Split a company industry string into atomic, topic-prefixed tags.
 *
 * Whitespace, `/`, `,` and `;` are separators; hyphens are preserved (so
 * `ai-research` stays one token). Tokens without any letter or digit
 * (e.g. a lone `&` or `-`) are dropped so they cannot become junk tags, and
 * list punctuation no longer sticks to a token ("Software, AI" used to yield
 * `topic:software,`). Each token gets the `topic:` prefix so the graph
 * builder can recognize them as facet-style filters and skip them when
 * generating tag co-occurrence edges (ISC-18 — prevents phantom edges
 * between unrelated entities that merely share an industry word).
 *
 * @param industry - Free-text industry description.
 * @returns De-duplicated lowercase tags in first-seen order.
 */
function industryToTopicTags(industry: string): string[] {
  const separators = /[\s/,;]+/;
  const hasLetterOrDigit = /[\p{L}\p{N}]/u;
  // A Set iterates in insertion order, which gives first-seen de-duplication.
  const tags = new Set<string>();
  for (const token of industry.toLowerCase().split(separators)) {
    // Also rejects the empty strings split() yields at the string edges.
    if (!hasLetterOrDigit.test(token)) continue;
    tags.add(`topic:${token}`);
  }
  return [...tags];
}
138
+
139
+ // --- Source log -------------------------------------------------------------
140
+
/** Build the HTML-comment fingerprint that tags a log section with `sourceId`. */
function sourceMarker(sourceId: string): string {
  return SOURCE_MARKER_PREFIX + sourceId + SOURCE_MARKER_SUFFIX;
}
144
+
/** True when `body` already contains the fingerprint comment for `sourceId`. */
function bodyHasSource(body: string, sourceId: string): boolean {
  const marker = sourceMarker(sourceId);
  return body.includes(marker);
}
148
+
/**
 * Append a per-source log section to an entity body.
 *
 * The section is a `###` heading (date and source id), the source
 * fingerprint comment, an optional `key: value` attribute line, and the
 * optional trimmed context snippet. If the body already carries the
 * fingerprint for `sourceId`, the body is returned untouched, which makes
 * re-ingesting the same source a no-op.
 *
 * @param body - Existing markdown body (may be empty).
 * @param sourceId - Identifier of the source being logged.
 * @param contextSnippet - Free text for the section; blank values are skipped.
 * @param attrs - Attributes for the summary line; only non-empty strings are shown.
 * @param date - Timestamp for the heading; only its first 10 characters are
 *   used. Defaults to the current time. The legacy-store migration passes each
 *   entity's original `first_seen` here so replayed provenance keeps its real
 *   date.
 * @returns The body with the new section appended, ending in a newline.
 */
export function appendSourceLog(
  body: string,
  sourceId: string,
  contextSnippet: string | null | undefined,
  attrs: Record<string, string | null | undefined>,
  date?: string
): string {
  if (bodyHasSource(body, sourceId)) return body;

  const stamp = date ?? new Date().toISOString();
  const day = stamp.slice(0, 10);

  const attrParts: string[] = [];
  for (const [key, value] of Object.entries(attrs)) {
    if (typeof value === "string" && value.length > 0) {
      attrParts.push(`${key}: ${value}`);
    }
  }
  const attrLine = attrParts.join(" · ");

  // The leading "" puts a blank line between the prior content and the heading.
  const section: string[] = ["", `### ${day} — ${sourceId}`, sourceMarker(sourceId)];
  if (attrLine) section.push(attrLine);
  const snippet = contextSnippet?.trim();
  if (snippet) section.push("", snippet);

  // Make sure the existing body ends with a newline before appending.
  const needsNewline = body !== "" && !body.endsWith("\n");
  const head = needsNewline ? `${body}\n` : body;
  return `${head}${section.join("\n")}\n`;
}
175
+
176
+ // --- Per-domain upsert ------------------------------------------------------
177
+
/** Outcome of upserting a single person or company. */
interface UpsertResult {
  /** Slug the entity was stored under. */
  slug: string;
  /** True when no entity existed at `slug` before this upsert. */
  created: boolean;
  /** The entity as it was passed to `save`. */
  entity: Entity;
}
183
+
/**
 * Build a brand-new People entity from an extraction record.
 *
 * Starts from a fixed seedling template (quality 5, no tags, no related
 * edges, `created` and `updated` both set to now) and copies over only the
 * optional fields that are truthy in `input`. Socials are stored as
 * `key:value` strings. The body starts empty.
 */
function newPersonEntity(input: PersonInput, slug: string): Entity {
  const timestamp = new Date().toISOString();
  const frontmatter: EntityFrontmatter = {
    title: input.name,
    type: "person",
    tags: [],
    created: timestamp,
    updated: timestamp,
    quality: 5,
    status: "seedling",
    related: [],
  };

  if (input.role) frontmatter.role = input.role;
  // The payload's `title` is the job title; it is stored as `position`
  // because `title` in frontmatter already holds the display name.
  if (input.title) frontmatter.position = input.title;
  if (input.company) frontmatter.company = input.company;
  if (input.importance) frontmatter.importance = input.importance;

  const socialPairs = Object.entries(mergeSocials(undefined, input.social));
  if (socialPairs.length > 0) {
    frontmatter.socials = socialPairs.map(([network, handle]) => `${network}:${handle}`);
  }

  return { domain: "People", slug, frontmatter, body: "" };
}
206
+
/**
 * Build a brand-new Companies entity from an extraction record.
 *
 * Starts from a fixed seedling template (quality 5, no related edges,
 * `created` and `updated` both set to now). Tags are seeded from the
 * industry string via `industryToTopicTags` when an industry is given. Only
 * optional fields that are truthy in `input` are copied. The body starts
 * empty.
 */
function newCompanyEntity(input: CompanyInput, slug: string): Entity {
  const timestamp = new Date().toISOString();
  const topicTags = input.industry ? industryToTopicTags(input.industry) : [];
  const frontmatter: EntityFrontmatter = {
    title: input.name,
    type: "company",
    tags: topicTags,
    created: timestamp,
    updated: timestamp,
    quality: 5,
    status: "seedling",
    related: [],
  };

  // The payload's `domain` is stored under the `domain_name` frontmatter key.
  if (input.domain) frontmatter.domain_name = input.domain;
  if (input.industry) frontmatter.industry = input.industry;
  if (input.mentioned_as) frontmatter.mentioned_as = input.mentioned_as;
  if (input.sentiment) frontmatter.sentiment = input.sentiment;

  return { domain: "Companies", slug, frontmatter, body: "" };
}
225
+
/**
 * Fold a new extraction record into an existing People entity.
 *
 * Scalar fields go through `mergeScalar`. Stored socials are decoded from
 * their `key:value` string form, merged with the incoming map via
 * `mergeSocials`, and re-encoded. Returns a new entity; `prior` is not
 * mutated and its body is carried over unchanged.
 */
function mergePerson(prior: Entity, input: PersonInput): Entity {
  const frontmatter = { ...prior.frontmatter };
  // Assignment order is kept stable: it determines where newly added keys
  // land in the frontmatter object.
  frontmatter.role = mergeScalar(frontmatter.role, input.role);
  frontmatter.position = mergeScalar(frontmatter.position, input.title);
  frontmatter.company = mergeScalar(frontmatter.company, input.company);
  frontmatter.importance = mergeScalar(frontmatter.importance, input.importance);

  let storedSocials: unknown;
  if (Array.isArray(frontmatter.socials)) {
    const pairs: Array<[string, string]> = [];
    for (const entry of frontmatter.socials as string[]) {
      // Split on the FIRST ':' only — values like 'https://...' contain
      // further colons that belong to the value.
      const sep = entry.indexOf(":");
      if (sep <= 0) continue; // no separator, or empty key
      const value = entry.slice(sep + 1);
      if (value.length === 0) continue;
      pairs.push([entry.slice(0, sep), value]);
    }
    storedSocials = Object.fromEntries(pairs);
  } else {
    // Not the array encoding: hand whatever is there (or an empty map) to
    // mergeSocials, which filters non-string values itself.
    storedSocials = frontmatter.socials ?? {};
  }

  const socials = mergeSocials(storedSocials, input.social);
  if (Object.keys(socials).length > 0) {
    frontmatter.socials = Object.entries(socials).map(
      ([network, handle]) => `${network}:${handle}`
    );
  }
  return { ...prior, frontmatter };
}
253
+
/**
 * Fold a new extraction record into an existing Companies entity.
 *
 * Scalar fields go through `mergeScalar`. When the record carries an
 * industry, its topic tags are unioned into the existing tags. Returns a new
 * entity; `prior` is not mutated and its body is carried over unchanged.
 */
function mergeCompany(prior: Entity, input: CompanyInput): Entity {
  const frontmatter = { ...prior.frontmatter };
  // Assignment order is kept stable: it determines where newly added keys
  // land in the frontmatter object.
  frontmatter.domain_name = mergeScalar(frontmatter.domain_name, input.domain);
  frontmatter.industry = mergeScalar(frontmatter.industry, input.industry);
  frontmatter.mentioned_as = mergeScalar(frontmatter.mentioned_as, input.mentioned_as);
  frontmatter.sentiment = mergeScalar(frontmatter.sentiment, input.sentiment);

  if (input.industry) {
    const topicTags = industryToTopicTags(input.industry);
    frontmatter.tags = mergeStringArray(frontmatter.tags, topicTags);
  }
  return { ...prior, frontmatter };
}
265
+
/**
 * Create or update the People entity for one extraction record and save it.
 *
 * The slug comes from the person's name. An existing entity is merged with
 * the record, otherwise a new one is built. In both cases `updated` is
 * refreshed and a source-log section for `sourceId` is appended to the body
 * (a no-op if that source is already logged).
 *
 * @throws Error when the name slugifies to an empty value.
 */
function upsertPerson(
  input: PersonInput,
  sourceId: string,
  rootDir?: string
): UpsertResult {
  const slug = slugify(input.name);
  if (!slug) throw new Error(`ingest: cannot slugify person name "${input.name}"`);

  const prior = load("People", slug, rootDir);
  const created = prior === null;

  let base: Entity;
  if (prior) {
    base = mergePerson(prior, input);
  } else {
    base = newPersonEntity(input, slug);
  }

  const frontmatter = { ...base.frontmatter, updated: new Date().toISOString() };
  const body = appendSourceLog(base.body, sourceId, input.context, {
    role: input.role ?? null,
    importance: input.importance ?? null,
  });
  const entity: Entity = { ...base, frontmatter, body };

  save(entity, rootDir);
  return { slug, created, entity };
}
290
+
/**
 * Create or update the Companies entity for one extraction record and save it.
 *
 * The slug is derived from the domain when one is given (and is not blank),
 * otherwise from the name. An existing entity is merged with the record,
 * otherwise a new one is built. In both cases `updated` is refreshed and a
 * source-log section for `sourceId` is appended to the body (a no-op if that
 * source is already logged).
 *
 * @throws Error when the chosen key slugifies to an empty value.
 */
function upsertCompany(
  input: CompanyInput,
  sourceId: string,
  rootDir?: string
): UpsertResult {
  let slugSource = input.name;
  if (input.domain?.trim()) {
    slugSource = input.domain;
  }
  const slug = slugify(slugSource);
  if (!slug) throw new Error(`ingest: cannot slugify company "${input.name}"`);

  const prior = load("Companies", slug, rootDir);
  const created = prior === null;

  let base: Entity;
  if (prior) {
    base = mergeCompany(prior, input);
  } else {
    base = newCompanyEntity(input, slug);
  }

  const frontmatter = { ...base.frontmatter, updated: new Date().toISOString() };
  const body = appendSourceLog(base.body, sourceId, input.context, {
    mentioned_as: input.mentioned_as ?? null,
    sentiment: input.sentiment ?? null,
  });
  const entity: Entity = { ...base, frontmatter, body };

  save(entity, rootDir);
  return { slug, created, entity };
}
316
+
/**
 * Find an existing Companies entity whose frontmatter title matches `name`
 * (case-insensitive, surrounding whitespace ignored). Used by
 * `linkPersonToCompany` to avoid stubbing a duplicate when the canonical
 * company already lives at a non-name-derived slug (e.g. domain-derived
 * "acme-example" for "Acme Labs"). ISC-21.
 *
 * @returns The slug of the first match, or `null` when nothing matches or
 *   `name` is blank.
 */
function findExistingCompanyByTitle(name: string, rootDir?: string): string | null {
  const target = name.trim().toLowerCase();
  if (!target) return null;
  for (const e of list("Companies", rootDir)) {
    // Defensive: skip a record whose title is missing or not a string
    // instead of throwing in the middle of an ingest.
    const title: unknown = e.frontmatter.title;
    if (typeof title === "string" && title.trim().toLowerCase() === target) {
      return e.slug;
    }
  }
  return null;
}

/**
 * Ensure a `part-of` edge from person → company and let the person inherit
 * the company's `topic:*` tags.
 *
 * `nameToSlug` is the lookup built from companies ingested in this call — it
 * lets us prefer a domain-derived slug ("acme.example" → "acme-example") over
 * the naive name-derived one ("Acme Labs" → "acme-labs") when the same
 * payload defines both. When neither the lookup nor the store knows the
 * company, the slug falls back to slugify(name) and the company is
 * stub-created so the edge has a target.
 *
 * Does nothing when no slug can be derived for the company or when the
 * person cannot be loaded.
 *
 * @param personSlug - Slug of the already-saved person.
 * @param companyName - Company name as written on the person record.
 * @param nameToSlug - Lowercased company name → slug, for this ingest call.
 * @param sourceId - Source id recorded on a stub company, if one is created.
 * @param rootDir - Optional store root, forwarded to the store helpers.
 */
function linkPersonToCompany(
  personSlug: string,
  companyName: string,
  nameToSlug: Map<string, string>,
  sourceId: string,
  rootDir?: string
): void {
  // Resolution order: (1) in-call lookup map (domain-derived slug wins when
  // the same payload defines both), (2) existing-store title scan (ISC-21
  // — re-ingest of a person referencing an already-known company), (3)
  // fall back to slugify(name) and stub the company.
  const companySlug =
    nameToSlug.get(companyName.toLowerCase()) ??
    findExistingCompanyByTitle(companyName, rootDir) ??
    slugify(companyName);
  if (!companySlug) return;

  // Load the company once; if it is missing, stub it and keep the stub that
  // was just saved rather than reading it back from the store.
  let company = load("Companies", companySlug, rootDir);
  if (!company) {
    company = upsertCompany({ name: companyName }, sourceId, rootDir).entity;
  }

  const person = load("People", personSlug, rootDir);
  if (!person) return;

  // ISC-18: inherit ONLY the company's topic:* tags. Structural tags stay
  // company-scoped so we don't create phantom graph edges between unrelated
  // people sharing only their employer's slug or other non-facet labels.
  const inheritedTopicTags = company.frontmatter.tags.filter((t) =>
    t.startsWith("topic:")
  );

  const updated: Entity = {
    ...person,
    frontmatter: {
      ...person.frontmatter,
      related: addRelated(person.frontmatter.related, {
        slug: companySlug,
        type: "part-of",
      }),
      tags: mergeStringArray(person.frontmatter.tags, inheritedTopicTags),
    },
  };
  save(updated, rootDir);
}
381
+
382
+ // --- Public API -------------------------------------------------------------
383
+
/**
 * Merge an extraction payload into the markdown store.
 *
 * Companies are upserted first so that person→company links can resolve to
 * the canonical (possibly domain-derived) slug instead of guessing one from
 * the name. Each person is then upserted and, when the record names a
 * company, linked to it.
 *
 * @param input - People and companies to ingest; missing lists count as empty.
 * @param sourceId - Identifier of the source; fingerprints the log sections.
 * @param rootDir - Optional store root, forwarded to the store helpers.
 * @returns The slug and created-flag of every processed record, in payload order.
 */
export function ingestEntities(
  input: IngestInput,
  sourceId: string,
  rootDir?: string
): IngestResult {
  // Phase 1: companies.
  const ingestedCompanies: Array<{ slug: string; created: boolean; name: string }> = [];
  for (const company of input.companies ?? []) {
    const { slug, created } = upsertCompany(company, sourceId, rootDir);
    ingestedCompanies.push({ slug, created, name: company.name });
  }

  // Lowercased name → slug. If a name occurs twice, the later record wins.
  const nameToSlug = new Map<string, string>();
  for (const { name, slug } of ingestedCompanies) {
    nameToSlug.set(name.toLowerCase(), slug);
  }

  // Phase 2: people, each linked to its company where one is named.
  const people: IngestResult["people"] = [];
  for (const person of input.people ?? []) {
    const { slug, created } = upsertPerson(person, sourceId, rootDir);
    if (person.company) {
      linkPersonToCompany(slug, person.company, nameToSlug, sourceId, rootDir);
    }
    people.push({ slug, created });
  }

  return {
    people,
    companies: ingestedCompanies.map(({ slug, created }) => ({ slug, created })),
  };
}