portable-agent-layer 0.41.0 → 0.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/install.mjs +8 -0
- package/README.md +2 -1
- package/assets/skills/analyze-youtube/SKILL.md +1 -1
- package/assets/skills/entities/SKILL.md +95 -0
- package/assets/templates/PAL/README.md +1 -1
- package/package.json +10 -12
- package/src/cli/index.ts +8 -0
- package/src/cli/knowledge.ts +620 -0
- package/src/cli/migrate.ts +188 -3
- package/src/hooks/handlers/reflect-trigger.ts +1 -0
- package/src/hooks/handlers/update-check.ts +4 -0
- package/src/hooks/lib/detached-inference.ts +1 -0
- package/src/hooks/lib/export.ts +1 -1
- package/src/hooks/lib/inference.ts +1 -0
- package/src/hooks/lib/paths.ts +2 -1
- package/src/hooks/lib/retrieval-index.ts +1 -0
- package/src/targets/lib.ts +23 -36
- package/src/tools/knowledge/graph.ts +395 -0
- package/src/tools/knowledge/ingest.ts +409 -0
- package/src/tools/knowledge/lib.ts +493 -0
- package/assets/skills/extract-entities/SKILL.md +0 -62
- package/assets/skills/extract-entities/tools/entity-save.ts +0 -110
- package/src/hooks/lib/entities.ts +0 -304
- package/src/tools/export.ts +0 -40
- package/src/tools/import.ts +0 -111
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge ingest — merge extracted entities into the markdown store.
|
|
3
|
+
*
|
|
4
|
+
* Called by the `pal cli knowledge ingest` subcommand (which the `entities`
|
|
5
|
+
* skill drives). Accepts the canonical entity extraction JSON shape and:
|
|
6
|
+
*
|
|
7
|
+
* 1. Upserts each person and company as a markdown file (Phase 1 store).
|
|
8
|
+
* 2. Preserves all rich fields (role, social, context, industry, etc.) as
|
|
9
|
+
* frontmatter — old behavior dropped these.
|
|
10
|
+
* 3. Auto-creates a `part-of` related edge when a person record carries a
|
|
11
|
+
* `company` field (and stub-creates the company if missing).
|
|
12
|
+
* 4. Appends a per-source log section to the body, fingerprinted with the
|
|
13
|
+
* sourceId so re-ingesting the same source is idempotent.
|
|
14
|
+
*
|
|
15
|
+
* Merge rule: a non-null value in the new payload updates the entity;
|
|
16
|
+
* null/undefined leaves the prior value intact. Arrays (socials, tags) are
|
|
17
|
+
* unioned, not overwritten.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import {
|
|
21
|
+
type Entity,
|
|
22
|
+
type EntityFrontmatter,
|
|
23
|
+
list,
|
|
24
|
+
load,
|
|
25
|
+
type Related,
|
|
26
|
+
save,
|
|
27
|
+
slugify,
|
|
28
|
+
} from "./lib";
|
|
29
|
+
|
|
30
|
+
// --- Public input shape -----------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/**
 * One extracted person, as accepted by `ingestEntities`.
 *
 * Every field except `name` may be omitted or `null`.
 */
export interface PersonInput {
  /** Display name; becomes the entity title and is slugified for the file name. */
  name: string;
  /** Stored in frontmatter as `role` and echoed in the per-source log line. */
  role?: string | null;
  /** Job title; stored in frontmatter under the key `position`. */
  title?: string | null;
  /** Employer name; stored as `company` and used to link the person to a company entity. */
  company?: string | null;
  /** Handle/URL per network; non-empty entries are persisted as `key:value` strings. */
  social?: Record<string, string | null> | null;
  /** Free-text snippet appended to the per-source log section of the body. */
  context?: string | null;
  /** Stored in frontmatter and echoed in the per-source log line. */
  importance?: "primary" | "secondary" | "minor" | null;
  /** Additional keys are accepted by the type; this module does not read them. */
  [extra: string]: unknown;
}
|
|
42
|
+
|
|
43
|
+
/**
 * One extracted company, as accepted by `ingestEntities`.
 *
 * Every field except `name` may be omitted or `null`.
 */
export interface CompanyInput {
  /** Display name; becomes the entity title and the slug source when no domain is given. */
  name: string;
  /** Preferred slug source when non-blank; stored in frontmatter as `domain_name`. */
  domain?: string | null;
  /** Stored verbatim as `industry` and also split into `topic:`-prefixed tags. */
  industry?: string | null;
  /** Free-text snippet appended to the per-source log section of the body. */
  context?: string | null;
  /** Stored in frontmatter and echoed in the per-source log line. */
  mentioned_as?: string | null;
  /** Stored in frontmatter and echoed in the per-source log line. */
  sentiment?: "positive" | "neutral" | "negative" | "mixed" | null;
  /** Additional keys are accepted by the type; this module does not read them. */
  [extra: string]: unknown;
}
|
|
52
|
+
|
|
53
|
+
/** Payload for `ingestEntities`; either list may be omitted. */
export interface IngestInput {
  /** People to upsert; processed after all companies. */
  people?: Array<PersonInput>;
  /** Companies to upsert; processed first so person links can resolve to them. */
  companies?: Array<CompanyInput>;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Summary returned by `ingestEntities`: one entry per input record, in
 * input order. `created` is true when no prior entity was found at that slug.
 */
export interface IngestResult {
  /** One entry per person in the payload. */
  people: { slug: string; created: boolean }[];
  /** One entry per company in the payload. */
  companies: { slug: string; created: boolean }[];
}
|
|
62
|
+
|
|
63
|
+
// --- Constants --------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
/** Opening half of the HTML comment that fingerprints a source-log section. */
const SOURCE_MARKER_PREFIX = "<!-- src:";

/** Closing half of the source fingerprint comment (note the leading space). */
const SOURCE_MARKER_SUFFIX = " -->";
|
|
67
|
+
|
|
68
|
+
// --- Field merge ------------------------------------------------------------
|
|
69
|
+
|
|
70
|
+
/**
 * Pick the value to keep when merging a single scalar field.
 *
 * `next` wins when it carries content. `null`, `undefined` and the empty
 * string all mean "no new information" and leave `prior` intact. Empty
 * strings are skipped so that merging agrees with the create path (which
 * only writes truthy values) and with `mergeSocials` (which drops
 * zero-length strings); otherwise a blank field in a later payload would
 * erase a value captured earlier.
 *
 * @param prior Value currently stored, if any.
 * @param next Value from the incoming payload.
 * @returns `next` when it carries content, otherwise `prior`.
 */
function mergeScalar<T>(prior: T | undefined, next: T | null | undefined): T | undefined {
  if (next === null || next === undefined) return prior;
  // Only strings are checked for emptiness; falsy non-strings (0, false) are real values.
  if (typeof next === "string" && next.length === 0) return prior;
  return next;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Concatenate `prior` with the entries of `next` it does not already
 * contain. Order is first-seen; `prior` is copied, never mutated.
 */
function mergeStringArray(
  prior: string[] | undefined,
  next: string[] | undefined
): string[] {
  const merged = Array.from(prior ?? []);
  const known = new Set(merged);
  for (const candidate of next ?? []) {
    if (known.has(candidate)) continue;
    known.add(candidate);
    merged.push(candidate);
  }
  return merged;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Combine two social-handle maps into one.
 *
 * Only non-empty string values are kept from either side; an entry in
 * `next` replaces the entry with the same key in `prior`. A `prior` that
 * is not an object contributes nothing.
 */
function mergeSocials(
  prior: unknown,
  next: Record<string, string | null> | null | undefined
): Record<string, string> {
  const merged: Record<string, string> = {};
  const absorb = (source: object): void => {
    for (const [network, handle] of Object.entries(source)) {
      if (typeof handle !== "string" || handle.length === 0) continue;
      merged[network] = handle;
    }
  };
  if (typeof prior === "object" && prior !== null) absorb(prior);
  absorb(next ?? {});
  return merged;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Return `list` with `rel` appended, unless an edge with the same slug and
 * type is already present — in which case the same array is returned
 * unchanged. The input array is never mutated.
 */
function addRelated(list: Related[], rel: Related): Related[] {
  const alreadyLinked = list.some(
    (edge) => edge.slug === rel.slug && edge.type === rel.type
  );
  return alreadyLinked ? list : [...list, rel];
}
|
|
116
|
+
|
|
117
|
+
/**
 * Split a company industry string into atomic, topic-prefixed tags.
 *
 * Whitespace, `/`, `,` and `;` are separators; hyphens are preserved (so
 * `ai-research` stays one token). Tokens with no letter or digit (a
 * standalone `&`, `-`, `|`, ...) are dropped, so list-style input such as
 * "Software, SaaS & AI" cannot produce tags like `topic:software,` or
 * `topic:&`. Each token is lower-cased and gets the `topic:` prefix so the
 * graph builder can recognize them as facet-style filters and skip them
 * when generating tag co-occurrence edges (ISC-18 — prevents phantom edges
 * between unrelated entities that merely share an industry word).
 *
 * @param industry Free-text industry description.
 * @returns De-duplicated tags in first-seen order; empty for blank input.
 */
function industryToTopicTags(industry: string): string[] {
  const separators = /[\s/,;]+/;
  const hasContent = /[\p{L}\p{N}]/u;
  const seen = new Set<string>();
  const out: string[] = [];
  for (const token of industry.toLowerCase().split(separators)) {
    // Also rejects the empty strings split() yields at the string edges.
    if (!hasContent.test(token)) continue;
    const prefixed = `topic:${token}`;
    if (!seen.has(prefixed)) {
      seen.add(prefixed);
      out.push(prefixed);
    }
  }
  return out;
}
|
|
138
|
+
|
|
139
|
+
// --- Source log -------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
/** Build the HTML-comment fingerprint that tags a body section with `sourceId`. */
function sourceMarker(sourceId: string): string {
  return SOURCE_MARKER_PREFIX + sourceId + SOURCE_MARKER_SUFFIX;
}
|
|
144
|
+
|
|
145
|
+
/** True when `body` already contains the fingerprint comment for `sourceId`. */
function bodyHasSource(body: string, sourceId: string): boolean {
  const fingerprint = sourceMarker(sourceId);
  return body.indexOf(fingerprint) !== -1;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Append a per-source section to the body. Idempotent on `sourceId`: when
 * the body already contains that source's fingerprint comment it is
 * returned unchanged.
 *
 * The appended section is a blank line, a `### <date> — <sourceId>`
 * heading, the fingerprint comment, an optional `key: value · key: value`
 * line built from the non-empty `attrs`, and — when `contextSnippet` has
 * non-whitespace content — a blank line followed by the trimmed snippet.
 *
 * `date` defaults to today's ISO date. The legacy-store migration overrides
 * it with each entity's original `first_seen` so replayed provenance retains
 * its real date instead of stamping today on every record. A blank `date`
 * is treated as absent, so the heading never ends up without a date.
 *
 * @param body Existing markdown body (may be empty).
 * @param sourceId Identifier of the source being logged.
 * @param contextSnippet Optional free text for the section.
 * @param attrs Attribute pairs; entries that are not non-empty strings are skipped.
 * @param date Optional date/timestamp; only its first 10 characters are used.
 * @returns The body with the section appended, ending in a newline.
 */
export function appendSourceLog(
  body: string,
  sourceId: string,
  contextSnippet: string | null | undefined,
  attrs: Record<string, string | null | undefined>,
  date?: string
): string {
  if (bodyHasSource(body, sourceId)) return body;
  // `??` alone would accept "" and yield an empty date in the heading.
  const suppliedDate = date?.trim();
  const dateStr = (suppliedDate ? suppliedDate : new Date().toISOString()).slice(0, 10);
  const attrLine = Object.entries(attrs)
    .filter(([, v]) => typeof v === "string" && v.length > 0)
    .map(([k, v]) => `${k}: ${v}`)
    .join(" · ");
  const lines: string[] = ["", `### ${dateStr} — ${sourceId}`, sourceMarker(sourceId)];
  if (attrLine) lines.push(attrLine);
  if (contextSnippet?.trim()) lines.push("", contextSnippet.trim());
  // Make sure the new section starts on its own line.
  const prefix = body.endsWith("\n") || body === "" ? body : `${body}\n`;
  return `${prefix}${lines.join("\n")}\n`;
}
|
|
175
|
+
|
|
176
|
+
// --- Per-domain upsert ------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
/** Outcome of upserting one person or company. */
interface UpsertResult {
  /** Slug the entity was stored under. */
  slug: string;
  /** True when no prior entity was found at that slug. */
  created: boolean;
  /** The entity exactly as it was handed to `save`. */
  entity: Entity;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Build a fresh People entity from an input record.
 *
 * Base frontmatter uses quality 5, status "seedling", empty tags/related,
 * and the current time for both `created` and `updated`. Optional fields
 * are added only when truthy; `title` is stored as `position`, and socials
 * are stored as `key:value` strings. The body starts empty.
 */
function newPersonEntity(input: PersonInput, slug: string): Entity {
  const timestamp = new Date().toISOString();
  const socialEntries = Object.entries(mergeSocials(undefined, input.social));
  // Spread order below fixes the frontmatter key order.
  const frontmatter: EntityFrontmatter = {
    title: input.name,
    type: "person",
    tags: [],
    created: timestamp,
    updated: timestamp,
    quality: 5,
    status: "seedling",
    related: [],
    ...(input.role ? { role: input.role } : {}),
    ...(input.title ? { position: input.title } : {}),
    ...(input.company ? { company: input.company } : {}),
    ...(input.importance ? { importance: input.importance } : {}),
    ...(socialEntries.length > 0
      ? { socials: socialEntries.map(([network, handle]) => `${network}:${handle}`) }
      : {}),
  };
  return { domain: "People", slug, frontmatter, body: "" };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Build a fresh Companies entity from an input record.
 *
 * Base frontmatter uses quality 5, status "seedling", empty related, and
 * the current time for both `created` and `updated`. Tags are seeded from
 * the industry string when present. Optional fields are added only when
 * truthy; `domain` is stored as `domain_name`. The body starts empty.
 */
function newCompanyEntity(input: CompanyInput, slug: string): Entity {
  const timestamp = new Date().toISOString();
  const topicTags = input.industry ? industryToTopicTags(input.industry) : [];
  // Spread order below fixes the frontmatter key order.
  const frontmatter: EntityFrontmatter = {
    title: input.name,
    type: "company",
    tags: topicTags,
    created: timestamp,
    updated: timestamp,
    quality: 5,
    status: "seedling",
    related: [],
    ...(input.domain ? { domain_name: input.domain } : {}),
    ...(input.industry ? { industry: input.industry } : {}),
    ...(input.mentioned_as ? { mentioned_as: input.mentioned_as } : {}),
    ...(input.sentiment ? { sentiment: input.sentiment } : {}),
  };
  return { domain: "Companies", slug, frontmatter, body: "" };
}
|
|
225
|
+
|
|
226
|
+
/**
 * Fold an incoming person record into an existing entity.
 *
 * Scalar fields go through `mergeScalar`. Socials are stored as
 * `key:value` strings, so they are decoded to a map, merged with the
 * incoming map, and re-encoded. Returns a new entity; `prior` is not
 * mutated.
 */
function mergePerson(prior: Entity, input: PersonInput): Entity {
  const fm = { ...prior.frontmatter };
  fm.role = mergeScalar(fm.role, input.role);
  fm.position = mergeScalar(fm.position, input.title);
  fm.company = mergeScalar(fm.company, input.company);
  fm.importance = mergeScalar(fm.importance, input.importance);

  let storedSocials: unknown;
  if (Array.isArray(fm.socials)) {
    const decoded: Array<[string, string]> = [];
    for (const entry of fm.socials as string[]) {
      // Cut at the FIRST ':' only; URLs keep their later colons in the value.
      const cut = entry.indexOf(":");
      if (cut <= 0) continue;
      const handle = entry.slice(cut + 1);
      if (handle.length === 0) continue;
      decoded.push([entry.slice(0, cut), handle]);
    }
    storedSocials = Object.fromEntries(decoded);
  } else {
    storedSocials = fm.socials ?? {};
  }

  const mergedEntries = Object.entries(mergeSocials(storedSocials, input.social));
  if (mergedEntries.length > 0) {
    fm.socials = mergedEntries.map(([network, handle]) => `${network}:${handle}`);
  }
  return { ...prior, frontmatter: fm };
}
|
|
253
|
+
|
|
254
|
+
/**
 * Fold an incoming company record into an existing entity.
 *
 * Scalar fields go through `mergeScalar`; when the payload carries an
 * industry, its topic tags are unioned into the existing tags. Returns a
 * new entity; `prior` is not mutated.
 */
function mergeCompany(prior: Entity, input: CompanyInput): Entity {
  const fm = { ...prior.frontmatter };
  fm.domain_name = mergeScalar(fm.domain_name, input.domain);
  fm.industry = mergeScalar(fm.industry, input.industry);
  fm.mentioned_as = mergeScalar(fm.mentioned_as, input.mentioned_as);
  fm.sentiment = mergeScalar(fm.sentiment, input.sentiment);

  const incomingTopicTags = input.industry ? industryToTopicTags(input.industry) : null;
  if (incomingTopicTags !== null) {
    fm.tags = mergeStringArray(fm.tags, incomingTopicTags);
  }
  return { ...prior, frontmatter: fm };
}
|
|
265
|
+
|
|
266
|
+
/**
 * Create or update the People entity for `input` and persist it.
 *
 * The slug comes from the person's name. An existing entity is merged,
 * otherwise a new one is built; either way `updated` is refreshed and the
 * per-source log section is appended before saving.
 *
 * @throws Error when the name cannot be slugified.
 */
function upsertPerson(
  input: PersonInput,
  sourceId: string,
  rootDir?: string
): UpsertResult {
  const slug = slugify(input.name);
  if (!slug) {
    throw new Error(`ingest: cannot slugify person name "${input.name}"`);
  }

  const prior = load("People", slug, rootDir);
  const base = prior ? mergePerson(prior, input) : newPersonEntity(input, slug);

  const touchedAt = new Date().toISOString();
  const logAttrs = {
    role: input.role ?? null,
    importance: input.importance ?? null,
  };
  const entity: Entity = {
    ...base,
    frontmatter: { ...base.frontmatter, updated: touchedAt },
    body: appendSourceLog(base.body, sourceId, input.context, logAttrs),
  };

  save(entity, rootDir);
  return { slug, created: prior === null, entity };
}
|
|
290
|
+
|
|
291
|
+
/**
 * Create or update the Companies entity for `input` and persist it.
 *
 * The slug comes from the domain when it is non-blank, otherwise from the
 * name. An existing entity is merged, otherwise a new one is built; either
 * way `updated` is refreshed and the per-source log section is appended
 * before saving.
 *
 * @throws Error when the chosen key cannot be slugified.
 */
function upsertCompany(
  input: CompanyInput,
  sourceId: string,
  rootDir?: string
): UpsertResult {
  let baseKey: string;
  if (input.domain?.trim()) {
    baseKey = input.domain;
  } else {
    baseKey = input.name;
  }
  const slug = slugify(baseKey);
  if (!slug) {
    throw new Error(`ingest: cannot slugify company "${input.name}"`);
  }

  const prior = load("Companies", slug, rootDir);
  const base = prior ? mergeCompany(prior, input) : newCompanyEntity(input, slug);

  const touchedAt = new Date().toISOString();
  const logAttrs = {
    mentioned_as: input.mentioned_as ?? null,
    sentiment: input.sentiment ?? null,
  };
  const entity: Entity = {
    ...base,
    frontmatter: { ...base.frontmatter, updated: touchedAt },
    body: appendSourceLog(base.body, sourceId, input.context, logAttrs),
  };

  save(entity, rootDir);
  return { slug, created: prior === null, entity };
}
|
|
316
|
+
|
|
317
|
+
/**
 * Find an existing Companies entity whose frontmatter title matches `name`
 * (case-insensitive, ignoring surrounding whitespace). Used by
 * `linkPersonToCompany` to avoid stubbing a duplicate when the canonical
 * company already lives at a non-name-derived slug (e.g. domain-derived
 * "acme-example" for "Acme Labs"). ISC-21.
 *
 * Entities whose title is not a string are skipped rather than aborting
 * the whole ingest with a TypeError.
 *
 * @returns The matching entity's slug, or `null` when none matches or `name` is blank.
 */
function findExistingCompanyByTitle(name: string, rootDir?: string): string | null {
  const target = name.trim().toLowerCase();
  if (!target) return null;
  for (const e of list("Companies", rootDir)) {
    const title: unknown = e.frontmatter.title;
    if (typeof title === "string" && title.trim().toLowerCase() === target) return e.slug;
  }
  return null;
}

/**
 * Ensure a `part-of` edge from person → company.
 *
 * `nameToSlug` is the lookup built from companies ingested in this call — it
 * lets us prefer a domain-derived slug ("acme.example" → "acme-example") over
 * the naive name-derived one ("Acme Labs" → "acme-labs") when the same
 * payload defines both. Falls back to slugify(name) when no match, and
 * stub-creates the company so the edge has a target.
 *
 * Does nothing when no company slug can be derived. When the person entity
 * cannot be loaded, the company stub (if one was needed) is still created
 * but no edge is written.
 */
function linkPersonToCompany(
  personSlug: string,
  companyName: string,
  nameToSlug: Map<string, string>,
  sourceId: string,
  rootDir?: string
): void {
  // Resolution order: (1) in-call lookup map (domain-derived slug wins when
  // the same payload defines both), (2) existing-store title scan (ISC-21
  // — re-ingest of a person referencing an already-known company), (3)
  // fall back to slugify(name) and stub the company.
  const companySlug =
    nameToSlug.get(companyName.toLowerCase()) ??
    findExistingCompanyByTitle(companyName, rootDir) ??
    slugify(companyName);
  if (!companySlug) return;

  // Read the company once. If it is missing, stub it and reuse the entity
  // that was just saved instead of loading it back from disk.
  let company = load("Companies", companySlug, rootDir);
  if (!company) {
    const stub = upsertCompany({ name: companyName }, sourceId, rootDir);
    // Only trust the stub when it landed on the slug we are linking to.
    company = stub.slug === companySlug ? stub.entity : null;
  }

  const person = load("People", personSlug, rootDir);
  if (!person) return;

  // ISC-18: inherit ONLY the company's topic:* tags. Structural tags stay
  // company-scoped so we don't create phantom graph edges between unrelated
  // people sharing only their employer's slug or other non-facet labels.
  const inheritedTopicTags = company
    ? company.frontmatter.tags.filter((t) => t.startsWith("topic:"))
    : [];
  const updated: Entity = {
    ...person,
    frontmatter: {
      ...person.frontmatter,
      related: addRelated(person.frontmatter.related, {
        slug: companySlug,
        type: "part-of",
      }),
      tags: mergeStringArray(person.frontmatter.tags, inheritedTopicTags),
    },
  };
  save(updated, rootDir);
}
|
|
381
|
+
|
|
382
|
+
// --- Public API -------------------------------------------------------------
|
|
383
|
+
|
|
384
|
+
/**
 * Merge an extraction payload into the markdown store.
 *
 * Companies are upserted first so that person→company links can resolve
 * to the canonical (possibly domain-derived) slug instead of guessing from
 * the name. Each person is then upserted and, when a `company` is given,
 * linked to it.
 *
 * @param input People and companies to ingest; either list may be absent.
 * @param sourceId Identifier recorded in each entity's per-source log.
 * @param rootDir Optional store root, passed through to the store helpers.
 * @returns Slug and created-flag per input record, in input order.
 */
export function ingestEntities(
  input: IngestInput,
  sourceId: string,
  rootDir?: string
): IngestResult {
  const companyRows: Array<{ slug: string; created: boolean; name: string }> = [];
  for (const company of input.companies ?? []) {
    const { slug, created } = upsertCompany(company, sourceId, rootDir);
    companyRows.push({ slug, created, name: company.name });
  }

  // Lower-cased name → slug; a later duplicate name overrides an earlier one.
  const nameToSlug = new Map<string, string>();
  for (const row of companyRows) {
    nameToSlug.set(row.name.toLowerCase(), row.slug);
  }

  const people: IngestResult["people"] = [];
  for (const person of input.people ?? []) {
    const outcome = upsertPerson(person, sourceId, rootDir);
    if (person.company) {
      linkPersonToCompany(outcome.slug, person.company, nameToSlug, sourceId, rootDir);
    }
    people.push({ slug: outcome.slug, created: outcome.created });
  }

  const companies = companyRows.map((row) => ({ slug: row.slug, created: row.created }));
  return { people, companies };
}
|