portable-agent-layer 0.41.1 → 0.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/install.mjs +8 -0
- package/README.md +2 -1
- package/assets/skills/analyze-youtube/SKILL.md +1 -1
- package/assets/skills/entities/SKILL.md +95 -0
- package/assets/templates/PAL/README.md +1 -1
- package/package.json +10 -12
- package/src/cli/index.ts +8 -0
- package/src/cli/knowledge.ts +620 -0
- package/src/cli/migrate.ts +188 -3
- package/src/hooks/lib/export.ts +1 -1
- package/src/hooks/lib/paths.ts +2 -1
- package/src/targets/lib.ts +23 -36
- package/src/tools/knowledge/graph.ts +395 -0
- package/src/tools/knowledge/ingest.ts +409 -0
- package/src/tools/knowledge/lib.ts +493 -0
- package/assets/skills/extract-entities/SKILL.md +0 -62
- package/assets/skills/extract-entities/tools/entity-save.ts +0 -110
- package/src/hooks/lib/entities.ts +0 -304
- package/src/tools/export.ts +0 -40
- package/src/tools/import.ts +0 -111
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge — markdown-per-entity store with typed relationships.
|
|
3
|
+
*
|
|
4
|
+
* Each entity lives at:
|
|
5
|
+
* ~/.pal/memory/knowledge/<Domain>/<slug>.md
|
|
6
|
+
*
|
|
7
|
+
* Frontmatter schema (canonical fields):
|
|
8
|
+
* title: human label
|
|
9
|
+
* type: free-form sub-type (e.g. "person", "ai-lab")
|
|
10
|
+
* tags: string[]
|
|
11
|
+
* created: ISO timestamp
|
|
12
|
+
* updated: ISO timestamp
|
|
13
|
+
* quality: 0-10
|
|
14
|
+
* status: seedling | budding | evergreen
|
|
15
|
+
* related: Array<{ slug, type }> -- type ∈ RELATION_TYPES
|
|
16
|
+
*
|
|
17
|
+
* Any additional frontmatter keys (role, company, sentiment, etc.) are
|
|
18
|
+
* preserved verbatim so domain-specific extractors can store rich
|
|
19
|
+
* attributes without changing the core schema.
|
|
20
|
+
*
|
|
21
|
+
* Ported from PAI's KNOWLEDGE/ pattern (see
|
|
22
|
+
* Personal_AI_Infrastructure/Releases/v5.0.0/.claude/PAI/TOOLS/KnowledgeGraph.ts).
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import {
|
|
26
|
+
existsSync,
|
|
27
|
+
mkdirSync,
|
|
28
|
+
readdirSync,
|
|
29
|
+
readFileSync,
|
|
30
|
+
renameSync,
|
|
31
|
+
writeFileSync,
|
|
32
|
+
} from "node:fs";
|
|
33
|
+
import { resolve } from "node:path";
|
|
34
|
+
import { ensureDir, paths } from "../../hooks/lib/paths";
|
|
35
|
+
|
|
36
|
+
// --- Constants --------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/** Knowledge domains. Each domain name is used as a directory name under the knowledge root. */
export const DOMAINS = ["People", "Companies", "Ideas", "Research"] as const;
/** Union of the literal domain names listed in `DOMAINS`. */
export type Domain = (typeof DOMAINS)[number];

/** Frontmatter `type` applied by `getOrCreate` when the caller does not supply one. */
const DEFAULT_TYPE_BY_DOMAIN: Record<Domain, string> = {
  People: "person",
  Companies: "company",
  Ideas: "idea",
  Research: "research",
};

/** Relationship labels accepted on a `related` entry; `validate` rejects anything else. */
export const RELATION_TYPES = [
  "supports",
  "contradicts",
  "extends",
  "part-of",
  "instance-of",
  "caused-by",
  "preceded-by",
  "related",
] as const;
/** Union of the literal relationship labels listed in `RELATION_TYPES`. */
export type RelationType = (typeof RELATION_TYPES)[number];

/** Lifecycle statuses accepted by `validate`. */
export const STATUSES = ["seedling", "budding", "evergreen"] as const;
/** Union of the literal statuses listed in `STATUSES`. */
export type Status = (typeof STATUSES)[number];

/**
 * Frontmatter keys that the emitter writes explicitly, in a fixed order.
 * The extra-key pass of the emitter skips these so they are never written twice.
 */
const CANONICAL_KEYS = new Set([
  "title",
  "type",
  "tags",
  "created",
  "updated",
  "quality",
  "status",
  "related",
]);
|
|
73
|
+
|
|
74
|
+
// --- Types ------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
/** A typed, outgoing link from one entity to another, addressed by slug. */
export interface Related {
  /** Slug of the linked entity. */
  slug: string;
  /** Relationship label; must be one of `RELATION_TYPES` to pass `validate`. */
  type: RelationType;
}

/** Parsed frontmatter of an entity file: the canonical fields plus any extra keys. */
export interface EntityFrontmatter {
  /** Human-readable label. */
  title: string;
  /** Free-form sub-type (e.g. "person", "ai-lab"). */
  type: string;
  tags: string[];
  /** ISO timestamp string. */
  created: string;
  /** ISO timestamp string. */
  updated: string;
  /** Number in the range 0-10 (enforced by `validate`). */
  quality: number;
  status: Status;
  related: Related[];
  /** Domain-specific attributes; only scalars and string arrays are written by the emitter. */
  [key: string]: unknown;
}

/** An entity as held in memory: its location (domain + slug), frontmatter and markdown body. */
export interface Entity {
  domain: Domain;
  /** File name without the `.md` extension. */
  slug: string;
  frontmatter: EntityFrontmatter;
  /** Markdown text following the frontmatter block. */
  body: string;
}
|
|
99
|
+
|
|
100
|
+
// --- Slug -------------------------------------------------------------------
|
|
101
|
+
|
|
102
|
+
/**
 * Derive a deterministic, filesystem-safe slug from a display name.
 *
 * Steps: NFKD-normalize, drop combining diacritical marks (U+0300–U+036F),
 * lowercase, collapse every run of characters outside `a-z0-9` into a single
 * dash, and trim leading/trailing dashes.
 *
 * @param input - Arbitrary display text.
 * @returns The slug; an empty string when `input` contains no ASCII letters
 *   or digits after normalization.
 */
export function slugify(input: string): string {
  // The mark range is written with explicit escapes: literal combining
  // characters are invisible in most editors and fuse onto the preceding "[".
  const withoutMarks = input.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
  return withoutMarks
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}
|
|
114
|
+
|
|
115
|
+
// --- YAML emitter (bounded schema) -----------------------------------------
|
|
116
|
+
|
|
117
|
+
/**
 * Render one scalar frontmatter value.
 *
 * Numbers and booleans are written bare; `null`/`undefined` become an empty
 * quoted string; everything else is stringified and JSON-quoted.
 */
function emitScalar(v: unknown): string {
  switch (typeof v) {
    case "number":
    case "boolean":
      return String(v);
    default:
      // `typeof null` is "object", so null and undefined both land here.
      return v == null ? '""' : JSON.stringify(String(v));
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Render a list of strings as a single-line flow sequence,
 * e.g. `["a", "b"]`. An empty list renders as `[]`.
 */
function emitStringArray(arr: readonly string[]): string {
  if (arr.length === 0) return "[]";
  const quoted = arr.map((item) => JSON.stringify(item));
  return "[" + quoted.join(", ") + "]";
}
|
|
127
|
+
|
|
128
|
+
/**
 * Render frontmatter as a `---`-fenced block.
 *
 * Canonical fields are written first, in a fixed order. Every other own key
 * follows in object order, but only when its value is a scalar or an array
 * made up entirely of strings; null/undefined values, nested objects and
 * other array shapes are skipped without error.
 *
 * @returns The block without a trailing newline.
 */
function emitFrontmatter(fm: EntityFrontmatter): string {
  const out: string[] = ["---"];
  out.push(`title: ${emitScalar(fm.title)}`);
  out.push(`type: ${emitScalar(fm.type)}`);
  out.push(`tags: ${emitStringArray(fm.tags)}`);
  out.push(`created: ${emitScalar(fm.created)}`);
  out.push(`updated: ${emitScalar(fm.updated)}`);
  out.push(`quality: ${fm.quality}`);
  out.push(`status: ${emitScalar(fm.status)}`);

  if (fm.related.length !== 0) {
    out.push("related:");
    for (const rel of fm.related) {
      out.push(` - slug: ${emitScalar(rel.slug)}`);
      out.push(` type: ${emitScalar(rel.type)}`);
    }
  } else {
    out.push("related: []");
  }

  for (const [key, value] of Object.entries(fm)) {
    // Canonical keys were already written above; empty values are omitted.
    const skip = CANONICAL_KEYS.has(key) || value === undefined || value === null;
    if (skip) continue;

    if (Array.isArray(value)) {
      // Only all-string arrays are supported in v1; other shapes are dropped.
      const allStrings = value.every((item) => typeof item === "string");
      if (allStrings) out.push(`${key}: ${emitStringArray(value as string[])}`);
    } else if (typeof value !== "object") {
      // Nested objects are out of scope in v1, so only scalars reach here.
      out.push(`${key}: ${emitScalar(value)}`);
    }
  }

  out.push("---");
  return out.join("\n");
}
|
|
168
|
+
|
|
169
|
+
// --- YAML parser (bounded schema) ------------------------------------------
|
|
170
|
+
|
|
171
|
+
/**
 * Parse one scalar frontmatter value.
 *
 * Recognized forms after trimming:
 * - empty text            → `""`
 * - `true` / `false`      → boolean
 * - decimal number, optionally with a fraction and/or exponent → number
 * - double-quoted text    → decoded as a JSON string; if the escapes are not
 *   valid JSON, the text between the quotes is returned verbatim
 * - anything else         → the trimmed text
 *
 * Exponent notation is accepted so that numbers written with `String(n)`
 * (for example `1e-7` or `1e+21`) parse back as numbers rather than strings.
 *
 * @param raw - Text to the right of the `key:` separator.
 * @returns A string, number or boolean.
 */
function parseScalar(raw: string): unknown {
  const trimmed = raw.trim();
  if (trimmed === "") return "";
  if (trimmed === "true") return true;
  if (trimmed === "false") return false;
  if (/^-?\d+(\.\d+)?([eE][+-]?\d+)?$/.test(trimmed)) return Number(trimmed);
  if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
    try {
      return JSON.parse(trimmed);
    } catch {
      // Not valid JSON (bad escape, lone quote): fall back to stripping the quotes.
      return trimmed.slice(1, -1);
    }
  }
  return trimmed;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Parse a single-line flow sequence such as `["a", "b"]` into strings.
 *
 * Input that is not wrapped in `[` … `]`, or whose interior is blank, yields
 * an empty array. Elements are split on commas that sit outside double
 * quotes; a backslash inside quotes protects the following character. Each
 * element is passed through `parseScalar` and then coerced to a string.
 */
function parseStringArray(raw: string): string[] {
  const text = raw.trim();
  if (!(text.startsWith("[") && text.endsWith("]"))) return [];
  const inner = text.slice(1, -1).trim();
  if (inner === "") return [];

  const items: string[] = [];
  let pending = "";
  let inStr = false;
  let esc = false;

  for (const ch of inner) {
    // A comma only separates elements when it is outside a quoted string.
    const isSeparator = !esc && !inStr && ch === ",";
    if (isSeparator) {
      items.push(String(parseScalar(pending)));
      pending = "";
      continue;
    }

    pending += ch;
    if (esc) {
      esc = false; // the escaped character has been consumed
    } else if (inStr && ch === "\\") {
      esc = true;
    } else if (ch === '"') {
      inStr = !inStr;
    }
  }

  // A trailing element is kept only when it is not blank.
  if (pending.trim() !== "") items.push(String(parseScalar(pending)));
  return items;
}
|
|
224
|
+
|
|
225
|
+
/** Raw frontmatter text (without the `---` fences) and the remaining body. */
interface SplitResult {
  fm: string;
  body: string;
}

/**
 * Split a document into its leading `---`-fenced frontmatter and its body.
 *
 * Both LF and CRLF line endings are accepted, and a leading UTF-8 byte-order
 * mark is tolerated, so files saved by Windows editors keep their
 * frontmatter instead of being treated as body-only documents.
 *
 * @param raw - Full file contents.
 * @returns `fm` holds the text between the fences and `body` everything after
 *   the closing fence's line break. When no frontmatter block is found, `fm`
 *   is empty and `body` is `raw` unchanged.
 */
function splitFrontmatter(raw: string): SplitResult {
  const match = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?([\s\S]*)$/.exec(raw);
  if (!match) return { fm: "", body: raw };
  const [, fm = "", body = ""] = match;
  return { fm, body };
}
|
|
235
|
+
|
|
236
|
+
/**
 * Parse the text between the frontmatter fences into an `EntityFrontmatter`.
 *
 * This is a bounded, line-oriented reader, not a general YAML parser:
 * - Every canonical field starts from a default (empty strings, `quality` 0,
 *   `status` "seedling", empty `tags`/`related`) and is overwritten when the
 *   key is present.
 * - Only unindented `key: value` lines are treated as top-level keys.
 * - `related` is read either as the inline empty list `[]` or as an indented
 *   block of `- slug:` / `type:` pairs; any other inline value results in an
 *   empty list.
 * - Other keys are stored verbatim: `[...]` values as string arrays, the rest
 *   via `parseScalar`.
 *
 * No validation happens here; in particular a relation `type` is cast, not
 * checked against `RELATION_TYPES`.
 *
 * @param fmText - Frontmatter text without the `---` fences.
 */
function parseFrontmatter(fmText: string): EntityFrontmatter {
  // Defaults for the canonical fields; parsed keys overwrite them below.
  const out: Record<string, unknown> = {
    title: "",
    type: "",
    tags: [],
    created: "",
    updated: "",
    quality: 0,
    status: "seedling",
    related: [] as Related[],
  };

  const lines = fmText.split("\n");
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (line.trim() === "") {
      i++;
      continue;
    }

    // Top-level keys start with no indentation.
    // Indented lines and list items reached here do not belong to a block
    // consumed below, so they are ignored.
    if (line.startsWith(" ") || line.startsWith("\t") || line.startsWith("-")) {
      i++;
      continue;
    }

    // Lines without a colon cannot be `key: value` pairs.
    const colonIdx = line.indexOf(":");
    if (colonIdx < 0) {
      i++;
      continue;
    }

    // Split on the FIRST colon so values may themselves contain colons.
    const key = line.slice(0, colonIdx).trim();
    const rawVal = line.slice(colonIdx + 1);

    if (key === "tags") {
      // Always an array: non-bracketed values parse to [].
      out.tags = parseStringArray(rawVal);
      i++;
      continue;
    }

    if (key === "related") {
      const v = rawVal.trim();
      if (v === "[]" || v === "") {
        // Could be inline empty or block-style. Look ahead.
        if (v === "[]") {
          out.related = [];
          i++;
          continue;
        }
        const items: Related[] = [];
        i++;
        // Entry currently being assembled; flushed when the next `- slug:`
        // line starts or when the block ends.
        let current: Partial<Related> | null = null;
        while (i < lines.length) {
          const child = lines[i];
          // The block ends at the first blank or unindented line; `i` is left
          // pointing at that line so the outer loop processes it.
          if (
            child.trim() === "" ||
            (!child.startsWith(" ") && !child.startsWith("\t"))
          ) {
            break;
          }
          const t = child.trim();
          if (t.startsWith("- slug:")) {
            if (current?.slug) {
              items.push({
                slug: current.slug,
                // A missing type falls back to the generic "related" label.
                type: (current.type ?? "related") as RelationType,
              });
            }
            current = {
              slug: String(parseScalar(t.slice("- slug:".length))),
            };
          } else if (t.startsWith("slug:") && current) {
            current.slug = String(parseScalar(t.slice("slug:".length)));
          } else if (t.startsWith("type:") && current) {
            // Cast only — the value is not checked against RELATION_TYPES here.
            current.type = parseScalar(t.slice("type:".length)) as RelationType;
          }
          i++;
        }
        // Flush the final entry of the block.
        if (current?.slug) {
          items.push({
            slug: current.slug,
            type: (current.type ?? "related") as RelationType,
          });
        }
        out.related = items;
        // No i++ here: `i` already points past the consumed block.
        continue;
      }
      // Any other inline value (e.g. a non-empty flow list) is not supported
      // and yields an empty list.
      out.related = [];
      i++;
      continue;
    }

    // Inline string-array values.
    const v = rawVal.trim();
    if (v.startsWith("[") && v.endsWith("]")) {
      out[key] = parseStringArray(v);
    } else {
      out[key] = parseScalar(v);
    }
    i++;
  }

  return out as EntityFrontmatter;
}
|
|
342
|
+
|
|
343
|
+
// --- Validation -------------------------------------------------------------
|
|
344
|
+
|
|
345
|
+
/**
 * Check that an entity satisfies the canonical schema.
 *
 * Verifies, in order: a non-empty string `title`; a finite `quality` in the
 * range 0-10; a `status` from `STATUSES`; a `domain` from `DOMAINS`; `tags`
 * being an array of strings; and `related` being an array whose entries each
 * have a non-empty `slug` and a `type` from `RELATION_TYPES`.
 *
 * @param entity - Entity to check; it is not modified.
 * @throws Error describing the first violated rule.
 */
export function validate(entity: Entity): void {
  const fm = entity.frontmatter;
  if (!fm.title || typeof fm.title !== "string") {
    throw new Error(`knowledge: missing or invalid 'title' for ${entity.slug}`);
  }
  // Number.isFinite rejects NaN, which would otherwise slip through both
  // range comparisons and be written out as the text "NaN".
  if (
    typeof fm.quality !== "number" ||
    !Number.isFinite(fm.quality) ||
    fm.quality < 0 ||
    fm.quality > 10
  ) {
    throw new Error(`knowledge: 'quality' must be 0-10 for ${entity.slug}`);
  }
  if (!STATUSES.includes(fm.status)) {
    throw new Error(
      `knowledge: 'status' must be one of ${STATUSES.join("|")} for ${entity.slug}`
    );
  }
  if (!DOMAINS.includes(entity.domain)) {
    throw new Error(
      `knowledge: 'domain' must be one of ${DOMAINS.join("|")} (got ${entity.domain})`
    );
  }
  if (!Array.isArray(fm.tags) || fm.tags.some((t) => typeof t !== "string")) {
    throw new Error(`knowledge: 'tags' must be string[] for ${entity.slug}`);
  }
  if (!Array.isArray(fm.related)) {
    throw new Error(`knowledge: 'related' must be array for ${entity.slug}`);
  }
  for (const r of fm.related) {
    // `!r` guards null/undefined entries so they produce the validation
    // error below instead of a TypeError on property access.
    if (!r || !r.slug || !RELATION_TYPES.includes(r.type)) {
      throw new Error(
        `knowledge: invalid related entry on ${entity.slug}: ${JSON.stringify(r)}`
      );
    }
  }
}
|
|
377
|
+
|
|
378
|
+
// --- Serialize / parse ------------------------------------------------------
|
|
379
|
+
|
|
380
|
+
/**
 * Turn an entity into the text stored on disk: the frontmatter block, one
 * blank line, then the body terminated by a newline.
 *
 * @throws Error when the entity fails `validate`.
 */
export function serialize(entity: Entity): string {
  validate(entity);
  const header = emitFrontmatter(entity.frontmatter);
  let text = entity.body;
  if (!text.endsWith("\n")) text += "\n";
  return [header, "", text].join("\n");
}
|
|
386
|
+
|
|
387
|
+
/**
 * Build an entity from raw file text.
 *
 * The frontmatter block is parsed into fields and leading newlines are
 * stripped from the body. `domain` and `slug` are taken from the arguments,
 * not from the text. No validation is performed.
 */
export function parse(domain: Domain, slug: string, raw: string): Entity {
  const parts = splitFrontmatter(raw);
  const frontmatter = parseFrontmatter(parts.fm);
  const body = parts.body.replace(/^\n+/, "");
  return { domain, slug, frontmatter, body };
}
|
|
392
|
+
|
|
393
|
+
// --- Filesystem -------------------------------------------------------------
|
|
394
|
+
|
|
395
|
+
/**
 * Resolve the directory that holds a domain's entity files.
 *
 * With `rootDir`, the directory is `<rootDir>/<domain>` and is created
 * (recursively) when missing. Without it, the path is built from
 * `paths.knowledge()` and passed through `ensureDir`
 * (presumably creating it as well — confirm against hooks/lib/paths).
 */
function domainDir(domain: Domain, rootDir?: string): string {
  if (!rootDir) {
    return ensureDir(resolve(paths.knowledge(), domain));
  }
  const dir = resolve(rootDir, domain);
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
  return dir;
}
|
|
403
|
+
|
|
404
|
+
/**
 * @lintignore — consumed by Phase 2 graph layer (needs path-per-slug to read files)
 *
 * Absolute path of the markdown file for `slug` inside `domain`. Resolving
 * the domain directory creates it when `rootDir` is given and it is missing.
 */
export function entityPath(domain: Domain, slug: string, rootDir?: string): string {
  const dir = domainDir(domain, rootDir);
  const fileName = `${slug}.md`;
  return resolve(dir, fileName);
}
|
|
408
|
+
|
|
409
|
+
/** Report whether an entity file is present for `slug` in `domain`. */
export function exists(domain: Domain, slug: string, rootDir?: string): boolean {
  const file = entityPath(domain, slug, rootDir);
  return existsSync(file);
}
|
|
412
|
+
|
|
413
|
+
/**
 * Persist an entity to its file.
 *
 * The serialized text is first written to a sibling `<file>.tmp` and then
 * renamed over the target, so readers never observe a partially written
 * entity file.
 *
 * @throws Error when the entity fails validation during serialization
 *   (nothing is written in that case), or when a filesystem call fails.
 */
export function save(entity: Entity, rootDir?: string): void {
  const target = entityPath(entity.domain, entity.slug, rootDir);
  const staging = `${target}.tmp`;
  const text = serialize(entity);
  writeFileSync(staging, text, "utf-8");
  renameSync(staging, target);
}
|
|
419
|
+
|
|
420
|
+
/**
 * Read and parse the entity stored for `slug` in `domain`.
 *
 * @returns The parsed entity, or `null` when no file exists for it.
 */
export function load(domain: Domain, slug: string, rootDir?: string): Entity | null {
  const file = entityPath(domain, slug, rootDir);
  return existsSync(file) ? parse(domain, slug, readFileSync(file, "utf-8")) : null;
}
|
|
425
|
+
|
|
426
|
+
/**
 * Load every entity in one domain, or in all domains when `domain` is omitted.
 *
 * Only `*.md` files whose name does not start with `_` are considered; the
 * slug is the file name without its extension. Results follow the order of
 * `DOMAINS` and, within a domain, the directory listing order.
 */
export function list(domain?: Domain, rootDir?: string): Entity[] {
  const domains: readonly Domain[] = domain ? [domain] : DOMAINS;
  const found: Entity[] = [];

  for (const d of domains) {
    const dir = domainDir(d, rootDir);
    if (!existsSync(dir)) continue;

    const slugs = readdirSync(dir)
      .filter((name) => name.endsWith(".md") && !name.startsWith("_"))
      .map((name) => name.slice(0, -3)); // drop the ".md" suffix

    for (const slug of slugs) {
      const entity = load(d, slug, rootDir);
      if (entity !== null) found.push(entity);
    }
  }

  return found;
}
|
|
441
|
+
|
|
442
|
+
// --- Create / get -----------------------------------------------------------
|
|
443
|
+
|
|
444
|
+
/** Arguments for `getOrCreate`. Only `domain` and `name` are required. */
export interface CreateInput {
  domain: Domain;
  /** Display name; becomes the entity `title` and is slugified to derive the file name. */
  name: string;
  /** Frontmatter `type`; defaults to the domain's entry in `DEFAULT_TYPE_BY_DOMAIN`. */
  type?: string;
  /** Defaults to an empty list. */
  tags?: string[];
  /** Defaults to 5. */
  quality?: number;
  /** Defaults to "seedling". */
  status?: Status;
  /** Defaults to an empty list. */
  related?: Related[];
  /** Markdown body; defaults to an empty string. */
  body?: string;
  /** Additional frontmatter keys merged into the new entity's frontmatter. */
  extra?: Record<string, unknown>;
}
|
|
455
|
+
|
|
456
|
+
/**
 * Fetch the entity whose slug is derived from `input.name`, creating it when
 * it does not exist yet.
 *
 * An existing entity is returned exactly as stored — none of the other
 * `input` fields are applied to it. A new entity is saved with defaults for
 * anything not supplied: quality 5, status "seedling", the domain's default
 * type, empty tags/related/body, and `created`/`updated` set to the current
 * ISO timestamp.
 *
 * Merging attributes into an existing entity is deliberately left to
 * `ingest.ts`, which keeps this function a safe storage primitive.
 *
 * @throws Error when no slug can be derived from `input.name`, or when the
 *   new entity fails validation on save.
 */
export function getOrCreate(input: CreateInput, rootDir?: string): Entity {
  const slug = slugify(input.name);
  if (slug === "") {
    throw new Error(`knowledge: cannot derive slug from name "${input.name}"`);
  }

  const stored = load(input.domain, slug, rootDir);
  if (stored !== null) return stored;

  const timestamp = new Date().toISOString();
  const extras = input.extra ?? {};
  // NOTE(review): `extras` is spread last, so a key such as `title` or
  // `created` inside `extra` replaces the canonical value above it —
  // confirm callers rely on this before changing the order.
  const frontmatter: EntityFrontmatter = {
    title: input.name,
    type: input.type ?? DEFAULT_TYPE_BY_DOMAIN[input.domain],
    tags: input.tags ?? [],
    created: timestamp,
    updated: timestamp,
    quality: input.quality ?? 5,
    status: input.status ?? "seedling",
    related: input.related ?? [],
    ...extras,
  };

  const created: Entity = {
    domain: input.domain,
    slug,
    frontmatter,
    body: input.body ?? "",
  };
  save(created, rootDir);
  return created;
}
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: extract-entities
|
|
3
|
-
description: Extract people and companies from content (articles, videos, URLs, pasted text). Use when identifying who and what organizations are mentioned in content.
|
|
4
|
-
argument-hint: <content, URL, or pasted text>
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
Extract people and companies from $ARGUMENTS:
|
|
8
|
-
|
|
9
|
-
1. Read/fetch the content
|
|
10
|
-
2. Extract ALL people and companies mentioned
|
|
11
|
-
|
|
12
|
-
## People
|
|
13
|
-
|
|
14
|
-
For each person, extract:
|
|
15
|
-
- **name**: Full name
|
|
16
|
-
- **role**: author | subject | mentioned | quoted | expert | interviewer | interviewee
|
|
17
|
-
- **title**: Job title (null if unknown)
|
|
18
|
-
- **company**: Company affiliation (null if unknown)
|
|
19
|
-
- **social**: twitter (@handle), linkedin (URL), email, website — null if unknown
|
|
20
|
-
- **context**: Why this person is mentioned and their relevance
|
|
21
|
-
- **importance**: primary (central to content) | secondary (supporting) | minor (brief mention)
|
|
22
|
-
|
|
23
|
-
## Companies
|
|
24
|
-
|
|
25
|
-
For each company/organization, extract:
|
|
26
|
-
- **name**: Official name
|
|
27
|
-
- **domain**: Primary website domain (e.g. "anthropic.com", null if unknown)
|
|
28
|
-
- **industry**: Classification (AI, security, fintech, healthcare, etc.)
|
|
29
|
-
- **context**: How and why mentioned
|
|
30
|
-
- **mentioned_as**: subject | source | example | competitor | partner | acquisition | product | other
|
|
31
|
-
- **sentiment**: positive | neutral | negative | mixed
|
|
32
|
-
|
|
33
|
-
## Output
|
|
34
|
-
|
|
35
|
-
Return structured JSON:
|
|
36
|
-
|
|
37
|
-
```json
|
|
38
|
-
{
|
|
39
|
-
"people": [...],
|
|
40
|
-
"companies": [...]
|
|
41
|
-
}
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
## Guidelines
|
|
45
|
-
|
|
46
|
-
- Accuracy over quantity — use null for unknown fields, never guess
|
|
47
|
-
- Include authors, subjects, quoted individuals, and anyone significantly mentioned
|
|
48
|
-
- For research papers: all authors get "author" role
|
|
49
|
-
- For interviews: distinguish interviewer vs interviewee
|
|
50
|
-
- Universities and research institutions count as companies
|
|
51
|
-
- Extract social handles from bios, signatures, or text body
|
|
52
|
-
- Context fields should explain relevance, not just repeat the mention
|
|
53
|
-
|
|
54
|
-
## Persistence
|
|
55
|
-
|
|
56
|
-
After displaying results, ask the user if they want to save. When saving, pipe the JSON output through the entity-save tool which handles deduplication automatically:
|
|
57
|
-
|
|
58
|
-
```bash
|
|
59
|
-
echo '<the JSON output>' | bun ~/.pal/skills/extract-entities/tools/entity-save.ts -- --source "<URL or content origin>"
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
The tool deduplicates against the entity index (`memory/entities/entity-index.json`), assigns stable UUIDs, tracks occurrences, and reports what was new vs existing.
|
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
/**
|
|
3
|
-
* Entity Save — Deduplicate and persist extracted entities.
|
|
4
|
-
*
|
|
5
|
-
* Accepts extracted people/companies JSON via stdin or --file,
|
|
6
|
-
* deduplicates against the entity index, and saves.
|
|
7
|
-
*
|
|
8
|
-
* Usage:
|
|
9
|
-
* echo '{"people":[...],"companies":[...]}' | bun entity-save.ts -- --source "https://example.com"
|
|
10
|
-
* bun entity-save.ts -- --file /path/to/extracted.json --source "https://example.com"
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { readFileSync } from "node:fs";
|
|
14
|
-
import { parseArgs } from "node:util";
|
|
15
|
-
import { loadEntityIndex, processEntities } from "../../../../src/hooks/lib/entities";
|
|
16
|
-
|
|
17
|
-
const { values } = parseArgs({
|
|
18
|
-
args: Bun.argv.slice(2),
|
|
19
|
-
options: {
|
|
20
|
-
source: { type: "string", short: "s", default: "manual" },
|
|
21
|
-
file: { type: "string", short: "f" },
|
|
22
|
-
},
|
|
23
|
-
strict: true,
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
const sourceId = values.source ?? "manual";
|
|
27
|
-
|
|
28
|
-
let raw: string;
|
|
29
|
-
if (values.file) {
|
|
30
|
-
raw = readFileSync(values.file, "utf-8");
|
|
31
|
-
} else {
|
|
32
|
-
raw = await Bun.stdin.text();
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
if (!raw.trim()) {
|
|
36
|
-
console.error("Error: No input provided. Pipe JSON via stdin or use --file.");
|
|
37
|
-
process.exit(1);
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
let data: {
|
|
41
|
-
people: Array<Record<string, unknown>>;
|
|
42
|
-
companies: Array<Record<string, unknown>>;
|
|
43
|
-
links?: Array<Record<string, unknown>>;
|
|
44
|
-
sources?: Array<Record<string, unknown>>;
|
|
45
|
-
};
|
|
46
|
-
try {
|
|
47
|
-
data = JSON.parse(raw);
|
|
48
|
-
} catch {
|
|
49
|
-
console.error("Error: Invalid JSON input.");
|
|
50
|
-
process.exit(1);
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
if (!Array.isArray(data.people) || !Array.isArray(data.companies)) {
|
|
54
|
-
console.error('Error: JSON must have "people" and "companies" arrays.');
|
|
55
|
-
process.exit(1);
|
|
56
|
-
}
|
|
57
|
-
data.links ??= [];
|
|
58
|
-
data.sources ??= [];
|
|
59
|
-
|
|
60
|
-
const before = loadEntityIndex();
|
|
61
|
-
const counts = (idx: ReturnType<typeof loadEntityIndex>) => ({
|
|
62
|
-
people: Object.keys(idx.people).length,
|
|
63
|
-
companies: Object.keys(idx.companies).length,
|
|
64
|
-
links: Object.keys(idx.links).length,
|
|
65
|
-
sources: Object.keys(idx.sources).length,
|
|
66
|
-
});
|
|
67
|
-
const cb = counts(before);
|
|
68
|
-
|
|
69
|
-
const result = processEntities(
|
|
70
|
-
{
|
|
71
|
-
people: data.people as Array<{ name: string; [key: string]: unknown }>,
|
|
72
|
-
companies: data.companies as Array<{
|
|
73
|
-
name: string;
|
|
74
|
-
domain: string | null;
|
|
75
|
-
[key: string]: unknown;
|
|
76
|
-
}>,
|
|
77
|
-
links: data.links as Array<{ url: string; [key: string]: unknown }>,
|
|
78
|
-
sources: data.sources as Array<{
|
|
79
|
-
url: string | null;
|
|
80
|
-
author: string | null;
|
|
81
|
-
publication: string | null;
|
|
82
|
-
[key: string]: unknown;
|
|
83
|
-
}>,
|
|
84
|
-
},
|
|
85
|
-
sourceId
|
|
86
|
-
);
|
|
87
|
-
|
|
88
|
-
const ca = counts(loadEntityIndex());
|
|
89
|
-
|
|
90
|
-
console.log(
|
|
91
|
-
JSON.stringify(
|
|
92
|
-
{
|
|
93
|
-
saved: {
|
|
94
|
-
people: result.people.length,
|
|
95
|
-
companies: result.companies.length,
|
|
96
|
-
links: result.links.length,
|
|
97
|
-
sources: result.sources.length,
|
|
98
|
-
},
|
|
99
|
-
new: {
|
|
100
|
-
people: ca.people - cb.people,
|
|
101
|
-
companies: ca.companies - cb.companies,
|
|
102
|
-
links: ca.links - cb.links,
|
|
103
|
-
sources: ca.sources - cb.sources,
|
|
104
|
-
},
|
|
105
|
-
total: ca,
|
|
106
|
-
},
|
|
107
|
-
null,
|
|
108
|
-
2
|
|
109
|
-
)
|
|
110
|
-
);
|