portable-agent-layer 0.41.0 → 0.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,493 @@
1
+ /**
2
+ * Knowledge — markdown-per-entity store with typed relationships.
3
+ *
4
+ * Each entity lives at:
5
+ * ~/.pal/memory/knowledge/<Domain>/<slug>.md
6
+ *
7
+ * Frontmatter schema (canonical fields):
8
+ * title: human label
9
+ * type: free-form sub-type (e.g. "person", "ai-lab")
10
+ * tags: string[]
11
+ * created: ISO timestamp
12
+ * updated: ISO timestamp
13
+ * quality: 0-10
14
+ * status: seedling | budding | evergreen
15
+ * related: Array<{ slug, type }> -- type ∈ RELATION_TYPES
16
+ *
17
+ * Any additional frontmatter keys (role, company, sentiment, etc.) are
18
+ * preserved verbatim so domain-specific extractors can store rich
19
+ * attributes without changing the core schema.
20
+ *
21
+ * Ported from PAI's KNOWLEDGE/ pattern (see
22
+ * Personal_AI_Infrastructure/Releases/v5.0.0/.claude/PAI/TOOLS/KnowledgeGraph.ts).
23
+ */
24
+
25
+ import {
26
+ existsSync,
27
+ mkdirSync,
28
+ readdirSync,
29
+ readFileSync,
30
+ renameSync,
31
+ writeFileSync,
32
+ } from "node:fs";
33
+ import { resolve } from "node:path";
34
+ import { ensureDir, paths } from "../../hooks/lib/paths";
35
+
36
+ // --- Constants --------------------------------------------------------------
37
+
38
/** Knowledge domains; each entity file is stored under a directory named after its domain. */
export const DOMAINS = ["People", "Companies", "Ideas", "Research"] as const;
export type Domain = (typeof DOMAINS)[number];

/** Frontmatter `type` applied to a new entity when the caller does not supply one. */
const DEFAULT_TYPE_BY_DOMAIN: Record<Domain, string> = {
  People: "person",
  Companies: "company",
  Ideas: "idea",
  Research: "research",
};

/** Allowed values for the `type` of a `related` entry. */
export const RELATION_TYPES = [
  "supports",
  "contradicts",
  "extends",
  "part-of",
  "instance-of",
  "caused-by",
  "preceded-by",
  "related",
] as const;
export type RelationType = (typeof RELATION_TYPES)[number];

/** Allowed values for the `status` frontmatter field. */
export const STATUSES = ["seedling", "budding", "evergreen"] as const;
export type Status = (typeof STATUSES)[number];

/**
 * Frontmatter keys with a fixed position and format in the emitted file.
 * Every other key is treated as a free-form extra attribute.
 */
const CANONICAL_KEYS = new Set<string>([
  "title",
  "type",
  "tags",
  "created",
  "updated",
  "quality",
  "status",
  "related",
]);
73
+
74
+ // --- Types ------------------------------------------------------------------
75
+
76
/** A typed link to another entity, identified by that entity's slug. */
export interface Related {
  slug: string;
  type: RelationType;
}

/**
 * Parsed frontmatter of an entity file. The named fields are the canonical
 * schema; the index signature carries any additional attributes.
 */
export interface EntityFrontmatter {
  /** Human-readable label. */
  title: string;
  /** Free-form sub-type (e.g. "person", "ai-lab"). */
  type: string;
  tags: Array<string>;
  /** ISO timestamp. */
  created: string;
  /** ISO timestamp. */
  updated: string;
  /** Score in the range 0-10. */
  quality: number;
  status: Status;
  related: Array<Related>;
  [key: string]: unknown;
}

/** One knowledge entity: where it lives (domain + slug) and what it contains. */
export interface Entity {
  domain: Domain;
  slug: string;
  frontmatter: EntityFrontmatter;
  /** Markdown text following the frontmatter. */
  body: string;
}
99
+
100
+ // --- Slug -------------------------------------------------------------------
101
+
102
/**
 * Derive a deterministic slug from free-form text.
 *
 * Steps: NFKD-normalize, strip combining diacritical marks, lowercase,
 * collapse every run of characters outside `a-z0-9` into a single dash, and
 * trim leading/trailing dashes.
 *
 * @param input Arbitrary text, typically an entity name.
 * @returns The slug; an empty string when `input` contains no ASCII
 *   letters or digits after normalization.
 */
export function slugify(input: string): string {
  // NFKD decomposes accented letters into base letter + combining mark. The
  // combining diacritical marks block (U+0300-U+036F) is written with escapes
  // so the range stays visible and survives editors and re-encoding.
  const withoutDiacritics = input.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
  return withoutDiacritics
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}
114
+
115
+ // --- YAML emitter (bounded schema) -----------------------------------------
116
+
117
/**
 * Render one frontmatter scalar.
 *
 * Numbers and booleans are written bare; `null`/`undefined` become an empty
 * quoted string; everything else is stringified and JSON-quoted.
 */
function emitScalar(v: unknown): string {
  switch (typeof v) {
    case "number":
    case "boolean":
      return String(v);
    default:
      // `typeof null` is "object", so null lands here together with undefined.
      return v === null || v === undefined ? '""' : JSON.stringify(String(v));
  }
}
122
+
123
/** Render a string array as an inline list of JSON-quoted items, e.g. `["a", "b"]`. */
function emitStringArray(arr: readonly string[]): string {
  const quoted: string[] = [];
  for (const item of arr) {
    quoted.push(JSON.stringify(item));
  }
  return quoted.length > 0 ? `[${quoted.join(", ")}]` : "[]";
}
127
+
128
/**
 * Render frontmatter as a `---`-delimited block.
 *
 * Canonical fields are written first in a fixed order, followed by any extra
 * keys in insertion order. Extra values are emitted only when they are
 * scalars or arrays of strings; nested objects, other array shapes, and
 * null/undefined values are skipped.
 *
 * @param fm Frontmatter to render.
 * @returns The block including both `---` delimiters, without a trailing newline.
 */
function emitFrontmatter(fm: EntityFrontmatter): string {
  const lines: string[] = [
    "---",
    `title: ${emitScalar(fm.title)}`,
    `type: ${emitScalar(fm.type)}`,
    `tags: ${emitStringArray(fm.tags)}`,
    `created: ${emitScalar(fm.created)}`,
    `updated: ${emitScalar(fm.updated)}`,
    `quality: ${fm.quality}`,
    `status: ${emitScalar(fm.status)}`,
  ];

  if (fm.related.length === 0) {
    lines.push("related: []");
  } else {
    lines.push("related:");
    for (const r of fm.related) {
      // Standard YAML layout for a sequence of mappings: `type` must be
      // indented to the same column as `slug` (past the dash) so other YAML
      // readers see one mapping per item. The parser in this file only
      // requires leading whitespace, so it reads this layout as well.
      lines.push(`  - slug: ${emitScalar(r.slug)}`, `    type: ${emitScalar(r.type)}`);
    }
  }

  for (const [k, v] of Object.entries(fm)) {
    if (CANONICAL_KEYS.has(k)) continue;
    if (v === undefined || v === null) continue;
    if (Array.isArray(v)) {
      if (v.every((x) => typeof x === "string")) {
        lines.push(`${k}: ${emitStringArray(v as string[])}`);
      }
      // Other array shapes are out of scope in v1 — skipped silently.
      continue;
    }
    if (typeof v === "object") {
      // Nested objects out of scope in v1.
      continue;
    }
    lines.push(`${k}: ${emitScalar(v)}`);
  }
  lines.push("---");
  return lines.join("\n");
}
168
+
169
+ // --- YAML parser (bounded schema) ------------------------------------------
170
+
171
/**
 * Interpret one raw frontmatter value.
 *
 * After trimming: `true`/`false` become booleans, plain decimal numbers become
 * numbers, double-quoted text is JSON-decoded (falling back to stripping the
 * outer quotes when decoding fails), and anything else is returned as the
 * trimmed string.
 */
function parseScalar(raw: string): unknown {
  const text = raw.trim();
  switch (text) {
    case "":
      return "";
    case "true":
      return true;
    case "false":
      return false;
  }
  if (/^-?\d+(\.\d+)?$/.test(text)) {
    return Number(text);
  }
  const isQuoted = text.startsWith('"') && text.endsWith('"');
  if (!isQuoted) {
    return text;
  }
  try {
    return JSON.parse(text);
  } catch {
    // Not valid JSON (e.g. an unknown escape): keep the content between the quotes as-is.
    return text.slice(1, -1);
  }
}
186
+
187
/**
 * Parse an inline list such as `["a", "b, c", plain]` into strings.
 *
 * Items are split on commas that are outside double quotes. Quoted items are
 * JSON-decoded (falling back to stripping the outer quotes when decoding
 * fails). Unquoted items are kept verbatim after trimming, so `007` and
 * `1.50` stay exactly as written instead of being normalized through number
 * parsing. Blank items (e.g. from `a, , b` or a trailing comma) are skipped.
 *
 * @param raw Text expected to be wrapped in `[` and `]`.
 * @returns The items; an empty array when `raw` is not a bracketed list.
 */
function parseStringArray(raw: string): string[] {
  const trimmed = raw.trim();
  if (!(trimmed.startsWith("[") && trimmed.endsWith("]"))) return [];
  const inner = trimmed.slice(1, -1);

  const out: string[] = [];
  const pushToken = (token: string): void => {
    const t = token.trim();
    if (t === "") return;
    if (t.startsWith('"') && t.endsWith('"')) {
      try {
        const decoded: unknown = JSON.parse(t);
        out.push(typeof decoded === "string" ? decoded : t.slice(1, -1));
      } catch {
        out.push(t.slice(1, -1));
      }
      return;
    }
    out.push(t);
  };

  let buf = "";
  let inStr = false;
  let esc = false;
  for (const ch of inner) {
    if (esc) {
      // Character following a backslash inside a string: never a delimiter.
      buf += ch;
      esc = false;
    } else if (ch === "\\" && inStr) {
      buf += ch;
      esc = true;
    } else if (ch === '"') {
      inStr = !inStr;
      buf += ch;
    } else if (ch === "," && !inStr) {
      pushToken(buf);
      buf = "";
    } else {
      buf += ch;
    }
  }
  pushToken(buf);
  return out;
}
224
+
225
/** Raw frontmatter text (without the `---` delimiters) and the remaining body. */
interface SplitResult {
  fm: string;
  body: string;
}

/**
 * Split a file into its leading `---`-delimited frontmatter and its body.
 *
 * Accepts both LF and CRLF line endings and an optional leading UTF-8 BOM, so
 * files saved by Windows editors are still recognized instead of being
 * treated as body-only. The returned frontmatter always uses LF line endings;
 * the body is returned as found.
 *
 * @param raw Full file contents.
 * @returns `{ fm: "", body: raw }` when no frontmatter block is present.
 */
function splitFrontmatter(raw: string): SplitResult {
  const match = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?([\s\S]*)$/.exec(raw);
  if (!match) return { fm: "", body: raw };
  return { fm: match[1].replace(/\r\n/g, "\n"), body: match[2] };
}
235
+
236
/**
 * Parse the indented block that follows a bare `related:` key.
 *
 * Every line starting with a dash opens a new item; `slug:` and `type:` may
 * appear in either order within an item (on the dash line or on following
 * lines). Items without a slug are dropped; a missing type defaults to
 * "related". Parsing stops at the first blank or non-indented line.
 *
 * @param lines All frontmatter lines.
 * @param start Index of the first line after the `related:` key.
 * @returns The parsed items and the index of the first line not consumed.
 */
function parseRelatedBlock(
  lines: readonly string[],
  start: number
): { items: Related[]; next: number } {
  const items: Related[] = [];
  const commit = (item: Partial<Related> | null): void => {
    if (item?.slug) {
      items.push({ slug: item.slug, type: (item.type ?? "related") as RelationType });
    }
  };

  let current: Partial<Related> | null = null;
  let i = start;
  for (; i < lines.length; i++) {
    const child = lines[i];
    const indented = child.startsWith(" ") || child.startsWith("\t");
    if (child.trim() === "" || !indented) break;

    let t = child.trim();
    if (t === "-" || t.startsWith("- ")) {
      // A dash always begins a new list item, whichever key follows it.
      commit(current);
      current = {};
      t = t.slice(1).trim();
    }
    if (current === null) continue;

    if (t.startsWith("slug:")) {
      current.slug = String(parseScalar(t.slice("slug:".length)));
    } else if (t.startsWith("type:")) {
      // Not checked against RELATION_TYPES here; validate() rejects unknown values.
      current.type = parseScalar(t.slice("type:".length)) as RelationType;
    }
  }
  commit(current);
  return { items, next: i };
}

/**
 * Parse frontmatter text (without the `---` delimiters) into an object.
 *
 * This is a bounded parser, not a general YAML reader: it understands
 * top-level `key: value` lines, inline string arrays (`[a, "b"]`), and the
 * block-style `related:` list. Missing canonical fields keep their defaults
 * (empty strings, empty arrays, quality 0, status "seedling"). Lines that are
 * blank, indented, start with a dash, or contain no colon are ignored at the
 * top level.
 *
 * @param fmText Frontmatter text using LF line separators.
 * @returns The parsed frontmatter; values are not validated here.
 */
function parseFrontmatter(fmText: string): EntityFrontmatter {
  const out: Record<string, unknown> = {
    title: "",
    type: "",
    tags: [],
    created: "",
    updated: "",
    quality: 0,
    status: "seedling",
    related: [] as Related[],
  };

  const lines = fmText.split("\n");
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (line.trim() === "") {
      i++;
      continue;
    }

    // Top-level keys start with no indentation.
    if (line.startsWith(" ") || line.startsWith("\t") || line.startsWith("-")) {
      i++;
      continue;
    }

    const colonIdx = line.indexOf(":");
    if (colonIdx < 0) {
      i++;
      continue;
    }

    const key = line.slice(0, colonIdx).trim();
    const rawVal = line.slice(colonIdx + 1);

    if (key === "tags") {
      out.tags = parseStringArray(rawVal);
      i++;
      continue;
    }

    if (key === "related") {
      if (rawVal.trim() !== "") {
        // Inline value: `[]` is the only supported form; anything else is
        // treated as empty as well.
        out.related = [];
        i++;
        continue;
      }
      const block = parseRelatedBlock(lines, i + 1);
      out.related = block.items;
      i = block.next;
      continue;
    }

    // Inline string-array values.
    const v = rawVal.trim();
    if (v.startsWith("[") && v.endsWith("]")) {
      out[key] = parseStringArray(v);
    } else {
      out[key] = parseScalar(v);
    }
    i++;
  }

  return out as EntityFrontmatter;
}
342
+
343
+ // --- Validation -------------------------------------------------------------
344
+
345
/**
 * Check an entity against the core schema.
 *
 * Verifies, in order: non-empty string title, finite quality within 0-10,
 * known status, known domain, string-array tags, and that every related
 * entry is an object with a non-empty string slug and a known relation type.
 *
 * @param entity Entity to check.
 * @throws Error with a `knowledge:`-prefixed message describing the first
 *   violated rule.
 */
export function validate(entity: Entity): void {
  const fm = entity.frontmatter;
  if (!fm.title || typeof fm.title !== "string") {
    throw new Error(`knowledge: missing or invalid 'title' for ${entity.slug}`);
  }
  // NaN satisfies neither `< 0` nor `> 10`, so it needs the explicit finiteness check.
  if (
    typeof fm.quality !== "number" ||
    !Number.isFinite(fm.quality) ||
    fm.quality < 0 ||
    fm.quality > 10
  ) {
    throw new Error(`knowledge: 'quality' must be 0-10 for ${entity.slug}`);
  }
  if (!STATUSES.includes(fm.status)) {
    throw new Error(
      `knowledge: 'status' must be one of ${STATUSES.join("|")} for ${entity.slug}`
    );
  }
  if (!DOMAINS.includes(entity.domain)) {
    throw new Error(
      `knowledge: 'domain' must be one of ${DOMAINS.join("|")} (got ${entity.domain})`
    );
  }
  if (!Array.isArray(fm.tags) || fm.tags.some((t) => typeof t !== "string")) {
    throw new Error(`knowledge: 'tags' must be string[] for ${entity.slug}`);
  }
  if (!Array.isArray(fm.related)) {
    throw new Error(`knowledge: 'related' must be array for ${entity.slug}`);
  }
  for (const r of fm.related) {
    // Guard the shape first so a null or primitive entry produces the
    // descriptive error below rather than a TypeError on property access.
    const wellFormed =
      typeof r === "object" &&
      r !== null &&
      typeof r.slug === "string" &&
      r.slug !== "" &&
      RELATION_TYPES.includes(r.type);
    if (!wellFormed) {
      throw new Error(
        `knowledge: invalid related entry on ${entity.slug}: ${JSON.stringify(r)}`
      );
    }
  }
}
377
+
378
+ // --- Serialize / parse ------------------------------------------------------
379
+
380
/**
 * Render an entity as file contents: frontmatter block, one blank line, then
 * the body terminated by a newline.
 *
 * @throws Error when the entity fails `validate`.
 */
export function serialize(entity: Entity): string {
  validate(entity);
  const header = emitFrontmatter(entity.frontmatter);
  let text = entity.body;
  if (!text.endsWith("\n")) {
    text += "\n";
  }
  return [header, "", text].join("\n");
}
386
+
387
/**
 * Build an entity from raw file contents.
 *
 * The domain and slug are taken from the arguments, not from the file. Leading
 * newlines of the body are removed. The result is not validated.
 */
export function parse(domain: Domain, slug: string, raw: string): Entity {
  const parts = splitFrontmatter(raw);
  const frontmatter = parseFrontmatter(parts.fm);
  const body = parts.body.replace(/^\n+/, "");
  return { domain, slug, frontmatter, body };
}
392
+
393
+ // --- Filesystem -------------------------------------------------------------
394
+
395
/**
 * Resolve the directory holding a domain's entity files.
 *
 * With `rootDir`, the directory is `<rootDir>/<domain>` and is created when
 * missing. Without it, the path is built from `paths.knowledge()` and passed
 * through `ensureDir` (presumably creating it as well — confirm against
 * hooks/lib/paths).
 */
function domainDir(domain: Domain, rootDir?: string): string {
  if (!rootDir) {
    return ensureDir(resolve(paths.knowledge(), domain));
  }
  const dir = resolve(rootDir, domain);
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
  return dir;
}
403
+
404
/**
 * @lintignore — consumed by Phase 2 graph layer (needs path-per-slug to read files)
 *
 * Absolute path of the markdown file for `slug` inside the domain directory.
 * Resolving the domain directory may create it as a side effect.
 */
export function entityPath(domain: Domain, slug: string, rootDir?: string): string {
  const dir = domainDir(domain, rootDir);
  const fileName = `${slug}.md`;
  return resolve(dir, fileName);
}
408
+
409
/** Report whether a file for the given domain and slug is present on disk. */
export function exists(domain: Domain, slug: string, rootDir?: string): boolean {
  const file = entityPath(domain, slug, rootDir);
  return existsSync(file);
}
412
+
413
/**
 * Write an entity to its file.
 *
 * The contents are written to a sibling `.tmp` file and then renamed over the
 * final path, so the final path is only ever replaced by a completely
 * written file.
 *
 * @throws Error when the entity fails validation during serialization.
 */
export function save(entity: Entity, rootDir?: string): void {
  const finalPath = entityPath(entity.domain, entity.slug, rootDir);
  const tmpPath = `${finalPath}.tmp`;
  const contents = serialize(entity);
  writeFileSync(tmpPath, contents, "utf-8");
  renameSync(tmpPath, finalPath);
}
419
+
420
/**
 * Read and parse the entity stored for the given domain and slug.
 *
 * @returns The parsed entity, or `null` when no file exists.
 */
export function load(domain: Domain, slug: string, rootDir?: string): Entity | null {
  const file = entityPath(domain, slug, rootDir);
  return existsSync(file) ? parse(domain, slug, readFileSync(file, "utf-8")) : null;
}
425
+
426
/**
 * Load every entity in one domain, or in all domains when `domain` is omitted.
 *
 * Only `.md` files whose name does not start with `_` are considered; the
 * slug is the file name without the extension.
 */
export function list(domain?: Domain, rootDir?: string): Entity[] {
  const domains: readonly Domain[] = domain ? [domain] : DOMAINS;
  const found: Entity[] = [];
  for (const d of domains) {
    const dir = domainDir(d, rootDir);
    if (!existsSync(dir)) continue;
    const slugs = readdirSync(dir)
      .filter((name) => name.endsWith(".md") && !name.startsWith("_"))
      .map((name) => name.slice(0, -3));
    for (const slug of slugs) {
      const entity = load(d, slug, rootDir);
      if (entity) found.push(entity);
    }
  }
  return found;
}
441
+
442
+ // --- Create / get -----------------------------------------------------------
443
+
444
/** Arguments for `getOrCreate`. Only `domain` and `name` are required. */
export interface CreateInput {
  domain: Domain;
  /** Display name; becomes the title and the source of the slug. */
  name: string;
  /** Sub-type; defaults to the domain's default type. */
  type?: string;
  tags?: string[];
  /** Defaults to 5. */
  quality?: number;
  /** Defaults to "seedling". */
  status?: Status;
  related?: Related[];
  /** Markdown body; defaults to empty. */
  body?: string;
  /** Additional non-canonical frontmatter attributes (role, company, ...). */
  extra?: Record<string, unknown>;
}

/**
 * Idempotent: if an entity already exists at the slug derived from `name`,
 * return it untouched. Otherwise create a new file with sensible defaults
 * (quality 5, status "seedling", current ISO timestamps).
 *
 * Merging new attributes into an existing entity is intentionally NOT here —
 * that lives in `ingest.ts`. Keeping `getOrCreate` idempotent makes it a safe
 * storage primitive callable from anywhere.
 *
 * Keys in `input.extra` that collide with canonical fields (title, type,
 * tags, created, updated, quality, status, related) are ignored: extras are
 * for additional attributes and must not rewrite the core schema.
 *
 * @param input Description of the entity to fetch or create.
 * @param rootDir Optional storage root overriding the default location.
 * @returns The existing entity, or the newly created and saved one.
 * @throws Error when no slug can be derived from `input.name`, or when the
 *   new entity fails validation on save.
 */
export function getOrCreate(input: CreateInput, rootDir?: string): Entity {
  const slug = slugify(input.name);
  if (!slug) {
    throw new Error(`knowledge: cannot derive slug from name "${input.name}"`);
  }
  const existing = load(input.domain, slug, rootDir);
  if (existing) return existing;

  const now = new Date().toISOString();
  const fm: EntityFrontmatter = {
    // Extras go first so the canonical fields below always win on collision.
    ...(input.extra ?? {}),
    title: input.name,
    type: input.type ?? DEFAULT_TYPE_BY_DOMAIN[input.domain],
    tags: input.tags ?? [],
    created: now,
    updated: now,
    quality: input.quality ?? 5,
    status: input.status ?? "seedling",
    related: input.related ?? [],
  };
  const entity: Entity = {
    domain: input.domain,
    slug,
    frontmatter: fm,
    body: input.body ?? "",
  };
  save(entity, rootDir);
  return entity;
}
@@ -1,62 +0,0 @@
1
- ---
2
- name: extract-entities
3
- description: Extract people and companies from content (articles, videos, URLs, pasted text). Use when identifying who and what organizations are mentioned in content.
4
- argument-hint: <content, URL, or pasted text>
5
- ---
6
-
7
- Extract people and companies from $ARGUMENTS:
8
-
9
- 1. Read/fetch the content
10
- 2. Extract ALL people and companies mentioned
11
-
12
- ## People
13
-
14
- For each person, extract:
15
- - **name**: Full name
16
- - **role**: author | subject | mentioned | quoted | expert | interviewer | interviewee
17
- - **title**: Job title (null if unknown)
18
- - **company**: Company affiliation (null if unknown)
19
- - **social**: twitter (@handle), linkedin (URL), email, website — null if unknown
20
- - **context**: Why this person is mentioned and their relevance
21
- - **importance**: primary (central to content) | secondary (supporting) | minor (brief mention)
22
-
23
- ## Companies
24
-
25
- For each company/organization, extract:
26
- - **name**: Official name
27
- - **domain**: Primary website domain (e.g. "anthropic.com", null if unknown)
28
- - **industry**: Classification (AI, security, fintech, healthcare, etc.)
29
- - **context**: How and why mentioned
30
- - **mentioned_as**: subject | source | example | competitor | partner | acquisition | product | other
31
- - **sentiment**: positive | neutral | negative | mixed
32
-
33
- ## Output
34
-
35
- Return structured JSON:
36
-
37
- ```json
38
- {
39
- "people": [...],
40
- "companies": [...]
41
- }
42
- ```
43
-
44
- ## Guidelines
45
-
46
- - Accuracy over quantity — use null for unknown fields, never guess
47
- - Include authors, subjects, quoted individuals, and anyone significantly mentioned
48
- - For research papers: all authors get "author" role
49
- - For interviews: distinguish interviewer vs interviewee
50
- - Universities and research institutions count as companies
51
- - Extract social handles from bios, signatures, or text body
52
- - Context fields should explain relevance, not just repeat the mention
53
-
54
- ## Persistence
55
-
56
- After displaying results, ask the user if they want to save. When saving, pipe the JSON output through the entity-save tool which handles deduplication automatically:
57
-
58
- ```bash
59
- echo '<the JSON output>' | bun ~/.pal/skills/extract-entities/tools/entity-save.ts -- --source "<URL or content origin>"
60
- ```
61
-
62
- The tool deduplicates against the entity index (`memory/entities/entity-index.json`), assigns stable UUIDs, tracks occurrences, and reports what was new vs existing.
@@ -1,110 +0,0 @@
1
- #!/usr/bin/env bun
2
- /**
3
- * Entity Save — Deduplicate and persist extracted entities.
4
- *
5
- * Accepts extracted people/companies JSON via stdin or --file,
6
- * deduplicates against the entity index, and saves.
7
- *
8
- * Usage:
9
- * echo '{"people":[...],"companies":[...]}' | bun entity-save.ts -- --source "https://example.com"
10
- * bun entity-save.ts -- --file /path/to/extracted.json --source "https://example.com"
11
- */
12
-
13
- import { readFileSync } from "node:fs";
14
- import { parseArgs } from "node:util";
15
- import { loadEntityIndex, processEntities } from "../../../../src/hooks/lib/entities";
16
-
17
- const { values } = parseArgs({
18
- args: Bun.argv.slice(2),
19
- options: {
20
- source: { type: "string", short: "s", default: "manual" },
21
- file: { type: "string", short: "f" },
22
- },
23
- strict: true,
24
- });
25
-
26
- const sourceId = values.source ?? "manual";
27
-
28
- let raw: string;
29
- if (values.file) {
30
- raw = readFileSync(values.file, "utf-8");
31
- } else {
32
- raw = await Bun.stdin.text();
33
- }
34
-
35
- if (!raw.trim()) {
36
- console.error("Error: No input provided. Pipe JSON via stdin or use --file.");
37
- process.exit(1);
38
- }
39
-
40
- let data: {
41
- people: Array<Record<string, unknown>>;
42
- companies: Array<Record<string, unknown>>;
43
- links?: Array<Record<string, unknown>>;
44
- sources?: Array<Record<string, unknown>>;
45
- };
46
- try {
47
- data = JSON.parse(raw);
48
- } catch {
49
- console.error("Error: Invalid JSON input.");
50
- process.exit(1);
51
- }
52
-
53
- if (!Array.isArray(data.people) || !Array.isArray(data.companies)) {
54
- console.error('Error: JSON must have "people" and "companies" arrays.');
55
- process.exit(1);
56
- }
57
- data.links ??= [];
58
- data.sources ??= [];
59
-
60
- const before = loadEntityIndex();
61
- const counts = (idx: ReturnType<typeof loadEntityIndex>) => ({
62
- people: Object.keys(idx.people).length,
63
- companies: Object.keys(idx.companies).length,
64
- links: Object.keys(idx.links).length,
65
- sources: Object.keys(idx.sources).length,
66
- });
67
- const cb = counts(before);
68
-
69
- const result = processEntities(
70
- {
71
- people: data.people as Array<{ name: string; [key: string]: unknown }>,
72
- companies: data.companies as Array<{
73
- name: string;
74
- domain: string | null;
75
- [key: string]: unknown;
76
- }>,
77
- links: data.links as Array<{ url: string; [key: string]: unknown }>,
78
- sources: data.sources as Array<{
79
- url: string | null;
80
- author: string | null;
81
- publication: string | null;
82
- [key: string]: unknown;
83
- }>,
84
- },
85
- sourceId
86
- );
87
-
88
- const ca = counts(loadEntityIndex());
89
-
90
- console.log(
91
- JSON.stringify(
92
- {
93
- saved: {
94
- people: result.people.length,
95
- companies: result.companies.length,
96
- links: result.links.length,
97
- sources: result.sources.length,
98
- },
99
- new: {
100
- people: ca.people - cb.people,
101
- companies: ca.companies - cb.companies,
102
- links: ca.links - cb.links,
103
- sources: ca.sources - cb.sources,
104
- },
105
- total: ca,
106
- },
107
- null,
108
- 2
109
- )
110
- );