@vortex-os/ontos 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +32 -17
  2. package/dist/actions/index.d.ts +28 -1
  3. package/dist/actions/index.d.ts.map +1 -1
  4. package/dist/actions/index.js +12 -0
  5. package/dist/actions/index.js.map +1 -1
  6. package/dist/enrich/domain-range.d.ts +25 -0
  7. package/dist/enrich/domain-range.d.ts.map +1 -0
  8. package/dist/enrich/domain-range.js +60 -0
  9. package/dist/enrich/domain-range.js.map +1 -0
  10. package/dist/enrich/index.d.ts +50 -0
  11. package/dist/enrich/index.d.ts.map +1 -0
  12. package/dist/enrich/index.js +67 -0
  13. package/dist/enrich/index.js.map +1 -0
  14. package/dist/enrich/tier.d.ts +27 -0
  15. package/dist/enrich/tier.d.ts.map +1 -0
  16. package/dist/enrich/tier.js +38 -0
  17. package/dist/enrich/tier.js.map +1 -0
  18. package/dist/enrich/types.d.ts +101 -0
  19. package/dist/enrich/types.d.ts.map +1 -0
  20. package/dist/enrich/types.js +11 -0
  21. package/dist/enrich/types.js.map +1 -0
  22. package/dist/enrich/validate.d.ts +32 -0
  23. package/dist/enrich/validate.d.ts.map +1 -0
  24. package/dist/enrich/validate.js +104 -0
  25. package/dist/enrich/validate.js.map +1 -0
  26. package/dist/extract/deterministic.d.ts.map +1 -1
  27. package/dist/extract/deterministic.js +20 -0
  28. package/dist/extract/deterministic.js.map +1 -1
  29. package/dist/extract/types.d.ts +1 -1
  30. package/dist/extract/types.d.ts.map +1 -1
  31. package/dist/index.d.ts +2 -0
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/index.js +2 -0
  34. package/dist/index.js.map +1 -1
  35. package/dist/sqlite/index.d.ts +1 -1
  36. package/dist/sqlite/index.d.ts.map +1 -1
  37. package/dist/sqlite/schema.d.ts +1 -1
  38. package/dist/sqlite/schema.d.ts.map +1 -1
  39. package/dist/sqlite/schema.js +38 -0
  40. package/dist/sqlite/schema.js.map +1 -1
  41. package/dist/sqlite/store.d.ts +70 -1
  42. package/dist/sqlite/store.d.ts.map +1 -1
  43. package/dist/sqlite/store.js +244 -1
  44. package/dist/sqlite/store.js.map +1 -1
  45. package/dist/writeback.d.ts +60 -0
  46. package/dist/writeback.d.ts.map +1 -0
  47. package/dist/writeback.js +138 -0
  48. package/dist/writeback.js.map +1 -0
  49. package/package.json +4 -2
  50. package/scripts/enrich-ontos.mjs +182 -0
  51. package/scripts/rebuild-ontos.mjs +1 -0
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env node
2
+ // Agent-mediated LLM enrichment (Phase 1 §4 step 3) — the thin I/O wrapper over
3
+ // the pure enrich layer. ontos has no LLM of its own: this EMITs extraction
4
+ // requests for the running agent to fulfil, then INGESTs the JSON the agent
5
+ // returns (validated → tiered → stored as PROPOSALS, never into the trusted graph).
6
+ //
7
+ // Two-step, agent in the middle:
8
+ // 1. enrich-ontos --emit requests.json # ontos -> agent : what to extract
9
+ // 2. (the agent reads requests.json, does the extraction + self/cross-check,
10
+ // writes proposals.json: [{ recordPath, proposals: [RawProposal...] }])
11
+ // 3. enrich-ontos --ingest proposals.json # agent -> ontos : store proposals
12
+ //
13
+ // Read-only on your markdown in BOTH steps (and in --gc); writes only the sqlite index (ingest, gc).
14
+ // Usage: enrich-ontos [--data-dir <dir>] [--db <path>] (--emit [file] | --ingest <file> | --gc)
15
+ import { readFileSync, readdirSync, writeFileSync, existsSync } from "node:fs";
16
+ import { join, resolve, sep } from "node:path";
17
+ import { OntosStore } from "../dist/sqlite/index.js";
18
+ import { extractDeterministic } from "../dist/extract/index.js";
19
+ import {
20
+ buildEnrichmentRequest,
21
+ needsEnrichment,
22
+ extractionHash,
23
+ ingestProposals,
24
+ } from "../dist/enrich/index.js";
25
+ import { parseFrontmatter } from "../dist/internal/frontmatter.js";
26
+
27
// ---- Command-line parsing -------------------------------------------------
// Recognised flags: --data-dir <dir>, --db <path>, --emit [file],
// --ingest <file>, --gc. Anything else is ignored. When the same flag (or two
// mode flags) appear more than once, the last occurrence wins.
const args = process.argv.slice(2);
let dataDir = resolve(process.cwd(), "data");
let dbPath = null;
let mode = null; // "emit" | "ingest" | "gc"
let ioFile = null;
for (let idx = 0; idx < args.length; idx++) {
  const flag = args[idx];
  const next = args[idx + 1];
  switch (flag) {
    case "--data-dir":
      // A value-taking flag with no following token is silently ignored.
      if (next) {
        dataDir = resolve(next);
        idx++;
      }
      break;
    case "--db":
      if (next) {
        dbPath = resolve(next);
        idx++;
      }
      break;
    case "--emit":
      mode = "emit";
      // The output file is optional: only consume the next token if it is not a flag.
      if (next && !next.startsWith("--")) {
        ioFile = resolve(next);
        idx++;
      }
      break;
    case "--ingest":
      // The input file is mandatory: without one the mode is NOT selected.
      if (next) {
        mode = "ingest";
        ioFile = resolve(next);
        idx++;
      }
      break;
    case "--gc":
      mode = "gc";
      break;
    default:
      break;
  }
}
// The database path defaults to a location derived from the final data dir.
if (dbPath === null) {
  dbPath = join(dataDir, "_indexes", "ontos.sqlite");
}
if (mode === null) {
  console.error("usage: enrich-ontos [--data-dir <dir>] [--db <path>] (--emit [file] | --ingest <file> | --gc)");
  process.exit(2);
}
51
+
52
// Directories (under the data dir) whose markdown records are enriched: the
// prose-heavy sources where the LLM gap-filler earns its keep.
//   - worklog is NESTED (YYYY/MM/…), so it is walked recursively;
//   - decision-log and _memory are flat.
// _failures is frontmatter-structured (deterministic handles it) — not enriched.
// Each row below is [sub-directory, category, recursive].
const SOURCES = [
  ["worklog", "worklog", true],
  ["decision-log", "decision", false],
  ["_memory", "memory", false],
].map(([subDir, category, recursive]) => ({
  dir: join(dataDir, subDir),
  category,
  recursive,
}));
60
+
61
/**
 * List the markdown record files under `dir`.
 *
 * A file qualifies when it is a regular file whose name ends in ".md" and is
 * not "_INDEX.md". Sub-directories are descended into only when `recursive`
 * is true. Entries are visited in name order (plain code-unit comparison) so
 * the result does not depend on the order the filesystem happens to return.
 *
 * @param {string} dir - Directory to scan.
 * @param {boolean} recursive - Whether to walk sub-directories as well.
 * @returns {string[]} Paths (each `join(dir, name)`), in deterministic order.
 *   Returns [] when the directory cannot be listed.
 */
function listMd(dir, recursive) {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch (err) {
    // An absent directory is an expected condition and is skipped quietly.
    // Anything else (e.g. a permission error) is still skipped so one bad
    // directory does not abort the run, but it is reported rather than hidden.
    const code = err?.code;
    if (code !== "ENOENT" && code !== "ENOTDIR") {
      console.error(`[enrich-ontos] warning: cannot list ${dir}: ${err?.message ?? err}`);
    }
    return [];
  }
  // readdir order is not something to rely on; sort for reproducible output.
  entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
  const out = [];
  for (const entry of entries) {
    if (entry.isDirectory()) {
      if (recursive) {
        for (const nested of listMd(join(dir, entry.name), true)) out.push(nested);
      }
    } else if (entry.isFile() && entry.name.endsWith(".md") && entry.name !== "_INDEX.md") {
      out.push(join(dir, entry.name));
    }
  }
  return out;
}
78
+
79
/**
 * Lazily walk every configured source directory and yield one record per
 * markdown file found there.
 *
 * Each file is read as UTF-8 and split by parseFrontmatter; the yielded object
 * carries the file path, the source's category, and the parsed frontmatter/body.
 *
 * @yields {{ path: string, category: string, frontmatter: *, body: * }}
 */
function* records() {
  for (const source of SOURCES) {
    const files = listMd(source.dir, source.recursive);
    for (const path of files) {
      const text = readFileSync(path, "utf8");
      const parsed = parseFrontmatter(text);
      yield {
        path,
        category: source.category,
        frontmatter: parsed.frontmatter,
        body: parsed.body,
      };
    }
  }
}
87
+
88
/**
 * --emit: scan every record, keep those that still need enrichment, and write
 * the resulting request list as pretty-printed JSON to `ioFile` (or to stdout
 * when no file was given). A summary line goes to stderr.
 *
 * @param {OntosStore} store - Open store, used for known object ids and the
 *   needs-enrichment check.
 * @returns {boolean} Always true (the mode has no failure path of its own).
 */
function runEmit(store) {
  const known = store.allObjectIds();
  const requests = [];
  let scanned = 0;
  for (const rec of records()) {
    scanned++;
    if (!needsEnrichment(store, rec)) continue; // unchanged since last enrichment
    requests.push(buildEnrichmentRequest(rec, extractDeterministic(rec), known));
  }
  const payload = JSON.stringify(requests, null, 2);
  if (ioFile) writeFileSync(ioFile, payload);
  else process.stdout.write(payload + "\n");
  console.error(
    `[enrich-ontos] emit: ${requests.length} record(s) need enrichment (of ${scanned} scanned)${ioFile ? ` -> ${ioFile}` : ""}`,
  );
  return true;
}

/**
 * --gc: reconcile the store with the filesystem. Drops enrichment_state rows
 * and UNCONFIRMED proposals whose source record vanished (rename/delete).
 * Liveness is an fs check here (the store is pure sqlite); confirmed/rejected
 * proposals are human decisions and are kept.
 *
 * @param {OntosStore} store - Open store to prune.
 * @returns {boolean} Always true.
 */
function runGc(store) {
  const isDead = (p) => !existsSync(resolve(p));
  let states = 0;
  for (const recordPath of store.allEnrichmentPaths()) {
    if (isDead(recordPath) && store.deleteEnrichmentState(recordPath)) states++;
  }
  let props = 0;
  for (const evidencePath of store.pendingProposalEvidencePaths()) {
    if (isDead(evidencePath)) props += store.dropPendingProposalsByEvidence(evidencePath);
  }
  console.error(`[enrich-ontos] gc: dropped ${states} stale enrichment_state, ${props} orphaned proposal(s)`);
  return true;
}

/**
 * --ingest: read the agent's reply from `ioFile` and store its proposals.
 * Expected shape: [{ recordPath, proposals: [RawProposal...] }].
 *
 * Failures are reported through `process.exitCode` and a `false` return rather
 * than `process.exit()`, because exiting from inside the caller's try block
 * would terminate the process before its `finally` closes the store.
 *
 * @param {OntosStore} store - Open store that receives the proposals.
 * @returns {boolean} true when the payload was processed; false when the file
 *   could not be read/parsed (exit code 1) or is not a JSON array (exit code 2).
 */
function runIngest(store) {
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(ioFile, "utf8"));
  } catch (err) {
    // Previously an uncaught exception (exit status 1 plus a stack trace);
    // keep the status, replace the trace with a one-line diagnostic.
    console.error(`[enrich-ontos] ingest: cannot read or parse ${ioFile}: ${err?.message ?? err}`);
    process.exitCode = 1;
    return false;
  }
  if (!Array.isArray(parsed)) {
    console.error("[enrich-ontos] ingest: expected a top-level JSON array of { recordPath, proposals }");
    process.exitCode = 2;
    return false;
  }

  const at = new Date().toISOString().slice(0, 10); // YYYY-MM-DD (UTC)
  const dataRoot = resolve(dataDir);
  let accepted = 0;
  let rejected = 0;
  let recordCount = 0;
  let skipped = 0;
  for (const item of parsed) {
    const recordPath = item?.recordPath;
    // Malformed shape (no path, or proposals not an array) -> skip; do NOT stamp
    // state, or a bad agent reply would suppress future --emit for that record.
    if (typeof recordPath !== "string" || !Array.isArray(item?.proposals)) {
      skipped++;
      continue;
    }
    // Bound the path to the data dir: the agent only ever echoes back a path ontos
    // emitted, so anything outside scope is malformed/hostile — refuse to read it
    // (defense against path traversal, even though we only hash the content).
    const abs = resolve(recordPath);
    if (abs !== dataRoot && !abs.startsWith(dataRoot + sep)) {
      skipped++;
      continue;
    }
    // Re-read the record to compute its current extraction hash (the change-detection
    // stamp), so a later text edit re-emits it. Read-only. Unreadable -> skip (do not
    // ingest proposals for a record we cannot verify, and do not stamp).
    let hash;
    try {
      const { body } = parseFrontmatter(readFileSync(abs, "utf8"));
      hash = extractionHash({ body });
    } catch {
      skipped++;
      continue;
    }
    recordCount++;
    // Anchor each proposal's evidence to the record's ABSOLUTE path (all proposals
    // in this item come from this record). A raw/relative evidencePath would make
    // GC's existsSync(resolve(...)) cwd-dependent and mis-classify orphans; the
    // specific quote is preserved in evidenceSpan.
    const proposals = item.proposals.map((p) => ({ ...p, evidencePath: abs }));
    const report = ingestProposals(store, proposals, { sourceRecordHash: hash, proposedAt: at });
    accepted += report.accepted;
    rejected += report.rejected.length;
    // Stamp the change-detection state ONLY when something was actually captured.
    // An empty or all-rejected reply (a failed/empty agent run) must NOT mark the
    // record done, or it would silently suppress future --emit.
    // Trade-off: a record with genuinely no extractable relations re-emits each
    // run; a later "examined, nothing found" marker can suppress that — TODO.
    if (report.accepted > 0) store.setExtractionState(abs, hash, at);
  }
  console.error(
    `[enrich-ontos] ingest: ${accepted} accepted, ${rejected} rejected, ${skipped} skipped, across ${recordCount} record(s)`,
  );
  return true;
}

// ---- Entry point -----------------------------------------------------------
// Open the store, run the selected mode, and ALWAYS close the store afterwards.
const store = new OntosStore(dbPath);
try {
  let ok;
  if (mode === "emit") ok = runEmit(store);
  else if (mode === "gc") ok = runGc(store);
  else ok = runIngest(store);
  // The db location is only reported for a run that completed.
  if (ok) console.error(`[enrich-ontos] db: ${store.dbPath}`);
} finally {
  store.close();
}
@@ -25,6 +25,7 @@ const SOURCES = [
25
25
  { dir: join(dataDir, "_failures"), category: "failure" },
26
26
  { dir: join(dataDir, "decision-log"), category: "decision" },
27
27
  { dir: join(dataDir, "_memory"), category: "memory" },
28
+ { dir: join(dataDir, "runbooks"), category: "runbook" },
28
29
  ];
29
30
 
30
31
  // FLAT listing (non-recursive): the extracted categories (_failures, decision-log,