@oscharko-dev/keiko-contracts 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bff-wire.d.ts +661 -0
- package/dist/bff-wire.d.ts.map +1 -0
- package/dist/bff-wire.js +102 -0
- package/dist/bug-investigation-events.d.ts +92 -0
- package/dist/bug-investigation-events.d.ts.map +1 -0
- package/dist/bug-investigation-events.js +18 -0
- package/dist/coding-context.d.ts +76 -0
- package/dist/coding-context.d.ts.map +1 -0
- package/dist/coding-context.js +158 -0
- package/dist/connected-context.d.ts +174 -0
- package/dist/connected-context.d.ts.map +1 -0
- package/dist/connected-context.js +636 -0
- package/dist/conversation-budget.d.ts +37 -0
- package/dist/conversation-budget.d.ts.map +1 -0
- package/dist/conversation-budget.js +97 -0
- package/dist/editor-agent.d.ts +131 -0
- package/dist/editor-agent.d.ts.map +1 -0
- package/dist/editor-agent.js +197 -0
- package/dist/editor-completion.d.ts +62 -0
- package/dist/editor-completion.d.ts.map +1 -0
- package/dist/editor-completion.js +147 -0
- package/dist/editor-dirty-close.d.ts +17 -0
- package/dist/editor-dirty-close.d.ts.map +1 -0
- package/dist/editor-dirty-close.js +8 -0
- package/dist/editor-hot-exit.d.ts +18 -0
- package/dist/editor-hot-exit.d.ts.map +1 -0
- package/dist/editor-hot-exit.js +42 -0
- package/dist/editor-inline-completion.d.ts +70 -0
- package/dist/editor-inline-completion.d.ts.map +1 -0
- package/dist/editor-inline-completion.js +215 -0
- package/dist/editor-layout.d.ts +105 -0
- package/dist/editor-layout.d.ts.map +1 -0
- package/dist/editor-layout.js +479 -0
- package/dist/editor-patch-apply.d.ts +77 -0
- package/dist/editor-patch-apply.d.ts.map +1 -0
- package/dist/editor-patch-apply.js +122 -0
- package/dist/editor-session.d.ts +31 -0
- package/dist/editor-session.d.ts.map +1 -0
- package/dist/editor-session.js +75 -0
- package/dist/editor-test-generation.d.ts +104 -0
- package/dist/editor-test-generation.d.ts.map +1 -0
- package/dist/editor-test-generation.js +211 -0
- package/dist/evaluations.d.ts +75 -0
- package/dist/evaluations.d.ts.map +1 -0
- package/dist/evaluations.js +16 -0
- package/dist/evidence.d.ts +297 -0
- package/dist/evidence.d.ts.map +1 -0
- package/dist/evidence.js +9 -0
- package/dist/gateway.d.ts +129 -0
- package/dist/gateway.d.ts.map +1 -0
- package/dist/gateway.js +66 -0
- package/dist/harness.d.ts +274 -0
- package/dist/harness.d.ts.map +1 -0
- package/dist/harness.js +38 -0
- package/dist/index.d.ts +101 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +83 -0
- package/dist/language-service.d.ts +145 -0
- package/dist/language-service.d.ts.map +1 -0
- package/dist/language-service.js +161 -0
- package/dist/local-knowledge-large-document-validation.d.ts +7 -0
- package/dist/local-knowledge-large-document-validation.d.ts.map +1 -0
- package/dist/local-knowledge-large-document-validation.js +161 -0
- package/dist/local-knowledge-large-document.d.ts +113 -0
- package/dist/local-knowledge-large-document.d.ts.map +1 -0
- package/dist/local-knowledge-large-document.js +142 -0
- package/dist/local-knowledge-paths.d.ts +3 -0
- package/dist/local-knowledge-paths.d.ts.map +1 -0
- package/dist/local-knowledge-paths.js +65 -0
- package/dist/local-knowledge-records.d.ts +190 -0
- package/dist/local-knowledge-records.d.ts.map +1 -0
- package/dist/local-knowledge-records.js +36 -0
- package/dist/local-knowledge-schema-validation.d.ts +19 -0
- package/dist/local-knowledge-schema-validation.d.ts.map +1 -0
- package/dist/local-knowledge-schema-validation.js +115 -0
- package/dist/local-knowledge-schema.d.ts +14 -0
- package/dist/local-knowledge-schema.d.ts.map +1 -0
- package/dist/local-knowledge-schema.js +715 -0
- package/dist/local-knowledge-validation.d.ts +20 -0
- package/dist/local-knowledge-validation.d.ts.map +1 -0
- package/dist/local-knowledge-validation.js +487 -0
- package/dist/local-knowledge.d.ts +158 -0
- package/dist/local-knowledge.d.ts.map +1 -0
- package/dist/local-knowledge.js +63 -0
- package/dist/memory-audit-events.d.ts +73 -0
- package/dist/memory-audit-events.d.ts.map +1 -0
- package/dist/memory-audit-events.js +44 -0
- package/dist/memory-audit-validation.d.ts +4 -0
- package/dist/memory-audit-validation.d.ts.map +1 -0
- package/dist/memory-audit-validation.js +151 -0
- package/dist/memory-barrel.d.ts +15 -0
- package/dist/memory-barrel.d.ts.map +1 -0
- package/dist/memory-barrel.js +20 -0
- package/dist/memory-internal.d.ts +26 -0
- package/dist/memory-internal.d.ts.map +1 -0
- package/dist/memory-internal.js +104 -0
- package/dist/memory-operations-validation.d.ts +12 -0
- package/dist/memory-operations-validation.d.ts.map +1 -0
- package/dist/memory-operations-validation.js +267 -0
- package/dist/memory-operations.d.ts +156 -0
- package/dist/memory-operations.d.ts.map +1 -0
- package/dist/memory-operations.js +29 -0
- package/dist/memory-record-validation.d.ts +10 -0
- package/dist/memory-record-validation.d.ts.map +1 -0
- package/dist/memory-record-validation.js +101 -0
- package/dist/memory-records.d.ts +66 -0
- package/dist/memory-records.d.ts.map +1 -0
- package/dist/memory-records.js +22 -0
- package/dist/memory-retrieval-validation.d.ts +6 -0
- package/dist/memory-retrieval-validation.d.ts.map +1 -0
- package/dist/memory-retrieval-validation.js +108 -0
- package/dist/memory-validation.d.ts +31 -0
- package/dist/memory-validation.d.ts.map +1 -0
- package/dist/memory-validation.js +318 -0
- package/dist/memory-workflow-port.d.ts +26 -0
- package/dist/memory-workflow-port.d.ts.map +1 -0
- package/dist/memory-workflow-port.js +13 -0
- package/dist/memory.d.ts +81 -0
- package/dist/memory.d.ts.map +1 -0
- package/dist/memory.js +104 -0
- package/dist/prompt-enhancer-analyzer.d.ts +7 -0
- package/dist/prompt-enhancer-analyzer.d.ts.map +1 -0
- package/dist/prompt-enhancer-analyzer.js +745 -0
- package/dist/prompt-enhancer-bff.d.ts +67 -0
- package/dist/prompt-enhancer-bff.d.ts.map +1 -0
- package/dist/prompt-enhancer-bff.js +156 -0
- package/dist/prompt-enhancer-critic.d.ts +46 -0
- package/dist/prompt-enhancer-critic.d.ts.map +1 -0
- package/dist/prompt-enhancer-critic.js +35 -0
- package/dist/prompt-enhancer-grounding.d.ts +19 -0
- package/dist/prompt-enhancer-grounding.d.ts.map +1 -0
- package/dist/prompt-enhancer-grounding.js +235 -0
- package/dist/prompt-enhancer-safety.d.ts +66 -0
- package/dist/prompt-enhancer-safety.d.ts.map +1 -0
- package/dist/prompt-enhancer-safety.js +446 -0
- package/dist/prompt-enhancer-validation.d.ts +28 -0
- package/dist/prompt-enhancer-validation.d.ts.map +1 -0
- package/dist/prompt-enhancer-validation.js +931 -0
- package/dist/prompt-enhancer.d.ts +184 -0
- package/dist/prompt-enhancer.d.ts.map +1 -0
- package/dist/prompt-enhancer.js +350 -0
- package/dist/qualityIntelligence/assertNever.d.ts +2 -0
- package/dist/qualityIntelligence/assertNever.d.ts.map +1 -0
- package/dist/qualityIntelligence/assertNever.js +7 -0
- package/dist/qualityIntelligence/auditSummary.d.ts +25 -0
- package/dist/qualityIntelligence/auditSummary.d.ts.map +1 -0
- package/dist/qualityIntelligence/auditSummary.js +7 -0
- package/dist/qualityIntelligence/bffWire.d.ts +356 -0
- package/dist/qualityIntelligence/bffWire.d.ts.map +1 -0
- package/dist/qualityIntelligence/bffWire.js +22 -0
- package/dist/qualityIntelligence/coverageMap.d.ts +21 -0
- package/dist/qualityIntelligence/coverageMap.d.ts.map +1 -0
- package/dist/qualityIntelligence/coverageMap.js +29 -0
- package/dist/qualityIntelligence/editableRevision.d.ts +21 -0
- package/dist/qualityIntelligence/editableRevision.d.ts.map +1 -0
- package/dist/qualityIntelligence/editableRevision.js +8 -0
- package/dist/qualityIntelligence/evidenceAtom.d.ts +35 -0
- package/dist/qualityIntelligence/evidenceAtom.d.ts.map +1 -0
- package/dist/qualityIntelligence/evidenceAtom.js +29 -0
- package/dist/qualityIntelligence/exportBundle.d.ts +28 -0
- package/dist/qualityIntelligence/exportBundle.d.ts.map +1 -0
- package/dist/qualityIntelligence/exportBundle.js +46 -0
- package/dist/qualityIntelligence/handoffEnvelope.d.ts +23 -0
- package/dist/qualityIntelligence/handoffEnvelope.d.ts.map +1 -0
- package/dist/qualityIntelligence/handoffEnvelope.js +8 -0
- package/dist/qualityIntelligence/ids.d.ts +58 -0
- package/dist/qualityIntelligence/ids.d.ts.map +1 -0
- package/dist/qualityIntelligence/ids.js +93 -0
- package/dist/qualityIntelligence/index.d.ts +29 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +20 -0
- package/dist/qualityIntelligence/reviewRecord.d.ts +19 -0
- package/dist/qualityIntelligence/reviewRecord.d.ts.map +1 -0
- package/dist/qualityIntelligence/reviewRecord.js +20 -0
- package/dist/qualityIntelligence/runPlanAndEvents.d.ts +84 -0
- package/dist/qualityIntelligence/runPlanAndEvents.d.ts.map +1 -0
- package/dist/qualityIntelligence/runPlanAndEvents.js +51 -0
- package/dist/qualityIntelligence/sourceEnvelope.d.ts +77 -0
- package/dist/qualityIntelligence/sourceEnvelope.d.ts.map +1 -0
- package/dist/qualityIntelligence/sourceEnvelope.js +118 -0
- package/dist/qualityIntelligence/testCaseCandidate.d.ts +21 -0
- package/dist/qualityIntelligence/testCaseCandidate.d.ts.map +1 -0
- package/dist/qualityIntelligence/testCaseCandidate.js +21 -0
- package/dist/qualityIntelligence/testQualityRubric.d.ts +17 -0
- package/dist/qualityIntelligence/testQualityRubric.d.ts.map +1 -0
- package/dist/qualityIntelligence/testQualityRubric.js +32 -0
- package/dist/qualityIntelligence/validationFinding.d.ts +48 -0
- package/dist/qualityIntelligence/validationFinding.d.ts.map +1 -0
- package/dist/qualityIntelligence/validationFinding.js +36 -0
- package/dist/relationships-validation.d.ts +13 -0
- package/dist/relationships-validation.d.ts.map +1 -0
- package/dist/relationships-validation.js +422 -0
- package/dist/relationships.d.ts +79 -0
- package/dist/relationships.d.ts.map +1 -0
- package/dist/relationships.js +307 -0
- package/dist/text-safety.d.ts +7 -0
- package/dist/text-safety.d.ts.map +1 -0
- package/dist/text-safety.js +58 -0
- package/dist/tools.d.ts +153 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +118 -0
- package/dist/unit-test-events.d.ts +87 -0
- package/dist/unit-test-events.d.ts.map +1 -0
- package/dist/unit-test-events.js +14 -0
- package/dist/verification-summary.d.ts +38 -0
- package/dist/verification-summary.d.ts.map +1 -0
- package/dist/verification-summary.js +5 -0
- package/dist/verification.d.ts +64 -0
- package/dist/verification.d.ts.map +1 -0
- package/dist/verification.js +13 -0
- package/dist/workflow-descriptor.d.ts +21 -0
- package/dist/workflow-descriptor.d.ts.map +1 -0
- package/dist/workflow-descriptor.js +8 -0
- package/dist/workflow-handoff.d.ts +69 -0
- package/dist/workflow-handoff.d.ts.map +1 -0
- package/dist/workflow-handoff.js +381 -0
- package/dist/workspace-descriptors.d.ts +21 -0
- package/dist/workspace-descriptors.d.ts.map +1 -0
- package/dist/workspace-descriptors.js +180 -0
- package/dist/workspace-ui.d.ts +119 -0
- package/dist/workspace-ui.d.ts.map +1 -0
- package/dist/workspace-ui.js +105 -0
- package/dist/workspace.d.ts +104 -0
- package/dist/workspace.d.ts.map +1 -0
- package/dist/workspace.js +27 -0
- package/package.json +71 -0
|
@@ -0,0 +1,715 @@
|
|
|
1
|
+
// Persistent on-disk schema for the Local Knowledge Connector capsule store (Epic #189,
|
|
2
|
+
// Issue #265). This module is PURE — no `node:sqlite` import, no fs, no clock — so it can
|
|
3
|
+
// live in the leaf `keiko-contracts` package without breaking ADR-0019 direction rule 1.
|
|
4
|
+
// The runtime that *applies* the DDL ships in issue #193; that runtime owns the
|
|
5
|
+
// `DatabaseSync` import, atomic file creation, and the migration runner.
|
|
6
|
+
//
|
|
7
|
+
// Schema-version model
|
|
8
|
+
// --------------------
|
|
9
|
+
// * `LOCAL_KNOWLEDGE_SCHEMA_VERSION` (string `"1"`, from `local-knowledge.ts`) pins the
|
|
10
|
+
// *in-memory* type-contract surface. A breaking type change adds a new literal member.
|
|
11
|
+
// * `LOCAL_KNOWLEDGE_DB_SCHEMA_VERSION` (integer, currently `13`, here) pins the *on-disk* DDL and is
|
|
12
|
+
// stored via `PRAGMA user_version`. The two evolve independently — a new column with a
|
|
13
|
+
// non-breaking JS-side mapping bumps only the DB version; a contract-breaking type
|
|
14
|
+
// addition bumps only the string version.
|
|
15
|
+
//
|
|
16
|
+
// Lineage invariant
|
|
17
|
+
// -----------------
|
|
18
|
+
// Every operational dependent table carries `capsule_id TEXT NOT NULL REFERENCES
|
|
19
|
+
// capsules(id) ON DELETE CASCADE`. Documents, chunks, and vectors additionally carry
|
|
20
|
+
// `source_id`; pages, sections, parsed units, chunks, and vectors additionally carry
|
|
21
|
+
// `document_id`. The DB therefore enforces the Foundry-IQ "no global pool" rule — a
|
|
22
|
+
// chunk or vector cannot exist outside of its capsule + source + document tuple.
|
|
23
|
+
//
|
|
24
|
+
// Audit tables intentionally keep only metadata identifiers and do NOT cascade on capsule
|
|
25
|
+
// deletion. A `capsule-deleted` audit event must remain durable after the capsule row and
|
|
26
|
+
// operational index state are removed.
|
|
27
|
+
//
|
|
28
|
+
// Vector identity is denormalised onto every vector row (provider/modelId/dimensions/
|
|
29
|
+
// metric). When the active embedding model changes, stale vectors are detected by a single
|
|
30
|
+
// scan against the index `idx_vectors_capsule_identity` without joining back to `capsules`.
|
|
31
|
+
/**
 * Integer version of the on-disk DDL for the capsule store. Per this module's header it is
 * the value persisted via `PRAGMA user_version`, and it evolves independently of the string
 * `LOCAL_KNOWLEDGE_SCHEMA_VERSION` that pins the in-memory type contract.
 */
export const LOCAL_KNOWLEDGE_DB_SCHEMA_VERSION = 13;
|
|
32
|
+
// ─── DDL statements (applied in declared order) ──────────────────────────────────
|
|
33
|
+
// node:sqlite from Node 22 ships SQLite ≥ 3.45 which supports `STRICT`. Each statement is
|
|
34
|
+
// a single complete top-level statement so the runtime can apply them via either `exec`
|
|
35
|
+
// (batch) or one-shot `prepare(...).run()` without re-parsing.
|
|
36
|
+
// Statement that switches on foreign-key enforcement for the connection.
const PRAGMA_FOREIGN_KEYS = ["PRAGMA foreign_keys", "ON;"].join(" = ");
|
|
37
|
+
// DDL for the STRICT `capsules` table. `id` is the primary key; the row also declares
// display metadata, retrieval/output settings, the embedding identity columns
// (provider, model id, optional revision, dimensions, metric), a lifecycle state, a
// storage reference, and INTEGER created/updated timestamps.
const CREATE_CAPSULES = `
CREATE TABLE capsules (
  id TEXT PRIMARY KEY NOT NULL,
  display_name TEXT NOT NULL,
  description TEXT,
  tags_json TEXT NOT NULL,
  source_routing_instructions TEXT,
  always_query INTEGER NOT NULL DEFAULT 0,
  retrieval_effort TEXT NOT NULL,
  output_mode TEXT NOT NULL,
  answer_grounding_policy TEXT NOT NULL,
  embedding_model_provider TEXT NOT NULL,
  embedding_model_id TEXT NOT NULL,
  embedding_model_revision TEXT,
  vector_dimensions INTEGER NOT NULL,
  vector_metric TEXT NOT NULL,
  lifecycle_state TEXT NOT NULL,
  storage_reference TEXT NOT NULL,
  created_at INTEGER NOT NULL,
  updated_at INTEGER NOT NULL
) STRICT;
`.trim();
|
|
59
|
+
// DDL for the STRICT `knowledge_sources` table: source metadata keyed by `id`, with no
// capsule reference of its own (display name, description, tags, scope kind/JSON, timestamps).
const CREATE_KNOWLEDGE_SOURCES = `
CREATE TABLE knowledge_sources (
  id TEXT PRIMARY KEY NOT NULL,
  display_name TEXT NOT NULL,
  description TEXT,
  tags_json TEXT NOT NULL,
  scope_kind TEXT NOT NULL,
  scope_json TEXT NOT NULL,
  created_at INTEGER NOT NULL,
  updated_at INTEGER NOT NULL
) STRICT;
`.trim();
|
|
71
|
+
// capsule_sources adds UNIQUE (capsule_id, id) so dependent tables can FK against the
|
|
72
|
+
// composite (capsule_id, source_id) pair — that enforces the Foundry-IQ lineage invariant
|
|
73
|
+
// at the database level: a chunk's source_id cannot belong to a different capsule than the
|
|
74
|
+
// chunk itself. It also links to `knowledge_sources`, the independent source metadata table
|
|
75
|
+
// used by lifecycle reads; existing stores gain that table through the v10 backfill.
|
|
76
|
+
// Current DDL for `capsule_sources`. Rows cascade away with their capsule, while the
// matching `knowledge_sources` row is protected by ON DELETE RESTRICT. UNIQUE
// (capsule_id, id) gives dependent tables a composite key to reference.
const CREATE_CAPSULE_SOURCES = `
CREATE TABLE capsule_sources (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  display_name TEXT NOT NULL,
  description TEXT,
  tags_json TEXT NOT NULL,
  scope_kind TEXT NOT NULL,
  scope_json TEXT NOT NULL,
  created_at INTEGER NOT NULL,
  updated_at INTEGER NOT NULL,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (id) REFERENCES knowledge_sources(id) ON DELETE RESTRICT,
  UNIQUE (capsule_id, id)
) STRICT;
`.trim();
|
|
92
|
+
// V1 shape of `capsule_sources`: identical columns to the current DDL but without the
// foreign key to `knowledge_sources`.
const CREATE_CAPSULE_SOURCES_V1 = `
CREATE TABLE capsule_sources (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  display_name TEXT NOT NULL,
  description TEXT,
  tags_json TEXT NOT NULL,
  scope_kind TEXT NOT NULL,
  scope_json TEXT NOT NULL,
  created_at INTEGER NOT NULL,
  updated_at INTEGER NOT NULL,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  UNIQUE (capsule_id, id)
) STRICT;
`.trim();
|
|
107
|
+
// DDL for `capsule_set_members`: one row per (set_id, capsule_id) pair with an ordinal
// and a composition timestamp. Rows are removed when the referenced capsule is deleted.
const CREATE_CAPSULE_SET_MEMBERS = `
CREATE TABLE capsule_set_members (
  set_id TEXT NOT NULL,
  capsule_id TEXT NOT NULL REFERENCES capsules(id) ON DELETE CASCADE,
  ordinal INTEGER NOT NULL,
  composed_at INTEGER NOT NULL,
  PRIMARY KEY (set_id, capsule_id)
) STRICT;
`.trim();
|
|
116
|
+
// documents links to capsule_sources via the composite (capsule_id, source_id) so the
|
|
117
|
+
// source is required to live in the same capsule as the document. UNIQUE (capsule_id, id)
|
|
118
|
+
// exposes the same composite for downstream tables (chunks, vectors, pages, sections,
|
|
119
|
+
// parsed_units, parser_diagnostics) to lock document_id to capsule_id.
|
|
120
|
+
// DDL for `documents`. The composite foreign key (capsule_id, source_id) ties each
// document to a source row of the same capsule, and UNIQUE (capsule_id, id) exposes a
// composite key that downstream tables reference.
const CREATE_DOCUMENTS = `
CREATE TABLE documents (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  source_id TEXT NOT NULL,
  document_path TEXT NOT NULL,
  size_bytes INTEGER NOT NULL,
  media_type TEXT NOT NULL,
  content_hash TEXT NOT NULL,
  parser_id TEXT NOT NULL,
  parser_version TEXT NOT NULL,
  last_extracted_at INTEGER NOT NULL,
  status TEXT NOT NULL,
  safe_display_name TEXT NOT NULL,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, source_id) REFERENCES capsule_sources(capsule_id, id) ON DELETE CASCADE,
  UNIQUE (capsule_id, id)
) STRICT;
`.trim();
|
|
139
|
+
// DDL for `document_texts`: at most one normalized-text row per document (primary key
// is `document_id`), cascading with both the capsule and the document.
const CREATE_DOCUMENT_TEXTS = `
CREATE TABLE document_texts (
  capsule_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  normalized_text TEXT NOT NULL,
  PRIMARY KEY (document_id),
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
149
|
+
// DDL for `pages`: one row per (document_id, page_number) with a character span, an
// optional label, and optional REAL bounding-box components.
const CREATE_PAGES = `
CREATE TABLE pages (
  capsule_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  page_number INTEGER NOT NULL,
  page_label TEXT,
  character_start INTEGER NOT NULL,
  character_end INTEGER NOT NULL,
  bbox_x REAL,
  bbox_y REAL,
  bbox_w REAL,
  bbox_h REAL,
  PRIMARY KEY (document_id, page_number),
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
166
|
+
// Current DDL for `sections`: keyed by (document_id, section_path_json), with a nullable
// `section_path_hash` column and a character span per section.
const CREATE_SECTIONS = `
CREATE TABLE sections (
  capsule_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  section_path_json TEXT NOT NULL,
  section_path_hash TEXT,
  character_start INTEGER NOT NULL,
  character_end INTEGER NOT NULL,
  PRIMARY KEY (document_id, section_path_json),
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
179
|
+
// V1 shape of `sections`: the same as the current DDL minus the `section_path_hash` column.
const CREATE_SECTIONS_V1 = `
CREATE TABLE sections (
  capsule_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  section_path_json TEXT NOT NULL,
  character_start INTEGER NOT NULL,
  character_end INTEGER NOT NULL,
  PRIMARY KEY (document_id, section_path_json),
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
191
|
+
// DDL for `parsed_units`. Only `id`, `capsule_id`, `document_id`, and `kind` are
// required; every locator column (page, section path, JSON pointer, table/row, heading
// path, character span) is nullable. UNIQUE (capsule_id, id) exposes the composite key
// that `chunks` references.
const CREATE_PARSED_UNITS = `
CREATE TABLE parsed_units (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  kind TEXT NOT NULL,
  page_number INTEGER,
  page_label TEXT,
  section_path_json TEXT,
  json_pointer TEXT,
  table_name TEXT,
  row_index INTEGER,
  heading_path_json TEXT,
  unsupported_reason TEXT,
  character_start INTEGER,
  character_end INTEGER,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE,
  UNIQUE (capsule_id, id)
) STRICT;
`.trim();
|
|
212
|
+
// Current DDL for `chunks`. Composite foreign keys bind each chunk's source, document,
// and parsed unit to the chunk's own capsule_id; UNIQUE (capsule_id, id) exposes the
// composite key that `vectors` references.
const CREATE_CHUNKS = `
CREATE TABLE chunks (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  source_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  parsed_unit_id TEXT NOT NULL,
  order_index INTEGER NOT NULL,
  token_count INTEGER NOT NULL,
  safe_excerpt_hash TEXT NOT NULL,
  chunking_strategy_version TEXT,
  character_start INTEGER,
  character_end INTEGER,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, source_id) REFERENCES capsule_sources(capsule_id, id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, parsed_unit_id) REFERENCES parsed_units(capsule_id, id) ON DELETE CASCADE,
  UNIQUE (capsule_id, id)
) STRICT;
`.trim();
|
|
232
|
+
// V1 shape of `chunks`: the current DDL without `chunking_strategy_version` and without
// the `character_start` / `character_end` span columns.
const CREATE_CHUNKS_V1 = `
CREATE TABLE chunks (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  source_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  parsed_unit_id TEXT NOT NULL,
  order_index INTEGER NOT NULL,
  token_count INTEGER NOT NULL,
  safe_excerpt_hash TEXT NOT NULL,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, source_id) REFERENCES capsule_sources(capsule_id, id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, parsed_unit_id) REFERENCES parsed_units(capsule_id, id) ON DELETE CASCADE,
  UNIQUE (capsule_id, id)
) STRICT;
`.trim();
|
|
249
|
+
// DDL for `vectors`. Each row stores the embedding BLOB together with its own copy of
// the embedding identity columns (provider, model id, optional revision, dimensions,
// metric). Composite foreign keys bind the source, document, and chunk to the row's capsule_id.
const CREATE_VECTORS = `
CREATE TABLE vectors (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  source_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  chunk_id TEXT NOT NULL,
  embedding BLOB NOT NULL,
  embedding_model_provider TEXT NOT NULL,
  embedding_model_id TEXT NOT NULL,
  embedding_model_revision TEXT,
  vector_dimensions INTEGER NOT NULL,
  vector_metric TEXT NOT NULL,
  storage_reference TEXT NOT NULL,
  created_at INTEGER NOT NULL,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, source_id) REFERENCES capsule_sources(capsule_id, id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, chunk_id) REFERENCES chunks(capsule_id, id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
270
|
+
// DDL for `parser_diagnostics`: severity/code/message records scoped to a capsule.
// `document_id` and `page_number` are declared nullable.
const CREATE_PARSER_DIAGNOSTICS = `
CREATE TABLE parser_diagnostics (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  document_id TEXT,
  severity TEXT NOT NULL,
  code TEXT NOT NULL,
  message TEXT NOT NULL,
  page_number INTEGER,
  created_at INTEGER NOT NULL,
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE,
  FOREIGN KEY (capsule_id, document_id) REFERENCES documents(capsule_id, id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
284
|
+
// DDL for `indexing_jobs`: per-capsule job rows with start/finish timestamps, status,
// document counters, last error code/message, an optional resume token, and a
// `cancellation_requested` flag that defaults to 0.
const CREATE_INDEXING_JOBS = `
CREATE TABLE indexing_jobs (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL REFERENCES capsules(id) ON DELETE CASCADE,
  source_ids_json TEXT NOT NULL,
  started_at INTEGER NOT NULL,
  finished_at INTEGER,
  status TEXT NOT NULL,
  total_documents INTEGER NOT NULL,
  processed_documents INTEGER NOT NULL,
  failed_documents INTEGER NOT NULL,
  skipped_documents INTEGER NOT NULL,
  last_error_code TEXT,
  last_error_message TEXT,
  resume_token TEXT,
  cancellation_requested INTEGER NOT NULL DEFAULT 0
) STRICT;
`.trim();
|
|
302
|
+
// DDL for `schema_meta`: a key/value table with TEXT keys and non-null TEXT values.
const CREATE_SCHEMA_META = `
CREATE TABLE schema_meta (
  key TEXT PRIMARY KEY NOT NULL,
  value TEXT NOT NULL
) STRICT;
`.trim();
|
|
308
|
+
// capsule_membership_changes — append-only audit trail for composition events on a capsule
|
|
309
|
+
// (Issue #263). Recorded inline by `addSourcesToCapsule` and `composeCapsules`; consumed by
|
|
310
|
+
// the evidence ledger and the future UI history view. `change_kind` is constrained so a typo
|
|
311
|
+
// at the application layer fails at INSERT rather than silently broadens the audit vocabulary.
|
|
312
|
+
// `source_id` is nullable because compose-events reference a capsule_set rather than a single
|
|
313
|
+
// source — the `details_json` payload carries the structured arguments for that case.
|
|
314
|
+
// DDL for `capsule_membership_changes`. `change_kind` is restricted by a CHECK to
// 'add-source', 'remove-source', or 'compose-set'; `source_id` and `details_json` are
// nullable. The table declares no foreign keys.
const CREATE_CAPSULE_MEMBERSHIP_CHANGES = `
CREATE TABLE capsule_membership_changes (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  change_kind TEXT NOT NULL CHECK (change_kind IN ('add-source', 'remove-source', 'compose-set')),
  source_id TEXT,
  details_json TEXT,
  occurred_at INTEGER NOT NULL
) STRICT;
`.trim();
|
|
324
|
+
// Index over (capsule_id, occurred_at) on `capsule_membership_changes`.
const CREATE_CAPSULE_MEMBERSHIP_CHANGES_INDEX = [
    "CREATE INDEX idx_capsule_membership_changes_capsule_time",
    "ON capsule_membership_changes(capsule_id, occurred_at);",
].join(" ");
|
|
325
|
+
// DDL for `capsule_audit_events`. `kind` is restricted by a CHECK to the eleven event
// names listed below; every detail column is nullable. The table declares no foreign
// keys, so its rows are not removed by capsule deletion cascades.
const CREATE_CAPSULE_AUDIT_EVENTS = `
CREATE TABLE capsule_audit_events (
  id TEXT PRIMARY KEY NOT NULL,
  capsule_id TEXT NOT NULL,
  kind TEXT NOT NULL CHECK (
    kind IN (
      'capsule-created',
      'capsule-deleted',
      'source-added',
      'source-removed',
      'indexing-job-started',
      'indexing-job-completed',
      'indexing-job-failed',
      'retention-applied',
      'retrieval-performed',
      'answer-context-assembled',
      'model-context-sent'
    )
  ),
  source_id TEXT,
  job_id TEXT,
  error_code TEXT,
  processed_documents INTEGER,
  failed_documents INTEGER,
  deleted_vector_count INTEGER,
  deleted_extracted_text_count INTEGER,
  details_json TEXT,
  occurred_at INTEGER NOT NULL
) STRICT;
`.trim();
|
|
355
|
+
// Index over (capsule_id, occurred_at) on `capsule_audit_events`.
const CREATE_CAPSULE_AUDIT_EVENTS_INDEX = [
    "CREATE INDEX idx_capsule_audit_events_capsule_time",
    "ON capsule_audit_events(capsule_id, occurred_at);",
].join(" ");
|
|
356
|
+
// extraction_checkpoints — durable per-document progress for the bounded large-document
|
|
357
|
+
// ingestion path (Epic #1160, Issue #1286). One row per (capsule_id, document_id); the row is
|
|
358
|
+
// REPLACEd as a document advances through extraction → chunking → embedding so an interrupted
|
|
359
|
+
// large-document job can resume from durable progress instead of restarting. The compatibility
|
|
360
|
+
// fingerprint columns (source_content_hash, parser_version, policy_fingerprint,
|
|
361
|
+
// chunking_strategy_version, embedding_identity_json) let a resumed run refuse a checkpoint that
|
|
362
|
+
// was produced under an incompatible source, parser, policy, chunking strategy, or embedding
|
|
363
|
+
// identity. The table is content-free: it carries hashes, cursors, counts, and redacted
|
|
364
|
+
// diagnostics, never raw extracted text. capsule_id cascades on capsule deletion; document_id is
|
|
365
|
+
// a lineage column rather than an FK so a checkpoint can be written before the document row is
|
|
366
|
+
// persisted during progressive extraction.
|
|
367
|
+
// DDL for `extraction_checkpoints`: one row per (capsule_id, document_id). Cursor and
// counter columns default to 0; the fingerprint columns (source_content_hash,
// parser_version, policy_fingerprint, chunking_strategy_version, embedding_identity_json)
// are all required. Only capsule_id carries a foreign key; document_id has none.
const CREATE_EXTRACTION_CHECKPOINTS = `
CREATE TABLE extraction_checkpoints (
  capsule_id TEXT NOT NULL,
  document_id TEXT NOT NULL,
  job_id TEXT NOT NULL,
  strategy TEXT NOT NULL,
  phase TEXT NOT NULL,
  page_cursor INTEGER NOT NULL DEFAULT 0,
  section_cursor INTEGER NOT NULL DEFAULT 0,
  object_cursor INTEGER NOT NULL DEFAULT 0,
  extracted_text_bytes INTEGER NOT NULL DEFAULT 0,
  chunk_cursor INTEGER NOT NULL DEFAULT 0,
  embedded_chunk_cursor INTEGER NOT NULL DEFAULT 0,
  last_embedded_chunk_id TEXT,
  retry_count INTEGER NOT NULL DEFAULT 0,
  coverage TEXT NOT NULL,
  source_content_hash TEXT NOT NULL,
  parser_version TEXT NOT NULL,
  policy_fingerprint TEXT NOT NULL,
  chunking_strategy_version TEXT NOT NULL,
  embedding_identity_json TEXT NOT NULL,
  terminal_diagnostics_json TEXT NOT NULL,
  created_at INTEGER NOT NULL,
  updated_at INTEGER NOT NULL,
  PRIMARY KEY (capsule_id, document_id),
  FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
395
|
+
// Index over (capsule_id, phase) on `extraction_checkpoints`.
const CREATE_EXTRACTION_CHECKPOINTS_PHASE_INDEX = [
    "CREATE INDEX idx_extraction_checkpoints_capsule_phase",
    "ON extraction_checkpoints(capsule_id, phase);",
].join(" ");
|
|
396
|
+
// Index over (capsule_id, job_id) on `extraction_checkpoints`.
const CREATE_EXTRACTION_CHECKPOINTS_JOB_INDEX = [
    "CREATE INDEX idx_extraction_checkpoints_job",
    "ON extraction_checkpoints(capsule_id, job_id);",
].join(" ");
|
|
397
|
+
// document_text_windows — bounded per-window extracted text for the progressive large-document
// ingestion path (Epic #1160, Issue #1286). Small files keep a single document_texts row; a
// progressively-extracted document instead persists its normalized text as one bounded row per
// extraction window so the JS working set never holds the whole document text and the on-disk text
// is stored exactly once (linear storage). Every chunk lies inside one page → inside one window, so
// a chunk's document-relative span maps to exactly one window row; the unified text-span reader
// resolves it via the (character_start, character_end) bounds. capsule_id cascades on capsule
// deletion; document_id is a lineage column (no FK) so windows can be written before the document
// row is finalized during progressive extraction.
const CREATE_DOCUMENT_TEXT_WINDOWS = `
CREATE TABLE document_text_windows (
capsule_id TEXT NOT NULL,
document_id TEXT NOT NULL,
window_index INTEGER NOT NULL,
character_start INTEGER NOT NULL,
character_end INTEGER NOT NULL,
normalized_text TEXT NOT NULL,
PRIMARY KEY (capsule_id, document_id, window_index),
FOREIGN KEY (capsule_id) REFERENCES capsules(id) ON DELETE CASCADE
) STRICT;
`.trim();
|
|
418
|
+
// Index over document_text_windows keyed by (capsule_id, document_id, character_start).
const CREATE_DOCUMENT_TEXT_WINDOWS_SPAN_INDEX =
    "CREATE INDEX idx_document_text_windows_span " +
    "ON document_text_windows(capsule_id, document_id, character_start);";
|
|
419
|
+
// Partial unique index: (document_id, section_path_hash) must be unique, but only for rows
// whose section_path_hash is not NULL.
const CREATE_SECTIONS_SECTION_PATH_HASH_INDEX =
    "CREATE UNIQUE INDEX idx_sections_document_section_path_hash " +
    "ON sections(document_id, section_path_hash) " +
    "WHERE section_path_hash IS NOT NULL;";
|
|
420
|
+
// Statements must be applied in this exact order: PRAGMA first (so child-table NOT NULL
// foreign-key constraints are enforced as the rows arrive), then parents before children.
export const KNOWLEDGE_CAPSULE_DDL = [
    PRAGMA_FOREIGN_KEYS,
    CREATE_CAPSULES,
    CREATE_KNOWLEDGE_SOURCES,
    CREATE_CAPSULE_SOURCES,
    CREATE_CAPSULE_SET_MEMBERS,
    CREATE_DOCUMENTS,
    CREATE_DOCUMENT_TEXTS,
    CREATE_PAGES,
    CREATE_SECTIONS,
    CREATE_PARSED_UNITS,
    CREATE_CHUNKS,
    CREATE_VECTORS,
    CREATE_PARSER_DIAGNOSTICS,
    CREATE_INDEXING_JOBS,
    CREATE_SCHEMA_META,
    CREATE_CAPSULE_MEMBERSHIP_CHANGES,
    CREATE_CAPSULE_AUDIT_EVENTS,
    CREATE_EXTRACTION_CHECKPOINTS,
    CREATE_DOCUMENT_TEXT_WINDOWS,
];
|
|
443
|
+
// ─── Indexes (scoped-query patterns only — no full-table scans) ──────────────────
// Full set of CREATE INDEX statements for the current schema. The entries appear in the
// same order as the names listed in KNOWLEDGE_CAPSULE_INDEX_NAMES.
export const KNOWLEDGE_CAPSULE_INDEXES = [
    "CREATE INDEX idx_knowledge_sources_updated ON knowledge_sources(updated_at DESC, id ASC);",
    "CREATE INDEX idx_capsule_set_members_capsule ON capsule_set_members(capsule_id);",
    "CREATE INDEX idx_document_texts_capsule ON document_texts(capsule_id);",
    "CREATE INDEX idx_pages_capsule ON pages(capsule_id);",
    "CREATE INDEX idx_sections_capsule ON sections(capsule_id);",
    CREATE_SECTIONS_SECTION_PATH_HASH_INDEX,
    "CREATE INDEX idx_documents_capsule_source ON documents(capsule_id, source_id, status);",
    "CREATE INDEX idx_documents_capsule_status ON documents(capsule_id, status);",
    "CREATE INDEX idx_documents_content_hash ON documents(capsule_id, content_hash);",
    "CREATE INDEX idx_documents_capsule_last_extracted ON documents(capsule_id, last_extracted_at);",
    "CREATE INDEX idx_chunks_capsule_document_order ON chunks(capsule_id, document_id, order_index);",
    "CREATE INDEX idx_vectors_capsule ON vectors(capsule_id);",
    "CREATE INDEX idx_vectors_capsule_source ON vectors(capsule_id, source_id);",
    "CREATE INDEX idx_vectors_capsule_document ON vectors(capsule_id, document_id);",
    "CREATE INDEX idx_vectors_capsule_created ON vectors(capsule_id, created_at);",
    "CREATE INDEX idx_vectors_capsule_identity ON vectors(capsule_id, embedding_model_provider, embedding_model_id, vector_dimensions);",
    "CREATE INDEX idx_parsed_units_capsule_document ON parsed_units(capsule_id, document_id);",
    "CREATE INDEX idx_parser_diagnostics_capsule_doc ON parser_diagnostics(capsule_id, document_id);",
    "CREATE INDEX idx_parser_diagnostics_capsule_created ON parser_diagnostics(capsule_id, created_at DESC, id DESC);",
    "CREATE INDEX idx_indexing_jobs_capsule_status ON indexing_jobs(capsule_id, status);",
    "CREATE INDEX idx_indexing_jobs_capsule_started ON indexing_jobs(capsule_id, started_at DESC, id DESC);",
    CREATE_CAPSULE_MEMBERSHIP_CHANGES_INDEX,
    CREATE_CAPSULE_AUDIT_EVENTS_INDEX,
    CREATE_EXTRACTION_CHECKPOINTS_PHASE_INDEX,
    CREATE_EXTRACTION_CHECKPOINTS_JOB_INDEX,
    CREATE_DOCUMENT_TEXT_WINDOWS_SPAN_INDEX,
];
|
|
472
|
+
// Runtime deletion primitive (#193 uses this inside a transaction). The cascade chain in
// the DDL above removes every dependent row atomically when this single statement runs.
// The capsule id is supplied through the named parameter `:capsule_id`.
export const DELETE_CAPSULE_SQL = "DELETE FROM capsules WHERE id = :capsule_id;";
|
|
475
|
+
// Version 1 originally applied the entire DDL+indexes set as a single migration. To preserve
// forward-only semantics we split v2 out as a *delta*: existing v1 databases run only the
// new CREATE TABLE + CREATE INDEX. Fresh installs apply v1 followed by v2 and end at the
// same on-disk shape. Each `up` entry stays a single complete statement.
const V1_DDL_WITHOUT_V2 = [
    PRAGMA_FOREIGN_KEYS,
    CREATE_CAPSULES,
    CREATE_CAPSULE_SOURCES_V1,
    CREATE_CAPSULE_SET_MEMBERS,
    CREATE_DOCUMENTS,
    CREATE_PAGES,
    CREATE_SECTIONS_V1,
    CREATE_PARSED_UNITS,
    CREATE_CHUNKS_V1,
    CREATE_VECTORS,
    CREATE_PARSER_DIAGNOSTICS,
    CREATE_INDEXING_JOBS,
    CREATE_SCHEMA_META,
];
|
|
494
|
+
// Indexes created by migration 1, appended after V1_DDL_WITHOUT_V2 in that migration's `up`
// list. Indexes added by later migrations live in their own arrays.
const V1_INDEXES_WITHOUT_V2 = [
    "CREATE INDEX idx_documents_capsule_source ON documents(capsule_id, source_id, status);",
    "CREATE INDEX idx_documents_content_hash ON documents(capsule_id, content_hash);",
    "CREATE INDEX idx_chunks_capsule_document_order ON chunks(capsule_id, document_id, order_index);",
    "CREATE INDEX idx_vectors_capsule ON vectors(capsule_id);",
    "CREATE INDEX idx_vectors_capsule_identity ON vectors(capsule_id, embedding_model_provider, embedding_model_id, vector_dimensions);",
    "CREATE INDEX idx_parser_diagnostics_capsule_doc ON parser_diagnostics(capsule_id, document_id);",
    "CREATE INDEX idx_indexing_jobs_capsule_status ON indexing_jobs(capsule_id, status);",
];
|
|
503
|
+
// Index statements applied by migration 9 (used as that migration's `up` list).
const V9_PERFORMANCE_INDEXES = [
    "CREATE INDEX idx_documents_capsule_status ON documents(capsule_id, status);",
    "CREATE INDEX idx_documents_capsule_last_extracted ON documents(capsule_id, last_extracted_at);",
    "CREATE INDEX idx_vectors_capsule_source ON vectors(capsule_id, source_id);",
    "CREATE INDEX idx_vectors_capsule_document ON vectors(capsule_id, document_id);",
    "CREATE INDEX idx_vectors_capsule_created ON vectors(capsule_id, created_at);",
    "CREATE INDEX idx_parsed_units_capsule_document ON parsed_units(capsule_id, document_id);",
    "CREATE INDEX idx_parser_diagnostics_capsule_created ON parser_diagnostics(capsule_id, created_at DESC, id DESC);",
    "CREATE INDEX idx_indexing_jobs_capsule_started ON indexing_jobs(capsule_id, started_at DESC, id DESC);",
];
|
|
513
|
+
// Statements applied by migration 10, in order: create knowledge_sources, backfill it from
// capsule_sources, then add the indexes.
const V10_SOURCE_AND_DELETE_INDEXES = [
    CREATE_KNOWLEDGE_SOURCES,
    // Backfill: one knowledge_sources row per distinct capsule_sources.id, keeping the
    // earliest created_at and the latest updated_at of each group. The remaining columns
    // are not aggregated — NOTE(review): SQLite takes such bare columns from a single row
    // of the group; confirm that rows sharing an id agree on those columns.
    `
INSERT INTO knowledge_sources (
id, display_name, description, tags_json, scope_kind, scope_json, created_at, updated_at
)
SELECT id, display_name, description, tags_json, scope_kind, scope_json, MIN(created_at), MAX(updated_at)
FROM capsule_sources
GROUP BY id;
`.trim(),
    "CREATE INDEX idx_knowledge_sources_updated ON knowledge_sources(updated_at DESC, id ASC);",
    "CREATE INDEX idx_capsule_set_members_capsule ON capsule_set_members(capsule_id);",
    "CREATE INDEX idx_document_texts_capsule ON document_texts(capsule_id);",
    "CREATE INDEX idx_pages_capsule ON pages(capsule_id);",
    "CREATE INDEX idx_sections_capsule ON sections(capsule_id);",
];
|
|
529
|
+
// DDL for the temporary `capsule_membership_changes_v5` table used by the v5 rebuild
// (create → copy → drop old → rename back). String.prototype.replace with a string pattern
// rewrites only the FIRST occurrence — presumably the table name in the CREATE TABLE header;
// TODO confirm the base DDL does not mention the table name anywhere else.
const CREATE_CAPSULE_MEMBERSHIP_CHANGES_V5 = CREATE_CAPSULE_MEMBERSHIP_CHANGES.replace("capsule_membership_changes", "capsule_membership_changes_v5");
|
|
530
|
+
// DDL for the temporary `capsule_audit_events_v5` table used by the v5 rebuild
// (create → copy → drop old → rename back). String.prototype.replace with a string pattern
// rewrites only the FIRST occurrence — presumably the table name in the CREATE TABLE header;
// TODO confirm the base DDL does not mention the table name anywhere else.
const CREATE_CAPSULE_AUDIT_EVENTS_V5 = CREATE_CAPSULE_AUDIT_EVENTS.replace("capsule_audit_events", "capsule_audit_events_v5");
|
|
531
|
+
// Copies every row of capsule_membership_changes into the temporary
// capsule_membership_changes_v5 table, column for column, during the v5 rebuild.
const COPY_CAPSULE_MEMBERSHIP_CHANGES_TO_V5 = `
INSERT INTO capsule_membership_changes_v5 (
id, capsule_id, change_kind, source_id, details_json, occurred_at
)
SELECT id, capsule_id, change_kind, source_id, details_json, occurred_at
FROM capsule_membership_changes;
`.trim();
|
|
538
|
+
// Copies every row of capsule_audit_events into the temporary capsule_audit_events_v5 table
// during the v5 rebuild. The column list does not include details_json (compare the v7 copy,
// which fills that column with NULL).
const COPY_CAPSULE_AUDIT_EVENTS_TO_V5 = `
INSERT INTO capsule_audit_events_v5 (
id, capsule_id, kind, source_id, job_id, error_code, processed_documents, failed_documents,
deleted_vector_count, deleted_extracted_text_count, occurred_at
)
SELECT id, capsule_id, kind, source_id, job_id, error_code, processed_documents, failed_documents,
deleted_vector_count, deleted_extracted_text_count, occurred_at
FROM capsule_audit_events;
`.trim();
|
|
547
|
+
// DDL for the temporary `capsule_audit_events_v7` table used by migration 7
// (create → copy → drop old → rename back). String.prototype.replace with a string pattern
// rewrites only the FIRST occurrence — presumably the table name in the CREATE TABLE header;
// TODO confirm the base DDL does not mention the table name anywhere else.
const CREATE_CAPSULE_AUDIT_EVENTS_V7 = CREATE_CAPSULE_AUDIT_EVENTS.replace("capsule_audit_events", "capsule_audit_events_v7");
|
|
548
|
+
// Copies every row of capsule_audit_events into the temporary capsule_audit_events_v7 table
// during migration 7. details_json is written as NULL for all copied rows.
const COPY_CAPSULE_AUDIT_EVENTS_TO_V7 = `
INSERT INTO capsule_audit_events_v7 (
id, capsule_id, kind, source_id, job_id, error_code, processed_documents, failed_documents,
deleted_vector_count, deleted_extracted_text_count, details_json, occurred_at
)
SELECT id, capsule_id, kind, source_id, job_id, error_code, processed_documents, failed_documents,
deleted_vector_count, deleted_extracted_text_count, NULL, occurred_at
FROM capsule_audit_events;
`.trim();
|
|
557
|
+
// Statement list for migration 5. Each audit table is rebuilt with the same five steps:
// create the `_v5` copy, copy the rows across, drop the old table, rename the copy back to
// the original name, then recreate the table's index.
const REBUILD_AUDIT_TABLES_FOR_DELETE_DURABILITY = [
    // capsule_membership_changes
    CREATE_CAPSULE_MEMBERSHIP_CHANGES_V5,
    COPY_CAPSULE_MEMBERSHIP_CHANGES_TO_V5,
    "DROP TABLE capsule_membership_changes;",
    "ALTER TABLE capsule_membership_changes_v5 RENAME TO capsule_membership_changes;",
    CREATE_CAPSULE_MEMBERSHIP_CHANGES_INDEX,
    // capsule_audit_events
    CREATE_CAPSULE_AUDIT_EVENTS_V5,
    COPY_CAPSULE_AUDIT_EVENTS_TO_V5,
    "DROP TABLE capsule_audit_events;",
    "ALTER TABLE capsule_audit_events_v5 RENAME TO capsule_audit_events;",
    CREATE_CAPSULE_AUDIT_EVENTS_INDEX,
];
|
|
569
|
+
// Ordered migration list, versions 1 through 13. Each entry carries:
//   version — the migration number,
//   reason  — a human-readable justification,
//   up      — the SQL statements to run, in order.
export const KNOWLEDGE_CAPSULE_MIGRATIONS = [
    {
        version: 1,
        reason: "Initial schema for Local Knowledge Connector capsule store (Issue #265).",
        up: [...V1_DDL_WITHOUT_V2, ...V1_INDEXES_WITHOUT_V2],
    },
    {
        version: 2,
        reason: "Audit trail for capsule composition events (add-source, remove-source, compose-set) for Issue #263.",
        up: [CREATE_CAPSULE_MEMBERSHIP_CHANGES, CREATE_CAPSULE_MEMBERSHIP_CHANGES_INDEX],
    },
    {
        version: 3,
        reason: "Persist metadata-only capsule lifecycle and retention audit events for Issue #201.",
        up: [CREATE_CAPSULE_AUDIT_EVENTS, CREATE_CAPSULE_AUDIT_EVENTS_INDEX],
    },
    {
        version: 4,
        reason: "Persist normalized extracted text for binary parsers so chunk offsets project against extracted content.",
        up: [CREATE_DOCUMENT_TEXTS],
    },
    {
        version: 5,
        reason: "Keep metadata-only capsule audit rows durable after capsule deletion for Issue #201.",
        up: REBUILD_AUDIT_TABLES_FOR_DELETE_DURABILITY,
    },
    {
        version: 6,
        reason: "Persist chunking strategy version so stale chunks and vectors are re-emitted after Issue #195 strategy changes.",
        up: ["ALTER TABLE chunks ADD COLUMN chunking_strategy_version TEXT;"],
    },
    {
        version: 7,
        reason: "Persist retrieval, answer-context, and model-bound chunk usage audit metadata for Issue #201.",
        // Same create → copy → drop → rename → reindex sequence as the v5 rebuild.
        up: [
            CREATE_CAPSULE_AUDIT_EVENTS_V7,
            COPY_CAPSULE_AUDIT_EVENTS_TO_V7,
            "DROP TABLE capsule_audit_events;",
            "ALTER TABLE capsule_audit_events_v7 RENAME TO capsule_audit_events;",
            CREATE_CAPSULE_AUDIT_EVENTS_INDEX,
        ],
    },
    {
        version: 8,
        reason: "Persist per-chunk character offsets so each chunk embeds its own bounded sub-span " +
            "instead of re-deriving the full parsed-unit span at index time (Epic #189, Issue #195). " +
            "Columns are nullable; chunks indexed before this migration fall back to the parsed-unit " +
            "span until the capsule is reindexed.",
        up: [
            "ALTER TABLE chunks ADD COLUMN character_start INTEGER;",
            "ALTER TABLE chunks ADD COLUMN character_end INTEGER;",
        ],
    },
    {
        version: 9,
        reason: "Add large-capsule scoped indexes for retrieval filters, health counts, bounded history reads, and retention cleanup (Issue #265 audit).",
        up: V9_PERFORMANCE_INDEXES,
    },
    {
        version: 10,
        reason: "Persist KnowledgeSources independently from capsule membership and index capsule-delete verification paths (Issue #193 audit).",
        up: V10_SOURCE_AND_DELETE_INDEXES,
    },
    {
        version: 11,
        reason: "Persist durable per-document extraction checkpoints for bounded large-document ingestion " +
            "so interrupted large-document jobs resume from progress instead of restarting (Epic #1160, Issue #1286).",
        up: [
            CREATE_EXTRACTION_CHECKPOINTS,
            CREATE_EXTRACTION_CHECKPOINTS_PHASE_INDEX,
            CREATE_EXTRACTION_CHECKPOINTS_JOB_INDEX,
        ],
    },
    {
        version: 12,
        reason: "Persist bounded per-window extracted text so progressive large-document extraction never " +
            "holds the whole document text in memory and stores it once with linear growth (Epic #1160, Issue #1286).",
        up: [CREATE_DOCUMENT_TEXT_WINDOWS, CREATE_DOCUMENT_TEXT_WINDOWS_SPAN_INDEX],
    },
    {
        version: 13,
        reason: "Persist a deterministic, non-reversible section path hash so encrypted randomized section " +
            "labels retain duplicate-section uniqueness without storing labels in plaintext (Issue #1322 audit).",
        up: [
            "ALTER TABLE sections ADD COLUMN section_path_hash TEXT;",
            CREATE_SECTIONS_SECTION_PATH_HASH_INDEX,
        ],
    },
];
|
|
658
|
+
// Expected table/index names; consumers can iterate to assert presence without re-parsing
// the DDL strings. The names are grouped by migration history rather than by
// KNOWLEDGE_CAPSULE_DDL order: the v1 tables come first, and KNOWLEDGE_CAPSULE_TABLES
// appends the tables introduced by later migrations after them.
//
// `KNOWLEDGE_CAPSULE_V1_TABLES` lists only the tables that exist after a v1-only migration
// (i.e. the original 12 tables without the v2 audit table). The store uses this narrower
// set for the pre-migration check so that an existing v1 database is not falsely treated
// as corrupt before migrations run.
export const KNOWLEDGE_CAPSULE_V1_TABLES = [
    "capsules",
    "capsule_sources",
    "capsule_set_members",
    "documents",
    "pages",
    "sections",
    "parsed_units",
    "chunks",
    "vectors",
    "parser_diagnostics",
    "indexing_jobs",
    "schema_meta",
];
|
|
679
|
+
// Every table expected in a fully migrated database: the v1 tables followed by the tables
// added by later migrations.
export const KNOWLEDGE_CAPSULE_TABLES = [
    ...KNOWLEDGE_CAPSULE_V1_TABLES,
    "knowledge_sources",
    "document_texts",
    "capsule_membership_changes",
    "capsule_audit_events",
    "extraction_checkpoints",
    "document_text_windows",
];
|
|
688
|
+
// Names of every index expected in a fully migrated database, listed in the same order as
// the statements in KNOWLEDGE_CAPSULE_INDEXES.
export const KNOWLEDGE_CAPSULE_INDEX_NAMES = [
    "idx_knowledge_sources_updated",
    "idx_capsule_set_members_capsule",
    "idx_document_texts_capsule",
    "idx_pages_capsule",
    "idx_sections_capsule",
    "idx_sections_document_section_path_hash",
    "idx_documents_capsule_source",
    "idx_documents_capsule_status",
    "idx_documents_content_hash",
    "idx_documents_capsule_last_extracted",
    "idx_chunks_capsule_document_order",
    "idx_vectors_capsule",
    "idx_vectors_capsule_source",
    "idx_vectors_capsule_document",
    "idx_vectors_capsule_created",
    "idx_vectors_capsule_identity",
    "idx_parsed_units_capsule_document",
    "idx_parser_diagnostics_capsule_doc",
    "idx_parser_diagnostics_capsule_created",
    "idx_indexing_jobs_capsule_status",
    "idx_indexing_jobs_capsule_started",
    "idx_capsule_membership_changes_capsule_time",
    "idx_capsule_audit_events_capsule_time",
    "idx_extraction_checkpoints_capsule_phase",
    "idx_extraction_checkpoints_job",
    "idx_document_text_windows_span",
];
|