@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +3 -3
  2. package/bin/cli.js +1 -1
  3. package/bin/commands/config.js +1 -1
  4. package/dist/index.cjs +1 -1
  5. package/dist/index.js +1 -1
  6. package/package.json +2 -2
  7. package/packages/doctor/src/checks/local-memory.js +2 -2
  8. package/packages/memory/README.md +2 -2
  9. package/packages/memory/openclaw-plugin/README.md +2 -2
  10. package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
  11. package/packages/memory/src/server.js +2 -2
  12. package/packages/memory-engine-v2/.env.example +30 -0
  13. package/packages/memory-engine-v2/README.md +125 -0
  14. package/packages/memory-engine-v2/compat/Dockerfile +11 -0
  15. package/packages/memory-engine-v2/compat/requirements.txt +6 -0
  16. package/packages/memory-engine-v2/compat/server.py +1047 -0
  17. package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
  18. package/packages/memory-engine-v2/docker-compose.yml +206 -0
  19. package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
  20. package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
  21. package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
  22. package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
  23. package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
  24. package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
  25. package/packages/memory-engine-v2/extractor-async/worker.py +797 -0
  26. package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
  27. package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
  28. package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
  29. package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
  30. package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
  31. package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
  32. package/packages/memory-engine/.env.example +0 -13
  33. package/packages/memory-engine/MIGRATION.md +0 -219
  34. package/packages/memory-engine/README.md +0 -145
  35. package/packages/memory-engine/bench/README.md +0 -99
  36. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
  37. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
  38. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
  39. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
  40. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
  41. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
  42. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
  43. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
  44. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
  45. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
  46. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
  47. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
  48. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
  49. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
  50. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
  51. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
  52. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
  53. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
  54. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
  55. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
  56. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
  57. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
  58. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
  59. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
  60. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
  61. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
  62. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
  63. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
  64. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
  65. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
  66. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
  67. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
  68. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
  69. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
  70. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
  71. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
  72. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
  73. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
  74. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
  75. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
  76. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
  77. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
  78. package/packages/memory-engine/compat/Dockerfile +0 -22
  79. package/packages/memory-engine/compat/server.py +0 -1255
  80. package/packages/memory-engine/docker-compose.test.yml +0 -59
  81. package/packages/memory-engine/docker-compose.yml +0 -255
  82. package/packages/memory-engine/engine/README.md +0 -52
  83. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
  84. package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
  85. package/packages/memory-engine/engine/l6-document-store.py +0 -1018
  86. package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
  87. package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
  88. package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
  89. package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
  90. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
  91. package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
  92. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
  93. package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
  94. package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
  95. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
  96. package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
  97. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  98. package/packages/memory-engine/pme_memory/__main__.py +0 -129
  99. package/packages/memory-engine/pme_memory/artifacts.py +0 -95
  100. package/packages/memory-engine/pme_memory/embed.py +0 -74
  101. package/packages/memory-engine/pme_memory/health.py +0 -36
  102. package/packages/memory-engine/pme_memory/hygiene.py +0 -159
  103. package/packages/memory-engine/pme_memory/indexer.py +0 -200
  104. package/packages/memory-engine/pme_memory/needs.py +0 -55
  105. package/packages/memory-engine/pme_memory/provenance.py +0 -80
  106. package/packages/memory-engine/pme_memory/scoring.py +0 -168
  107. package/packages/memory-engine/pme_memory/search.py +0 -52
  108. package/packages/memory-engine/pme_memory/store.py +0 -86
  109. package/packages/memory-engine/pme_memory/synthesis.py +0 -114
  110. package/packages/memory-engine/pyproject.toml +0 -65
  111. package/packages/memory-engine/scripts/kg-extractor.py +0 -557
  112. package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
  113. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
  114. package/packages/memory-engine/tests/e2e_arena.sh +0 -259
  115. package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
  116. package/packages/memory-engine/tests/embed_stub/server.py +0 -80
  117. package/packages/memory-engine/tests/test_aggregate.py +0 -333
  118. package/packages/memory-engine/tests/test_api_contract.sh +0 -57
  119. package/packages/memory-engine/tests/test_arena_safety.py +0 -232
  120. package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
  121. package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
  122. package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
  123. package/packages/memory-engine/tests/test_embed_provider.py +0 -693
  124. package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
  125. package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
  126. package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
  127. package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
@@ -0,0 +1,390 @@
1
+ -- pentatonic-memory-engine v2: org-model schema, initial migration.
2
+ --
3
+ -- Design principles encoded in this schema:
4
+ --
5
+ -- 1. Append-only event log + derived tables. The `events` table is
6
+ -- the source of truth; `entities`, `facts`, `relationships` are plain
7
+ -- tables derived from it (not Postgres MATERIALIZED VIEWs). To iterate
8
+ -- the schema, add columns; never alter existing ones. To rebuild, drop + replay.
9
+ --
10
+ -- 2. Provenance + participant-set + disclosure-class on every fact
11
+ -- from day one. Even if enforcement (the privacy plane / Layer-P)
12
+ -- is later, the columns exist. Retrofitting these into a populated
13
+ -- multi-tenant schema = the schema-altering operation we explicitly
14
+ -- can't tolerate. See PR #285 (extraction-objectives.md) for the
15
+ -- semantic contract these columns enforce.
16
+ --
17
+ -- 3. Forget is a deletion, not an orphan mark. When an evidence
18
+ -- record is forgotten, every fact whose entire provenance set
19
+ -- is forgotten gets deleted in the same transaction. Orphan-
20
+ -- marking a derivative of erased data is itself GDPR non-
21
+ -- compliance — see privacy model L4 / fix #8.
22
+ --
23
+ -- 4. Content-hash IDs everywhere. Idempotent upsert is the only
24
+ -- write contract. Re-ingestion converges instead of inflating
25
+ -- (the v1 L0 840k vs 594k bloat).
26
+
27
+ -- ----------------------------------------------------------------------
28
+ -- Enums
29
+ -- ----------------------------------------------------------------------
30
+
-- source_kind: category of the originating source. extractor-sync's
-- per-source rules assign it at ingest time, and it steers both typed
-- retrieval routing and extraction-prompt selection downstream. v1
-- stamped every record 'general'; v2 makes a real value mandatory.
CREATE TYPE source_kind AS ENUM (
    -- slack, teams, dm
    'chat',
    -- gmail, drafts
    'note',
    -- drive, notion, confluence
    'doc',
    -- calendar meetings
    'event',
    -- jira, linear, github issues
    'ticket',
    -- github commits / PRs
    'commit',
    -- system-of-record snapshots (CRM, ERP)
    'system',
    -- agent-generated content (chat_turn from pip)
    'agent'
);
45
+
-- participant_kind: internal-vs-external classification of a record's
-- participants. "Internal" means the writing user's organisation;
-- "external" means a counterparty / customer / partner. Feeds the
-- disclosure-class invariant from PR #280 (counterparty-taint spike).
CREATE TYPE participant_kind AS ENUM (
    'internal',
    'external',
    -- a thread with both
    'mixed',
    -- can't classify; treated as 'external' for safety
    'unknown'
);
55
+
-- disclosure_class: how widely a fact may be re-surfaced. Everything is
-- 'private' until proven otherwise. Drives cross-user inference taint.
CREATE TYPE disclosure_class AS ENUM (
    -- explicitly shared (e.g., docs marked external)
    'public',
    -- visible across user's org but not externally
    'team',
    -- visible only to participant set
    'private',
    -- legal hold / contains PII / etc.
    'restricted'
);
64
+
-- extraction_stage: how far a derived record has progressed.
--   provisional = produced by the deterministic fast path (extractor-sync)
--   distilled   = upgraded by the LLM pass (extractor-async)
--   verified    = human-confirmed
CREATE TYPE extraction_stage AS ENUM ('provisional', 'distilled', 'verified');
73
+
-- ----------------------------------------------------------------------
-- events — the append-only source of truth.
--
-- Every STORE_MEMORY / FORGET_MEMORY event the engine processes lands
-- here. The derived tables below (entities, facts, relationships) are
-- built from this table; if they're ever wrong, drop and replay.
-- ----------------------------------------------------------------------

CREATE TABLE events (
    -- Content-addressed primary key: sha256(arena || ':' || canonical_content).
    -- Idempotent: re-emit the same event, same ID, no-op insert.
    id               TEXT PRIMARY KEY,

    -- Arena scope. 'tenant' = clientId, 'user' = clientId:userId.
    arena            TEXT NOT NULL,
    client_id        TEXT NOT NULL,
    user_id          TEXT,

    -- Event semantics.
    event_type       TEXT NOT NULL,            -- STORE_MEMORY | FORGET_MEMORY | CHAT_TURN
    source_kind      source_kind NOT NULL,
    source_id        TEXT,                     -- producer's stable id (msg ts, drive id, etc.)

    -- Content + canonical hash. content_hash is what dedups across
    -- re-ingests; content is the raw payload (lazy-materialisable —
    -- could move to object storage later if RAM matters).
    content          TEXT NOT NULL,
    content_hash     TEXT NOT NULL,

    -- Provenance (mandatory from day one — see header note 2).
    participant_set  TEXT[] NOT NULL,          -- arena IDs that can see
    participant_kind participant_kind NOT NULL DEFAULT 'unknown',
    disclosure_class disclosure_class NOT NULL DEFAULT 'private',

    -- Producer-supplied metadata bag. Untyped to avoid lock-in; the
    -- extractor reads from here per source_kind.
    attributes       JSONB NOT NULL DEFAULT '{}'::jsonb,

    -- Timestamps. emitted_at is producer-supplied (falls back to NOW()
    -- when the producer omits it); received_at is when we landed it.
    -- Both useful for forensic queries.
    emitted_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    received_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- FORGET_MEMORY events reference the event they're erasing. The
    -- reference is nulled (not cascaded) when the target row is deleted,
    -- so the FORGET_MEMORY row itself survives in the log.
    forgets          TEXT REFERENCES events(id) ON DELETE SET NULL,

    CONSTRAINT valid_event_type CHECK (event_type IN ('STORE_MEMORY', 'FORGET_MEMORY', 'CHAT_TURN'))
);

CREATE INDEX idx_events_arena ON events(arena);
CREATE INDEX idx_events_content_hash ON events(content_hash);
CREATE INDEX idx_events_source_kind ON events(source_kind);
CREATE INDEX idx_events_emitted_at ON events(emitted_at DESC);
CREATE INDEX idx_events_client_user ON events(client_id, user_id);
CREATE INDEX idx_events_participant_set ON events USING GIN(participant_set);
CREATE INDEX idx_events_attributes ON events USING GIN(attributes jsonb_path_ops);
-- Index the referencing side of the self-FK. Without it, every DELETE on
-- events makes the ON DELETE SET NULL action scan the whole table looking
-- for rows whose `forgets` points at the deleted id. Partial because only
-- FORGET_MEMORY rows are expected to carry a non-NULL value.
CREATE INDEX idx_events_forgets ON events(forgets) WHERE forgets IS NOT NULL;
130
+
-- ----------------------------------------------------------------------
-- entities — extracted nouns: people, projects, orgs, concepts, ...
--
-- One event may mention several entities. The id is content-derived
-- (lowercased + stemmed, plus participant_set for tenant isolation), so
-- repeated extractions land on the same row instead of duplicating.
-- ----------------------------------------------------------------------

CREATE TABLE entities (
    id                   TEXT PRIMARY KEY,
    arena                TEXT NOT NULL,
    -- person | org | project | concept | ...
    entity_type          TEXT NOT NULL,
    canonical_name       TEXT NOT NULL,
    -- Surface forms the canonical name was derived from (used for fuzzy
    -- matching). Append-only — never remove.
    aliases              TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[],
    -- Provenance: which events first mentioned this entity.
    provenance_event_ids TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[],

    -- Visibility columns, present from day one.
    participant_set      TEXT[] NOT NULL,
    disclosure_class     disclosure_class NOT NULL DEFAULT 'private',

    attributes           JSONB NOT NULL DEFAULT '{}'::jsonb,
    first_seen           TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_seen            TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Scalar lookups.
CREATE INDEX idx_entities_arena          ON entities (arena);
CREATE INDEX idx_entities_entity_type    ON entities (entity_type);
CREATE INDEX idx_entities_canonical_name ON entities (canonical_name);
-- Array-membership lookups.
CREATE INDEX idx_entities_aliases        ON entities USING GIN (aliases);
CREATE INDEX idx_entities_provenance     ON entities USING GIN (provenance_event_ids);
163
+
-- ----------------------------------------------------------------------
-- facts — extracted decisions, commitments, statements.
--
-- The org-model proper: structured knowledge derived from events. Every
-- fact lists the events it was extracted from in provenance_event_ids;
-- once all of those events have been forgotten the fact is deleted
-- (see header note 3).
-- ----------------------------------------------------------------------

CREATE TABLE facts (
    id                   TEXT PRIMARY KEY,
    arena                TEXT NOT NULL,
    -- Fact category: decision | commitment | state | mention | observation.
    -- Drives prompt selection in extractor-async and typed routing in
    -- compat's read path.
    category             TEXT NOT NULL,
    -- Subject / predicate / object triple. Entity links are nulled,
    -- not cascaded, when the referenced entity row is deleted.
    subject_entity_id    TEXT REFERENCES entities(id) ON DELETE SET NULL,
    -- 'committed_to' | 'decided' | 'mentioned_in' etc.
    predicate            TEXT,
    object_entity_id     TEXT REFERENCES entities(id) ON DELETE SET NULL,
    -- Free-text natural-language form, for LLM context-building and for
    -- display to the user.
    statement            TEXT NOT NULL,

    -- Citing events that support this fact. If all are deleted via the
    -- FORGET_MEMORY cascade, this fact gets deleted too.
    provenance_event_ids TEXT[] NOT NULL,
    -- Extraction stage + confidence (bounded to [0, 1]).
    stage                extraction_stage NOT NULL DEFAULT 'provisional',
    confidence           REAL NOT NULL DEFAULT 0.5 CHECK (confidence BETWEEN 0.0 AND 1.0),

    -- Visibility columns, present from day one.
    participant_set      TEXT[] NOT NULL,
    disclosure_class     disclosure_class NOT NULL DEFAULT 'private',

    -- Temporal anchoring.
    asserted_at          TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- For commitments / deadlines: when the fact resolves or expires.
    effective_until      TIMESTAMPTZ,

    attributes           JSONB NOT NULL DEFAULT '{}'::jsonb
);

CREATE INDEX idx_facts_arena       ON facts (arena);
CREATE INDEX idx_facts_category    ON facts (category);
CREATE INDEX idx_facts_subject     ON facts (subject_entity_id);
CREATE INDEX idx_facts_object      ON facts (object_entity_id);
CREATE INDEX idx_facts_provenance  ON facts USING GIN (provenance_event_ids);
CREATE INDEX idx_facts_stage       ON facts (stage);
CREATE INDEX idx_facts_asserted_at ON facts (asserted_at DESC);
212
+
-- ----------------------------------------------------------------------
-- relationships — directed edges between entities.
--
-- Substrate for pgGraph / recursive-CTE queries: personFacets,
-- peopleList and spreading-activation run over the canonical store,
-- retiring Neo4j (pending the spike in #278).
-- ----------------------------------------------------------------------

CREATE TABLE relationships (
    id                   TEXT PRIMARY KEY,
    arena                TEXT NOT NULL,
    -- Both endpoints are mandatory; deleting either entity removes the edge.
    from_entity_id       TEXT NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
    to_entity_id         TEXT NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
    -- communicated_with | works_for | mentions | etc.
    relationship_type    TEXT NOT NULL,

    -- Edge weight for spreading-activation. Aggregated from supporting
    -- events and updated in place by the extractor.
    weight               REAL NOT NULL DEFAULT 1.0,
    first_seen           TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_seen            TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    provenance_event_ids TEXT[] NOT NULL,
    participant_set      TEXT[] NOT NULL,
    disclosure_class     disclosure_class NOT NULL DEFAULT 'private',

    attributes           JSONB NOT NULL DEFAULT '{}'::jsonb,

    -- Idempotent edge identity: one row per (arena, from, to, type), so a
    -- re-extraction updates weight + last_seen instead of duplicating.
    UNIQUE (arena, from_entity_id, to_entity_id, relationship_type)
);

CREATE INDEX idx_relationships_arena      ON relationships (arena);
CREATE INDEX idx_relationships_from       ON relationships (from_entity_id);
CREATE INDEX idx_relationships_to         ON relationships (to_entity_id);
CREATE INDEX idx_relationships_type       ON relationships (relationship_type);
CREATE INDEX idx_relationships_provenance ON relationships USING GIN (provenance_event_ids);
250
+
-- ----------------------------------------------------------------------
-- distillation_queue — work queue feeding the async LLM extractor.
--
-- extractor-sync adds one entry per processed event. extractor-async
-- polls, claims an item, runs the LLM, writes facts back and marks the
-- item done. claim_expires_at makes this crash-safe: an item stuck
-- mid-extraction re-surfaces once its claim lapses.
-- ----------------------------------------------------------------------

CREATE TABLE distillation_queue (
    id               BIGSERIAL PRIMARY KEY,
    -- Queue entries are removed together with their event.
    event_id         TEXT NOT NULL REFERENCES events(id) ON DELETE CASCADE,
    enqueued_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- Worker currently processing this item. NULL = unclaimed.
    claimed_by       TEXT,
    claimed_at       TIMESTAMPTZ,
    -- Claims expire so a crashed worker doesn't strand items.
    claim_expires_at TIMESTAMPTZ,
    -- pending | claimed | done | failed (enforced by valid_status).
    status           TEXT NOT NULL DEFAULT 'pending',
    attempts         INT NOT NULL DEFAULT 0,
    last_error       TEXT,
    completed_at     TIMESTAMPTZ,

    CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'done', 'failed'))
);

CREATE INDEX idx_distillation_status   ON distillation_queue (status);
CREATE INDEX idx_distillation_event_id ON distillation_queue (event_id);
-- Partial index: only claimed rows have an expiry worth scanning.
CREATE INDEX idx_distillation_claim_expires
    ON distillation_queue (claim_expires_at)
    WHERE status = 'claimed';
282
+
-- ----------------------------------------------------------------------
-- vector_provenance — ties vector-index points back to events.
--
-- The evidence index (Qdrant) holds embeddings plus a lightweight
-- payload; the canonical record lives in Postgres. Read path: compat
-- queries the vector index, takes payload.event_id, fetches the event,
-- then projects the facts citing it (late materialisation).
--
-- Why a table rather than trusting the Qdrant payload alone: the FK
-- cascade removes these rows in the same transaction as the event
-- delete (the orphan-prevention contract). The payload is the "soft
-- pointer"; this table is the hard transactional link.
-- ----------------------------------------------------------------------

CREATE TABLE vector_provenance (
    -- Qdrant point ID for this embedding.
    vector_id       TEXT PRIMARY KEY,
    event_id        TEXT NOT NULL REFERENCES events(id) ON DELETE CASCADE,
    -- Position of the chunk within a multi-chunk event.
    chunk_index     INT NOT NULL DEFAULT 0,
    embedded_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- 'nv-embed-v2' etc.
    embedding_model TEXT NOT NULL,
    -- 4096 today
    embedding_dim   INT NOT NULL
);

CREATE INDEX idx_vector_provenance_event_id ON vector_provenance (event_id);
309
+
-- ----------------------------------------------------------------------
-- audit_events — operator + system events (forensics + observability).
--
-- Kept shape-compatible with the v1 audit_events table. compat writes a
-- row on every /store, /search and /forget call so existing dashboards
-- and queries keep working.
-- ----------------------------------------------------------------------

CREATE TABLE audit_events (
    id            BIGSERIAL PRIMARY KEY,
    client_id     TEXT NOT NULL,
    -- store | search | forget | etc.
    operation     TEXT NOT NULL,
    actor_user_id TEXT,
    request_hash  TEXT NOT NULL,
    -- predicted/actual record id
    engine_id     TEXT,
    layer_id      TEXT,
    duration_ms   INT NOT NULL,
    -- queued | ok | engine_error | ...
    status        TEXT NOT NULL,
    error_msg     TEXT,
    record_count  INT,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_audit_events_client_id  ON audit_events (client_id);
CREATE INDEX idx_audit_events_operation  ON audit_events (operation);
CREATE INDEX idx_audit_events_created_at ON audit_events (created_at DESC);
336
+
-- ----------------------------------------------------------------------
-- forget cascade trigger.
--
-- When a FORGET_MEMORY event lands, the events row referenced by
-- `forgets` gets deleted. Foreign-key CASCADE removes the matching
-- vector_provenance and distillation_queue rows. This trigger function
-- handles the array-valued provenance, which FKs cannot express: it
-- strips the deleted event id from provenance_event_ids in facts /
-- relationships / entities, and deletes any row whose provenance
-- consisted solely of that event.
--
-- This is the "FORGET is a deletion, not an orphan mark" contract
-- from the reconciliation doc.
--
-- Two deliberate properties:
--   * Deletes are scoped to rows that actually cited OLD.id. Rows that
--     already had an empty provenance array (the column default on
--     entities) are left alone instead of being swept up by an
--     unrelated event delete.
--   * Membership is tested with `@>` rather than `= ANY(...)` so the
--     GIN indexes on provenance_event_ids can be used.
--
-- Returns OLD so the BEFORE DELETE on events proceeds.
-- ----------------------------------------------------------------------

CREATE OR REPLACE FUNCTION cascade_forget() RETURNS TRIGGER AS $$
BEGIN
    -- facts: first drop those supported only by the forgotten event ...
    DELETE FROM facts
    WHERE provenance_event_ids @> ARRAY[OLD.id]
      AND cardinality(array_remove(provenance_event_ids, OLD.id)) = 0;
    -- ... then strip the id from facts that have other supporting events.
    UPDATE facts
    SET provenance_event_ids = array_remove(provenance_event_ids, OLD.id)
    WHERE provenance_event_ids @> ARRAY[OLD.id];

    -- Same treatment for relationships.
    DELETE FROM relationships
    WHERE provenance_event_ids @> ARRAY[OLD.id]
      AND cardinality(array_remove(provenance_event_ids, OLD.id)) = 0;
    UPDATE relationships
    SET provenance_event_ids = array_remove(provenance_event_ids, OLD.id)
    WHERE provenance_event_ids @> ARRAY[OLD.id];

    -- Same for entities. (Entities are usually multi-event; the DELETE
    -- only fires for one-event entities, which is fine.) Removing an
    -- entity cascades to its relationships and nulls fact subject/object
    -- links via the foreign keys on those tables.
    DELETE FROM entities
    WHERE provenance_event_ids @> ARRAY[OLD.id]
      AND cardinality(array_remove(provenance_event_ids, OLD.id)) = 0;
    UPDATE entities
    SET provenance_event_ids = array_remove(provenance_event_ids, OLD.id)
    WHERE provenance_event_ids @> ARRAY[OLD.id];

    RETURN OLD;
END;
$$ LANGUAGE plpgsql;
372
+
-- Run cascade_forget() once for each events row about to be deleted.
CREATE TRIGGER trigger_cascade_forget
    BEFORE DELETE
    ON events
    FOR EACH ROW
    EXECUTE FUNCTION cascade_forget();
376
+
-- ----------------------------------------------------------------------
-- health_counts — single-row snapshot of table sizes and queue depth,
-- read by compat /health/deep.
-- ----------------------------------------------------------------------

CREATE OR REPLACE VIEW health_counts AS
SELECT
    ev.n   AS events_count,
    en.n   AS entities_count,
    fa.n   AS facts_count,
    re.n   AS relationships_count,
    vp.n   AS vector_records_count,
    dq_p.n AS distillation_pending,
    dq_c.n AS distillation_in_flight,
    dq_f.n AS distillation_failed
-- Each derived table is an ungrouped aggregate and so yields exactly one
-- row; cross-joining them therefore always produces exactly one row.
FROM       (SELECT COUNT(*) AS n FROM events)            AS ev
CROSS JOIN (SELECT COUNT(*) AS n FROM entities)          AS en
CROSS JOIN (SELECT COUNT(*) AS n FROM facts)             AS fa
CROSS JOIN (SELECT COUNT(*) AS n FROM relationships)     AS re
CROSS JOIN (SELECT COUNT(*) AS n FROM vector_provenance) AS vp
CROSS JOIN (SELECT COUNT(*) AS n FROM distillation_queue WHERE status = 'pending') AS dq_p
CROSS JOIN (SELECT COUNT(*) AS n FROM distillation_queue WHERE status = 'claimed') AS dq_c
CROSS JOIN (SELECT COUNT(*) AS n FROM distillation_queue WHERE status = 'failed')  AS dq_f;