@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/bin/cli.js +1 -1
- package/bin/commands/config.js +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +2 -2
- package/packages/doctor/src/checks/local-memory.js +2 -2
- package/packages/memory/README.md +2 -2
- package/packages/memory/openclaw-plugin/README.md +2 -2
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/src/server.js +2 -2
- package/packages/memory-engine-v2/.env.example +30 -0
- package/packages/memory-engine-v2/README.md +125 -0
- package/packages/memory-engine-v2/compat/Dockerfile +11 -0
- package/packages/memory-engine-v2/compat/requirements.txt +6 -0
- package/packages/memory-engine-v2/compat/server.py +1047 -0
- package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
- package/packages/memory-engine-v2/docker-compose.yml +206 -0
- package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
- package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
- package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
- package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
- package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
- package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
- package/packages/memory-engine-v2/extractor-async/worker.py +797 -0
- package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
- package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
- package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
- package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
- package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
- package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
- package/packages/memory-engine/.env.example +0 -13
- package/packages/memory-engine/MIGRATION.md +0 -219
- package/packages/memory-engine/README.md +0 -145
- package/packages/memory-engine/bench/README.md +0 -99
- package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
- package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
- package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
- package/packages/memory-engine/compat/Dockerfile +0 -22
- package/packages/memory-engine/compat/server.py +0 -1255
- package/packages/memory-engine/docker-compose.test.yml +0 -59
- package/packages/memory-engine/docker-compose.yml +0 -255
- package/packages/memory-engine/engine/README.md +0 -52
- package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
- package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
- package/packages/memory-engine/engine/l6-document-store.py +0 -1018
- package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
- package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
- package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
- package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
- package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
- package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
- package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
- package/packages/memory-engine/pme_memory/__init__.py +0 -0
- package/packages/memory-engine/pme_memory/__main__.py +0 -129
- package/packages/memory-engine/pme_memory/artifacts.py +0 -95
- package/packages/memory-engine/pme_memory/embed.py +0 -74
- package/packages/memory-engine/pme_memory/health.py +0 -36
- package/packages/memory-engine/pme_memory/hygiene.py +0 -159
- package/packages/memory-engine/pme_memory/indexer.py +0 -200
- package/packages/memory-engine/pme_memory/needs.py +0 -55
- package/packages/memory-engine/pme_memory/provenance.py +0 -80
- package/packages/memory-engine/pme_memory/scoring.py +0 -168
- package/packages/memory-engine/pme_memory/search.py +0 -52
- package/packages/memory-engine/pme_memory/store.py +0 -86
- package/packages/memory-engine/pme_memory/synthesis.py +0 -114
- package/packages/memory-engine/pyproject.toml +0 -65
- package/packages/memory-engine/scripts/kg-extractor.py +0 -557
- package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
- package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
- package/packages/memory-engine/tests/e2e_arena.sh +0 -259
- package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
- package/packages/memory-engine/tests/embed_stub/server.py +0 -80
- package/packages/memory-engine/tests/test_aggregate.py +0 -333
- package/packages/memory-engine/tests/test_api_contract.sh +0 -57
- package/packages/memory-engine/tests/test_arena_safety.py +0 -232
- package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
- package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
- package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
- package/packages/memory-engine/tests/test_embed_provider.py +0 -693
- package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
- package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
- package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
- package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
-- pentatonic-memory-engine v2: org-model schema, initial migration.
|
|
2
|
+
--
|
|
3
|
+
-- Design principles encoded in this schema:
|
|
4
|
+
--
|
|
5
|
+
-- 1. Append-only event log + materialised views. The `events` table is
|
|
6
|
+
-- the source of truth; `entities`, `facts`, `relationships` are
|
|
7
|
+
-- materialised views over it. To iterate the schema, add columns;
|
|
8
|
+
-- never alter existing ones. To rebuild the view, drop + replay.
|
|
9
|
+
--
|
|
10
|
+
-- 2. Provenance + participant-set + disclosure-class on every fact
|
|
11
|
+
-- from day one. Even if enforcement (the privacy plane / Layer-P)
|
|
12
|
+
-- is later, the columns exist. Retrofitting these into a populated
|
|
13
|
+
-- multi-tenant schema = the schema-altering operation we explicitly
|
|
14
|
+
-- can't tolerate. See PR #285 (extraction-objectives.md) for the
|
|
15
|
+
-- semantic contract these columns enforce.
|
|
16
|
+
--
|
|
17
|
+
-- 3. Forget is a deletion, not an orphan mark. When an evidence
|
|
18
|
+
-- record is forgotten, every fact whose entire provenance set
|
|
19
|
+
-- is forgotten gets deleted in the same transaction. Orphan-
|
|
20
|
+
-- marking a derivative of erased data is itself GDPR non-
|
|
21
|
+
-- compliance — see privacy model L4 / fix #8.
|
|
22
|
+
--
|
|
23
|
+
-- 4. Content-hash IDs everywhere. Idempotent upsert is the only
|
|
24
|
+
-- write contract. Re-ingestion converges instead of inflating
|
|
25
|
+
-- (the v1 L0 840k vs 594k bloat).
|
|
26
|
+
|
|
27
|
+
-- ----------------------------------------------------------------------
|
|
28
|
+
-- Enums
|
|
29
|
+
-- ----------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
-- Source category, set at ingest time by extractor-sync's per-source
|
|
32
|
+
-- rules. Drives downstream routing (typed retrieval) + extraction
|
|
33
|
+
-- prompts. v1 hardcoded these to 'general' for every record; v2 makes
|
|
34
|
+
-- it required.
|
|
35
|
+
CREATE TYPE source_kind AS ENUM (
    -- slack, teams, dm
    'chat',
    -- gmail, drafts
    'note',
    -- drive, notion, confluence
    'doc',
    -- calendar meetings
    'event',
    -- jira, linear, github issues
    'ticket',
    -- github commits / PRs
    'commit',
    -- system-of-record snapshots (CRM, ERP)
    'system',
    -- agent-generated content (chat_turn from pip)
    'agent'
);

-- participant_kind: internal vs external classification. "Internal" is the
-- writing user's organisation; "external" is a counterparty, customer, or
-- partner. Drives the disclosure-class invariant in PR #280
-- (counterparty-taint spike).
CREATE TYPE participant_kind AS ENUM (
    'internal',
    'external',
    -- a thread with both
    'mixed',
    -- can't classify; treated as 'external' for safety
    'unknown'
);

-- disclosure_class: how widely a fact may be re-surfaced. Columns of this
-- type default to 'private' until proven otherwise. Drives cross-user
-- inference taint.
CREATE TYPE disclosure_class AS ENUM (
    -- explicitly shared (e.g., docs marked external)
    'public',
    -- visible across user's org but not externally
    'team',
    -- visible only to participant set
    'private',
    -- legal hold / contains PII / etc.
    'restricted'
);

-- extraction_stage: how far a fact has progressed through extraction.
--   provisional = extracted by the deterministic fast-path (extractor-sync)
--   distilled   = upgraded by the LLM (extractor-async)
--   verified    = human-confirmed
CREATE TYPE extraction_stage AS ENUM (
    'provisional',
    'distilled',
    'verified'
);
|
|
73
|
+
|
|
74
|
+
-- ----------------------------------------------------------------------
|
|
75
|
+
-- events — the append-only source of truth.
|
|
76
|
+
--
|
|
77
|
+
-- Every STORE_MEMORY / FORGET_MEMORY event the engine processes lands
|
|
78
|
+
-- here. The materialised views below are derived from this table; if
|
|
79
|
+
-- they're ever wrong, drop and replay.
|
|
80
|
+
-- ----------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
CREATE TABLE events (
    -- Content-addressed primary key: sha256(arena || ':' || canonical_content).
    -- Idempotent: re-emit the same event, same ID, no-op insert.
    id                TEXT PRIMARY KEY,

    -- Arena scope. 'tenant' = clientId, 'user' = clientId:userId.
    arena             TEXT NOT NULL,
    client_id         TEXT NOT NULL,
    user_id           TEXT,

    -- Event semantics.
    event_type        TEXT NOT NULL,          -- STORE_MEMORY | FORGET_MEMORY | CHAT_TURN
    source_kind       source_kind NOT NULL,
    source_id         TEXT,                   -- producer's stable id (msg ts, drive id, etc.)

    -- Content + canonical hash. content_hash is what dedups across
    -- re-ingests; content is the raw payload (lazy-materialisable —
    -- could move to object storage later if RAM matters).
    content           TEXT NOT NULL,
    content_hash      TEXT NOT NULL,

    -- Provenance (mandatory from day one — see header note 2).
    participant_set   TEXT[] NOT NULL,        -- arena IDs that can see
    participant_kind  participant_kind NOT NULL DEFAULT 'unknown',
    disclosure_class  disclosure_class NOT NULL DEFAULT 'private',

    -- Producer-supplied metadata bag. Untyped to avoid lock-in; the
    -- extractor reads from here per source_kind.
    attributes        JSONB NOT NULL DEFAULT '{}'::jsonb,

    -- Timestamps. emitted_at is producer-supplied; received_at is when
    -- we landed it. Both useful for forensic queries.
    emitted_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    received_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- FORGET_MEMORY events reference the event they're erasing. The
    -- reference is nulled (not cascaded) when the target row is deleted,
    -- so the FORGET_MEMORY row itself survives in the log.
    forgets           TEXT REFERENCES events(id) ON DELETE SET NULL,

    CONSTRAINT valid_event_type CHECK (event_type IN ('STORE_MEMORY', 'FORGET_MEMORY', 'CHAT_TURN'))
);

CREATE INDEX idx_events_arena ON events(arena);
CREATE INDEX idx_events_content_hash ON events(content_hash);
CREATE INDEX idx_events_source_kind ON events(source_kind);
CREATE INDEX idx_events_emitted_at ON events(emitted_at DESC);
CREATE INDEX idx_events_client_user ON events(client_id, user_id);
CREATE INDEX idx_events_participant_set ON events USING GIN(participant_set);
CREATE INDEX idx_events_attributes ON events USING GIN(attributes jsonb_path_ops);

-- Index the referencing side of the self-FK. Without it, every DELETE on
-- events makes the ON DELETE SET NULL action scan the whole table looking
-- for rows whose `forgets` points at the deleted id. Partial because only
-- rows with a non-NULL `forgets` can ever match.
CREATE INDEX idx_events_forgets ON events(forgets) WHERE forgets IS NOT NULL;
|
|
130
|
+
|
|
131
|
+
-- ----------------------------------------------------------------------
|
|
132
|
+
-- entities — extracted nouns, people, projects, etc.
|
|
133
|
+
--
|
|
134
|
+
-- Many-to-many with events: one event can mention multiple entities (and vice versa).
|
|
135
|
+
-- entity_id is content-derived (lowercased + stemmed, plus
|
|
136
|
+
-- participant_set for tenant isolation) so re-extractions converge.
|
|
137
|
+
-- ----------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
CREATE TABLE entities (
    id                    TEXT,
    arena                 TEXT NOT NULL,

    -- person | org | project | concept | ...
    entity_type           TEXT NOT NULL,
    canonical_name        TEXT NOT NULL,

    -- Surface forms used to derive this canonical (for fuzzy matching).
    -- Append-only — never remove.
    aliases               TEXT[] NOT NULL DEFAULT '{}'::TEXT[],

    -- Provenance: which events first mentioned this entity.
    provenance_event_ids  TEXT[] NOT NULL DEFAULT '{}'::TEXT[],

    -- Visibility columns, present on every derived table.
    participant_set       TEXT[] NOT NULL,
    disclosure_class      disclosure_class NOT NULL DEFAULT 'private',

    attributes            JSONB NOT NULL DEFAULT '{}'::jsonb,
    first_seen            TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_seen             TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    PRIMARY KEY (id)
);

-- Scalar lookups.
CREATE INDEX idx_entities_arena          ON entities (arena);
CREATE INDEX idx_entities_entity_type    ON entities (entity_type);
CREATE INDEX idx_entities_canonical_name ON entities (canonical_name);

-- Array-membership lookups.
CREATE INDEX idx_entities_aliases    ON entities USING GIN (aliases);
CREATE INDEX idx_entities_provenance ON entities USING GIN (provenance_event_ids);
|
|
163
|
+
|
|
164
|
+
-- ----------------------------------------------------------------------
|
|
165
|
+
-- facts — extracted decisions, commitments, statements.
|
|
166
|
+
--
|
|
167
|
+
-- This is the org-model proper: structured knowledge derived from
|
|
168
|
+
-- events. Each fact cites the events it was extracted from
|
|
169
|
+
-- (provenance_event_ids). When all citing events are forgotten, the
|
|
170
|
+
-- fact gets deleted (see header note 3).
|
|
171
|
+
-- ----------------------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
CREATE TABLE facts (
    id                    TEXT,
    arena                 TEXT NOT NULL,

    -- Fact category: decision | commitment | state | mention | observation.
    -- Drives prompt selection in extractor-async and typed routing in
    -- compat's read path.
    category              TEXT NOT NULL,

    -- Subject / predicate / object triple. Either entity reference is
    -- nulled (the fact is kept) when the referenced entity is deleted.
    subject_entity_id     TEXT,
    predicate             TEXT,               -- 'committed_to' | 'decided' | 'mentioned_in' etc.
    object_entity_id      TEXT,

    -- Free-text natural-language form. Useful for LLM context-building
    -- and for showing the user.
    statement             TEXT NOT NULL,

    -- Provenance. Citing events that support this fact. If all are
    -- deleted via FORGET_MEMORY cascade, this fact gets deleted too.
    provenance_event_ids  TEXT[] NOT NULL,

    -- Extraction confidence + stage.
    stage                 extraction_stage NOT NULL DEFAULT 'provisional',
    confidence            REAL NOT NULL DEFAULT 0.5 CHECK (confidence BETWEEN 0.0 AND 1.0),

    -- Provenance privacy from day one.
    participant_set       TEXT[] NOT NULL,
    disclosure_class      disclosure_class NOT NULL DEFAULT 'private',

    -- Temporal anchoring. effective_until applies to commitments /
    -- deadlines: when the fact resolves or expires.
    asserted_at           TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    effective_until       TIMESTAMPTZ,

    attributes            JSONB NOT NULL DEFAULT '{}'::jsonb,

    PRIMARY KEY (id),
    FOREIGN KEY (subject_entity_id) REFERENCES entities (id) ON DELETE SET NULL,
    FOREIGN KEY (object_entity_id)  REFERENCES entities (id) ON DELETE SET NULL
);

CREATE INDEX idx_facts_arena       ON facts (arena);
CREATE INDEX idx_facts_category    ON facts (category);
CREATE INDEX idx_facts_subject     ON facts (subject_entity_id);
CREATE INDEX idx_facts_object      ON facts (object_entity_id);
CREATE INDEX idx_facts_provenance  ON facts USING GIN (provenance_event_ids);
CREATE INDEX idx_facts_stage       ON facts (stage);
CREATE INDEX idx_facts_asserted_at ON facts (asserted_at DESC);
|
|
212
|
+
|
|
213
|
+
-- ----------------------------------------------------------------------
|
|
214
|
+
-- relationships — edges between entities.
|
|
215
|
+
--
|
|
216
|
+
-- pgGraph / recursive-CTE substrate. Lets us compute personFacets,
|
|
217
|
+
-- peopleList, and spreading-activation queries over the canonical
|
|
218
|
+
-- store, retiring Neo4j (pending the spike in #278).
|
|
219
|
+
-- ----------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
CREATE TABLE relationships (
    id                    TEXT,
    arena                 TEXT NOT NULL,

    -- Edge endpoints. Deleting either entity deletes the edge.
    from_entity_id        TEXT NOT NULL,
    to_entity_id          TEXT NOT NULL,

    -- communicated_with | works_for | mentions | etc.
    relationship_type     TEXT NOT NULL,

    -- Edge weight (for spreading-activation). Aggregated from supporting
    -- events; updated in-place by the extractor.
    weight                REAL NOT NULL DEFAULT 1.0,
    first_seen            TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_seen             TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    provenance_event_ids  TEXT[] NOT NULL,
    participant_set       TEXT[] NOT NULL,
    disclosure_class      disclosure_class NOT NULL DEFAULT 'private',

    attributes            JSONB NOT NULL DEFAULT '{}'::jsonb,

    PRIMARY KEY (id),
    FOREIGN KEY (from_entity_id) REFERENCES entities (id) ON DELETE CASCADE,
    FOREIGN KEY (to_entity_id)   REFERENCES entities (id) ON DELETE CASCADE,

    -- Idempotent edge identity: (arena, from, to, type) is unique. Lets
    -- re-extractions update weight + last_seen without duplicating.
    UNIQUE (arena, from_entity_id, to_entity_id, relationship_type)
);

CREATE INDEX idx_relationships_arena      ON relationships (arena);
CREATE INDEX idx_relationships_from       ON relationships (from_entity_id);
CREATE INDEX idx_relationships_to         ON relationships (to_entity_id);
CREATE INDEX idx_relationships_type       ON relationships (relationship_type);
CREATE INDEX idx_relationships_provenance ON relationships USING GIN (provenance_event_ids);
|
|
250
|
+
|
|
251
|
+
-- ----------------------------------------------------------------------
|
|
252
|
+
-- distillation_queue — work queue for the async LLM extractor.
|
|
253
|
+
--
|
|
254
|
+
-- extractor-sync enqueues an entry for every event it processes;
|
|
255
|
+
-- extractor-async polls + claims items, runs the LLM, writes facts
|
|
256
|
+
-- back, marks the item done. Crash-safe: claim_expires_at lets a
|
|
257
|
+
-- stuck-mid-extraction item re-surface.
|
|
258
|
+
-- ----------------------------------------------------------------------
|
|
259
|
+
|
|
260
|
+
CREATE TABLE distillation_queue (
    id                BIGSERIAL,

    -- Event awaiting distillation; the queue row goes away with the event.
    event_id          TEXT NOT NULL,
    enqueued_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Worker that's currently processing this. NULL = unclaimed.
    claimed_by        TEXT,
    claimed_at        TIMESTAMPTZ,

    -- Claims expire so a crashed worker doesn't strand items.
    claim_expires_at  TIMESTAMPTZ,

    -- Status: pending | claimed | done | failed (enforced by valid_status).
    status            TEXT NOT NULL DEFAULT 'pending',
    attempts          INT NOT NULL DEFAULT 0,
    last_error        TEXT,
    completed_at      TIMESTAMPTZ,

    PRIMARY KEY (id),
    FOREIGN KEY (event_id) REFERENCES events (id) ON DELETE CASCADE,
    CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'done', 'failed'))
);

CREATE INDEX idx_distillation_status   ON distillation_queue (status);
CREATE INDEX idx_distillation_event_id ON distillation_queue (event_id);

-- Partial index: only claimed rows have a claim expiry worth looking up.
CREATE INDEX idx_distillation_claim_expires
    ON distillation_queue (claim_expires_at)
    WHERE status = 'claimed';
|
|
282
|
+
|
|
283
|
+
-- ----------------------------------------------------------------------
|
|
284
|
+
-- vector_provenance — links from vector-index payloads back to events.
|
|
285
|
+
--
|
|
286
|
+
-- The evidence index (Qdrant) stores embeddings + lightweight payload;
|
|
287
|
+
-- the canonical record is here. compat queries vector-index → uses
|
|
288
|
+
-- payload.event_id → fetches event from this table → projects facts
|
|
289
|
+
-- citing that event. Late materialisation in action.
|
|
290
|
+
--
|
|
291
|
+
-- Why a table and not just relying on Qdrant payload: it lets us
|
|
292
|
+
-- enforce FK cascade on event delete (the orphan-prevention contract)
|
|
293
|
+
-- in the same transaction as the event delete. The vector-index
|
|
294
|
+
-- payload becomes the "soft pointer"; this table is the hard
|
|
295
|
+
-- transactional link.
|
|
296
|
+
-- ----------------------------------------------------------------------
|
|
297
|
+
|
|
298
|
+
CREATE TABLE vector_provenance (
    -- vector_id = Qdrant point ID for this embedding.
    vector_id        TEXT,

    -- Owning event; the row is removed together with the event.
    event_id         TEXT NOT NULL,

    -- Position of this chunk, for multi-chunk events.
    chunk_index      INT NOT NULL DEFAULT 0,
    embedded_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- 'nv-embed-v2' etc.
    embedding_model  TEXT NOT NULL,

    -- 4096 today
    embedding_dim    INT NOT NULL,

    PRIMARY KEY (vector_id),
    FOREIGN KEY (event_id) REFERENCES events (id) ON DELETE CASCADE
);

CREATE INDEX idx_vector_provenance_event_id ON vector_provenance (event_id);
|
|
309
|
+
|
|
310
|
+
-- ----------------------------------------------------------------------
|
|
311
|
+
-- audit_events — operator + system events (forensic + observability).
|
|
312
|
+
--
|
|
313
|
+
-- Mirrors the v1 audit_events table for compatibility. compat writes
|
|
314
|
+
-- here on every /store, /search, /forget call so existing dashboards
|
|
315
|
+
-- + queries keep working.
|
|
316
|
+
-- ----------------------------------------------------------------------
|
|
317
|
+
|
|
318
|
+
CREATE TABLE audit_events (
    id             BIGSERIAL,
    client_id      TEXT NOT NULL,

    -- store | search | forget | etc.
    operation      TEXT NOT NULL,
    actor_user_id  TEXT,
    request_hash   TEXT NOT NULL,

    -- predicted/actual record id
    engine_id      TEXT,
    layer_id       TEXT,
    duration_ms    INT NOT NULL,

    -- queued | ok | engine_error | ...
    status         TEXT NOT NULL,
    error_msg      TEXT,
    record_count   INT,
    created_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    PRIMARY KEY (id)
);

CREATE INDEX idx_audit_events_client_id  ON audit_events (client_id);
CREATE INDEX idx_audit_events_operation  ON audit_events (operation);
CREATE INDEX idx_audit_events_created_at ON audit_events (created_at DESC);
|
|
336
|
+
|
|
337
|
+
-- ----------------------------------------------------------------------
|
|
338
|
+
-- forget cascade trigger.
|
|
339
|
+
--
|
|
340
|
+
-- When a FORGET_MEMORY event lands, the events row referenced by
|
|
341
|
+
-- `forgets` gets deleted. FK CASCADE deletes vector_provenance; the trigger drops
|
|
342
|
+
-- the event_id from any provenance_event_ids array in facts /
|
|
343
|
+
-- relationships / entities. If a fact's provenance_event_ids becomes
|
|
344
|
+
-- empty as a result, the fact gets deleted in the same trigger.
|
|
345
|
+
--
|
|
346
|
+
-- This is the "FORGET is a deletion, not an orphan mark" contract
|
|
347
|
+
-- from the reconciliation doc.
|
|
348
|
+
-- ----------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
-- cascade_forget(): row-level trigger function fired before an `events` row
-- is deleted. For each of facts, relationships and entities it:
--   1. deletes rows whose provenance consists solely of the deleted event;
--   2. strips the deleted event id from the provenance of the remaining rows.
--
-- Only rows that actually cite OLD.id are touched. Rows whose provenance was
-- already empty (e.g. entities created with the column default) are left
-- alone; an unscoped "provenance is empty" delete would wipe them whenever
-- any unrelated event is removed.
--
-- The membership test uses `@>` rather than `= ANY(...)` so the GIN indexes
-- on provenance_event_ids can serve the lookup.
--
-- Returns OLD so the DELETE on `events` proceeds.
CREATE OR REPLACE FUNCTION cascade_forget() RETURNS TRIGGER AS $$
DECLARE
    forgotten TEXT[];
BEGIN
    forgotten := ARRAY[OLD.id];

    -- facts: `@>` = cites the event; `<@` = cites nothing else.
    DELETE FROM facts
    WHERE provenance_event_ids @> forgotten
      AND provenance_event_ids <@ forgotten;

    UPDATE facts
    SET provenance_event_ids = array_remove(provenance_event_ids, OLD.id)
    WHERE provenance_event_ids @> forgotten;

    -- Same treatment for relationships.
    DELETE FROM relationships
    WHERE provenance_event_ids @> forgotten
      AND provenance_event_ids <@ forgotten;

    UPDATE relationships
    SET provenance_event_ids = array_remove(provenance_event_ids, OLD.id)
    WHERE provenance_event_ids @> forgotten;

    -- Same for entities. Deleting an entity also removes its relationships
    -- and nulls fact subject/object references through the foreign keys.
    DELETE FROM entities
    WHERE provenance_event_ids @> forgotten
      AND provenance_event_ids <@ forgotten;

    UPDATE entities
    SET provenance_event_ids = array_remove(provenance_event_ids, OLD.id)
    WHERE provenance_event_ids @> forgotten;

    RETURN OLD;
END;
$$ LANGUAGE plpgsql;

-- Run the provenance clean-up as part of every single-row delete on events.
CREATE TRIGGER trigger_cascade_forget
    BEFORE DELETE ON events
    FOR EACH ROW EXECUTE FUNCTION cascade_forget();
|
|
376
|
+
|
|
377
|
+
-- ----------------------------------------------------------------------
|
|
378
|
+
-- Health check view — used by compat /health/deep.
|
|
379
|
+
-- ----------------------------------------------------------------------
|
|
380
|
+
|
|
381
|
+
-- health_counts: single-row view of table sizes and distillation backlog.
-- Each derived table below yields exactly one row, so the cross joins
-- produce exactly one output row.
CREATE OR REPLACE VIEW health_counts AS
SELECT
    ev.n  AS events_count,
    en.n  AS entities_count,
    fa.n  AS facts_count,
    re.n  AS relationships_count,
    vp.n  AS vector_records_count,
    dp.n  AS distillation_pending,
    dc.n  AS distillation_in_flight,
    df.n  AS distillation_failed
FROM       (SELECT COUNT(*) AS n FROM events) AS ev
CROSS JOIN (SELECT COUNT(*) AS n FROM entities) AS en
CROSS JOIN (SELECT COUNT(*) AS n FROM facts) AS fa
CROSS JOIN (SELECT COUNT(*) AS n FROM relationships) AS re
CROSS JOIN (SELECT COUNT(*) AS n FROM vector_provenance) AS vp
CROSS JOIN (SELECT COUNT(*) AS n FROM distillation_queue WHERE status = 'pending') AS dp
CROSS JOIN (SELECT COUNT(*) AS n FROM distillation_queue WHERE status = 'claimed') AS dc
CROSS JOIN (SELECT COUNT(*) AS n FROM distillation_queue WHERE status = 'failed') AS df;
|