@shadowforge0/aquifer-memory 1.2.1 → 1.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +8 -9
  2. package/consumers/cli.js +11 -1
  3. package/consumers/default/index.js +17 -4
  4. package/consumers/mcp.js +21 -0
  5. package/consumers/miranda/index.js +15 -4
  6. package/consumers/miranda/profile.json +145 -0
  7. package/consumers/miranda/recall-format.js +5 -3
  8. package/consumers/miranda/render-daily-md.js +186 -0
  9. package/consumers/shared/config.js +8 -0
  10. package/consumers/shared/factory.js +2 -1
  11. package/consumers/shared/llm.js +1 -1
  12. package/consumers/shared/recall-format.js +21 -1
  13. package/core/aquifer.js +693 -87
  14. package/core/artifacts.js +174 -0
  15. package/core/bundles.js +400 -0
  16. package/core/consolidation.js +340 -0
  17. package/core/decisions.js +164 -0
  18. package/core/entity-state.js +483 -0
  19. package/core/errors.js +97 -0
  20. package/core/handoff.js +153 -0
  21. package/core/insights.js +499 -0
  22. package/core/mcp-manifest.js +131 -0
  23. package/core/narratives.js +212 -0
  24. package/core/profiles.js +171 -0
  25. package/core/state.js +163 -0
  26. package/core/storage.js +82 -5
  27. package/core/timeline.js +152 -0
  28. package/index.js +14 -0
  29. package/package.json +1 -1
  30. package/pipeline/extract-state-changes.js +205 -0
  31. package/schema/001-base.sql +186 -16
  32. package/schema/002-entities.sql +35 -1
  33. package/schema/004-completion.sql +391 -0
  34. package/schema/005-entity-state-history.sql +87 -0
  35. package/schema/006-insights.sql +138 -0
  36. package/scripts/diagnose-fts-zh.js +37 -4
  37. package/scripts/drop-entity-state-history.sql +17 -0
  38. package/scripts/drop-insights.sql +12 -0
  39. package/scripts/extract-insights-from-recent-sessions.js +315 -0
  40. package/scripts/find-dburl-hints.js +29 -0
  41. package/scripts/queries.json +45 -0
  42. package/scripts/retro-recall-bench.js +409 -0
  43. package/scripts/sample-bench-queries.sql +75 -0
@@ -0,0 +1,391 @@
1
+ -- 004-completion.sql — cross-session completion schema (P1 foundation)
2
+ --
3
+ -- Adds the minimal DDL needed for the aquifer-completion capability surface:
4
+ -- * shared set_updated_at() trigger function (reused by narratives, consumer_profiles,
5
+ -- and future completion tables)
6
+ -- * sessions.consolidation_phases JSONB (per-phase state map; see consolidation
7
+ -- orchestration spec)
8
+ -- * narratives table — cross-session state snapshot with supersede chain
9
+ -- * consumer_profiles table — consumer schema registry with composite primary key
10
+ -- (tenant_id, consumer_id, version) for future multi-tenant safety
11
+ --
12
+ -- All identifiers stay parameterised on ${schema} so P4 schema rename
13
+ -- (miranda → aquifer) is a one-line config change rather than a DDL rewrite.
14
+
15
+ -- Ensure pg_trgm available (used by existing migrations; re-declared for independent
16
+ -- run safety).
17
+ CREATE EXTENSION IF NOT EXISTS pg_trgm;
18
+
19
+ -- Shared trigger: bump updated_at on row modification.
20
+ CREATE OR REPLACE FUNCTION ${schema}.set_updated_at()
21
+ RETURNS trigger
22
+ LANGUAGE plpgsql
23
+ AS $$
24
+ BEGIN
25
+ NEW.updated_at := now();
26
+ RETURN NEW;
27
+ END;
28
+ $$;
29
+
30
+ -- sessions.consolidation_phases: per-phase state map keyed by phase name.
31
+ -- Shape (documented in spec, enforced at application layer):
32
+ -- {
33
+ -- "<phase>": {
34
+ -- "status": "pending|claimed|running|succeeded|failed|skipped",
35
+ -- "attempts": int,
36
+ -- "idempotencyKey": string?, "claimToken": string?, "workerId": string?,
37
+ -- "startedAt": iso?, "finishedAt": iso?, "retryAfter": iso?,
38
+ -- "errorCode": string?, "errorMessage": string?,
39
+ -- "outputRef": { ... }?
40
+ -- }
41
+ -- }
42
+ ALTER TABLE ${schema}.sessions
43
+ ADD COLUMN IF NOT EXISTS consolidation_phases JSONB NOT NULL DEFAULT '{}'::jsonb;
44
+
45
+ -- narratives: cross-session state snapshots with scope-based addressing and
46
+ -- supersede chain. Only one 'active' row per (tenant, agent, scope, scope_key).
47
+ CREATE TABLE IF NOT EXISTS ${schema}.narratives (
48
+ id BIGSERIAL PRIMARY KEY,
49
+ tenant_id TEXT NOT NULL DEFAULT 'default',
50
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
51
+ source_session_id TEXT,
52
+ agent_id TEXT NOT NULL DEFAULT 'main',
53
+ consumer_profile_id TEXT NOT NULL,
54
+ consumer_profile_version INT NOT NULL,
55
+ consumer_schema_hash TEXT NOT NULL,
56
+ idempotency_key TEXT UNIQUE,
57
+ scope TEXT NOT NULL DEFAULT 'agent'
58
+ CHECK (scope IN ('agent', 'workspace', 'project', 'custom')),
59
+ scope_key TEXT NOT NULL,
60
+ text TEXT NOT NULL,
61
+ status TEXT NOT NULL DEFAULT 'active'
62
+ CHECK (status IN ('active', 'archived', 'superseded')),
63
+ based_on_fact_ids BIGINT[] NOT NULL DEFAULT '{}',
64
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
65
+ superseded_by_narrative_id BIGINT REFERENCES ${schema}.narratives(id) ON DELETE SET NULL,
66
+ effective_at TIMESTAMPTZ NOT NULL DEFAULT now(),
67
+ search_tsv TSVECTOR,
68
+ search_text TEXT,
69
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
70
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
71
+ );
72
+
73
+ -- Only one active narrative per (tenant, agent, scope, scope_key).
74
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_narratives_active_scope
75
+ ON ${schema}.narratives (tenant_id, agent_id, scope, scope_key)
76
+ WHERE status = 'active';
77
+
78
+ CREATE INDEX IF NOT EXISTS idx_narratives_effective_at
79
+ ON ${schema}.narratives (tenant_id, agent_id, effective_at DESC);
80
+
81
+ CREATE INDEX IF NOT EXISTS idx_narratives_search_tsv
82
+ ON ${schema}.narratives USING GIN (search_tsv);
83
+
84
+ CREATE INDEX IF NOT EXISTS idx_narratives_search_text_trgm
85
+ ON ${schema}.narratives USING GIN (search_text gin_trgm_ops);
86
+
87
+ CREATE OR REPLACE FUNCTION ${schema}.narratives_search_tsv_update()
88
+ RETURNS trigger
89
+ LANGUAGE plpgsql
90
+ AS $$
91
+ BEGIN
92
+ NEW.search_text := COALESCE(NEW.text, '') || ' ' || COALESCE(NEW.metadata::text, '');
93
+ IF EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'zhcfg') THEN
94
+ NEW.search_tsv := setweight(to_tsvector('zhcfg', COALESCE(NEW.text, '')), 'A');
95
+ ELSE
96
+ NEW.search_tsv := setweight(to_tsvector('simple', COALESCE(NEW.text, '')), 'A');
97
+ END IF;
98
+ RETURN NEW;
99
+ END;
100
+ $$;
101
+
102
+ DROP TRIGGER IF EXISTS trg_narratives_search_tsv ON ${schema}.narratives;
103
+ CREATE TRIGGER trg_narratives_search_tsv
104
+ BEFORE INSERT OR UPDATE OF text, metadata
105
+ ON ${schema}.narratives
106
+ FOR EACH ROW
107
+ EXECUTE FUNCTION ${schema}.narratives_search_tsv_update();
108
+
109
+ DROP TRIGGER IF EXISTS trg_narratives_updated_at ON ${schema}.narratives;
110
+ CREATE TRIGGER trg_narratives_updated_at
111
+ BEFORE UPDATE ON ${schema}.narratives
112
+ FOR EACH ROW
113
+ EXECUTE FUNCTION ${schema}.set_updated_at();
114
+
115
+ -- consumer_profiles: registry for consumer output contracts.
116
+ -- Composite primary key (tenant_id, consumer_id, version) future-proofs multi-tenant.
117
+ -- NOTE(review): UNIQUE (consumer_id, version, profile_hash) does not actually
118
+ -- pin one hash per (consumer_id, version): it allows two tenants to record
+ -- DIFFERENT hashes for the same consumer version (the drift case), while
+ -- rejecting the benign case of two tenants registering the identical triple.
+ -- Cross-tenant hash agreement must be enforced at the application layer.
119
+ CREATE TABLE IF NOT EXISTS ${schema}.consumer_profiles (
120
+ tenant_id TEXT NOT NULL DEFAULT 'default',
121
+ consumer_id TEXT NOT NULL,
122
+ version INT NOT NULL,
123
+ profile_hash TEXT NOT NULL,
124
+ profile_json JSONB NOT NULL,
125
+ loaded_at TIMESTAMPTZ NOT NULL DEFAULT now(),
126
+ deprecated_at TIMESTAMPTZ,
127
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
128
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
129
+ PRIMARY KEY (tenant_id, consumer_id, version),
130
+ UNIQUE (consumer_id, version, profile_hash)
131
+ );
132
+
133
+ CREATE INDEX IF NOT EXISTS idx_consumer_profiles_active
134
+ ON ${schema}.consumer_profiles (tenant_id, consumer_id, version DESC)
135
+ WHERE deprecated_at IS NULL;
136
+
137
+ DROP TRIGGER IF EXISTS trg_consumer_profiles_updated_at ON ${schema}.consumer_profiles;
138
+ CREATE TRIGGER trg_consumer_profiles_updated_at
139
+ BEFORE UPDATE ON ${schema}.consumer_profiles
140
+ FOR EACH ROW
141
+ EXECUTE FUNCTION ${schema}.set_updated_at();
142
+
143
+ -- timeline_events: append-only event log keyed by (tenant, agent, occurred_at).
144
+ -- category vocabulary is consumer-owned (focus/todo/mood/handoff/narrative/cli
145
+ -- for Miranda default), event shape is strict core. idempotency_key UNIQUE
146
+ -- across the table to make caller-driven dedupe safe.
147
+ CREATE TABLE IF NOT EXISTS ${schema}.timeline_events (
148
+ id BIGSERIAL PRIMARY KEY,
149
+ tenant_id TEXT NOT NULL DEFAULT 'default',
150
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
151
+ source_session_id TEXT,
152
+ agent_id TEXT NOT NULL DEFAULT 'main',
153
+ consumer_profile_id TEXT NOT NULL,
154
+ consumer_profile_version INT NOT NULL,
155
+ consumer_schema_hash TEXT NOT NULL,
156
+ idempotency_key TEXT UNIQUE,
157
+ occurred_at TIMESTAMPTZ NOT NULL,
158
+ source TEXT NOT NULL,
159
+ session_ref TEXT,
160
+ category TEXT NOT NULL,
161
+ text TEXT NOT NULL,
162
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
163
+ search_tsv TSVECTOR,
164
+ search_text TEXT,
165
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
166
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
167
+ );
168
+
169
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_occurred_at
170
+ ON ${schema}.timeline_events (tenant_id, agent_id, occurred_at DESC);
171
+
172
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_category
173
+ ON ${schema}.timeline_events (tenant_id, agent_id, category, occurred_at DESC);
174
+
175
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_search_tsv
176
+ ON ${schema}.timeline_events USING GIN (search_tsv);
177
+
178
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_search_text_trgm
179
+ ON ${schema}.timeline_events USING GIN (search_text gin_trgm_ops);
180
+
181
+ CREATE OR REPLACE FUNCTION ${schema}.timeline_events_search_tsv_update()
182
+ RETURNS trigger
183
+ LANGUAGE plpgsql
184
+ AS $$
185
+ BEGIN
186
+ NEW.search_text :=
187
+ COALESCE(NEW.category, '') || ' ' ||
188
+ COALESCE(NEW.text, '') || ' ' ||
189
+ COALESCE(NEW.metadata::text, '');
190
+
191
+ IF EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'zhcfg') THEN
192
+ NEW.search_tsv :=
193
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.category, '')), 'B') ||
194
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.text, '')), 'A');
195
+ ELSE
196
+ NEW.search_tsv :=
197
+ setweight(to_tsvector('simple', COALESCE(NEW.category, '')), 'B') ||
198
+ setweight(to_tsvector('simple', COALESCE(NEW.text, '')), 'A');
199
+ END IF;
200
+
201
+ RETURN NEW;
202
+ END;
203
+ $$;
204
+
205
+ DROP TRIGGER IF EXISTS trg_timeline_events_search_tsv ON ${schema}.timeline_events;
206
+ CREATE TRIGGER trg_timeline_events_search_tsv
207
+ BEFORE INSERT OR UPDATE OF category, text, metadata
208
+ ON ${schema}.timeline_events
209
+ FOR EACH ROW
210
+ EXECUTE FUNCTION ${schema}.timeline_events_search_tsv_update();
211
+
212
+ DROP TRIGGER IF EXISTS trg_timeline_events_updated_at ON ${schema}.timeline_events;
213
+ CREATE TRIGGER trg_timeline_events_updated_at
214
+ BEFORE UPDATE ON ${schema}.timeline_events
215
+ FOR EACH ROW
216
+ EXECUTE FUNCTION ${schema}.set_updated_at();
217
+
218
+ -- session_states: latest-snapshot-per-scope with supersede chain.
219
+ -- is_latest + partial unique index enforces at-most-one latest per
220
+ -- (tenant, agent, scope_key); writer supersedes prior latest atomically.
221
+ CREATE TABLE IF NOT EXISTS ${schema}.session_states (
222
+ id BIGSERIAL PRIMARY KEY,
223
+ tenant_id TEXT NOT NULL DEFAULT 'default',
224
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
225
+ source_session_id TEXT,
226
+ agent_id TEXT NOT NULL DEFAULT 'main',
227
+ scope_key TEXT NOT NULL,
228
+ consumer_profile_id TEXT NOT NULL,
229
+ consumer_profile_version INT NOT NULL,
230
+ consumer_schema_hash TEXT NOT NULL,
231
+ idempotency_key TEXT UNIQUE,
232
+ goal TEXT,
233
+ active_work JSONB NOT NULL DEFAULT '[]'::jsonb,
234
+ blockers JSONB NOT NULL DEFAULT '[]'::jsonb,
235
+ affect JSONB NOT NULL DEFAULT '{}'::jsonb,
236
+ payload JSONB NOT NULL,
237
+ is_latest BOOLEAN NOT NULL DEFAULT true,
238
+ supersedes_state_id BIGINT REFERENCES ${schema}.session_states(id) ON DELETE SET NULL,
239
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
240
+ );
241
+
242
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_session_states_latest
243
+ ON ${schema}.session_states (tenant_id, agent_id, scope_key)
244
+ WHERE is_latest = true;
245
+
246
+ CREATE INDEX IF NOT EXISTS idx_session_states_agent
247
+ ON ${schema}.session_states (tenant_id, agent_id, created_at DESC);
248
+
249
+ -- session_handoffs: append-only handoff log. getLatest by (agent) or (agent, session).
250
+ -- No latest-enforcement — every write is a row; retrieval sorts by created_at DESC.
251
+ CREATE TABLE IF NOT EXISTS ${schema}.session_handoffs (
252
+ id BIGSERIAL PRIMARY KEY,
253
+ tenant_id TEXT NOT NULL DEFAULT 'default',
254
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
255
+ source_session_id TEXT NOT NULL,
256
+ agent_id TEXT NOT NULL DEFAULT 'main',
257
+ consumer_profile_id TEXT NOT NULL,
258
+ consumer_profile_version INT NOT NULL,
259
+ consumer_schema_hash TEXT NOT NULL,
260
+ idempotency_key TEXT UNIQUE,
261
+ status TEXT NOT NULL,
262
+ last_step TEXT,
263
+ next_step TEXT,
264
+ blockers JSONB NOT NULL DEFAULT '[]'::jsonb,
265
+ decided JSONB NOT NULL DEFAULT '[]'::jsonb,
266
+ open_loops JSONB NOT NULL DEFAULT '[]'::jsonb,
267
+ payload JSONB NOT NULL,
268
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
269
+ );
270
+
271
+ CREATE INDEX IF NOT EXISTS idx_session_handoffs_agent
272
+ ON ${schema}.session_handoffs (tenant_id, agent_id, created_at DESC);
273
+
274
+ CREATE INDEX IF NOT EXISTS idx_session_handoffs_session
275
+ ON ${schema}.session_handoffs (tenant_id, source_session_id, created_at DESC);
276
+
277
+ -- decisions: append-only decision log. status vocabulary
278
+ -- (proposed/committed/reversed) lives in a CHECK constraint so bad writes
279
+ -- fail at DB boundary. reversed_by_decision_id forms a supersede chain.
280
+ CREATE TABLE IF NOT EXISTS ${schema}.decisions (
281
+ id BIGSERIAL PRIMARY KEY,
282
+ tenant_id TEXT NOT NULL DEFAULT 'default',
283
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
284
+ source_session_id TEXT,
285
+ agent_id TEXT NOT NULL DEFAULT 'main',
286
+ consumer_profile_id TEXT NOT NULL,
287
+ consumer_profile_version INT NOT NULL,
288
+ consumer_schema_hash TEXT NOT NULL,
289
+ idempotency_key TEXT UNIQUE,
290
+ payload JSONB NOT NULL,
291
+ status TEXT NOT NULL
292
+ CHECK (status IN ('proposed', 'committed', 'reversed')),
293
+ decision_text TEXT NOT NULL,
294
+ reason_text TEXT,
295
+ decided_at TIMESTAMPTZ NOT NULL DEFAULT now(),
296
+ reversed_by_decision_id BIGINT REFERENCES ${schema}.decisions(id) ON DELETE SET NULL,
297
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
298
+ search_tsv TSVECTOR,
299
+ search_text TEXT,
300
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
301
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
302
+ );
303
+
304
+ CREATE INDEX IF NOT EXISTS idx_decisions_status
305
+ ON ${schema}.decisions (tenant_id, agent_id, status, decided_at DESC);
306
+
307
+ CREATE INDEX IF NOT EXISTS idx_decisions_session
308
+ ON ${schema}.decisions (tenant_id, source_session_id);
309
+
310
+ CREATE INDEX IF NOT EXISTS idx_decisions_search_tsv
311
+ ON ${schema}.decisions USING GIN (search_tsv);
+ -- NOTE(review): decisions.search_text is populated by the trigger below but has
+ -- no gin_trgm_ops index, unlike narratives and timeline_events; add one if
+ -- trigram fallback recall is expected on decisions.
312
+
313
+ CREATE OR REPLACE FUNCTION ${schema}.decisions_search_tsv_update()
314
+ RETURNS trigger
315
+ LANGUAGE plpgsql
316
+ AS $$
317
+ BEGIN
318
+ NEW.search_text :=
319
+ COALESCE(NEW.decision_text, '') || ' ' ||
320
+ COALESCE(NEW.reason_text, '') || ' ' ||
321
+ COALESCE(NEW.metadata::text, '');
322
+
323
+ IF EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'zhcfg') THEN
324
+ NEW.search_tsv :=
325
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.decision_text, '')), 'A') ||
326
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.reason_text, '')), 'B');
327
+ ELSE
328
+ NEW.search_tsv :=
329
+ setweight(to_tsvector('simple', COALESCE(NEW.decision_text, '')), 'A') ||
330
+ setweight(to_tsvector('simple', COALESCE(NEW.reason_text, '')), 'B');
331
+ END IF;
332
+
333
+ RETURN NEW;
334
+ END;
335
+ $$;
336
+
337
+ DROP TRIGGER IF EXISTS trg_decisions_search_tsv ON ${schema}.decisions;
338
+ CREATE TRIGGER trg_decisions_search_tsv
339
+ BEFORE INSERT OR UPDATE OF decision_text, reason_text, metadata
340
+ ON ${schema}.decisions
341
+ FOR EACH ROW
342
+ EXECUTE FUNCTION ${schema}.decisions_search_tsv_update();
343
+
344
+ DROP TRIGGER IF EXISTS trg_decisions_updated_at ON ${schema}.decisions;
345
+ CREATE TRIGGER trg_decisions_updated_at
346
+ BEFORE UPDATE ON ${schema}.decisions
347
+ FOR EACH ROW
348
+ EXECUTE FUNCTION ${schema}.set_updated_at();
349
+
350
+ -- artifacts: records producer-declared outputs (daily md, render, export).
351
+ -- Aquifer doesn't interpret payload — producers own shape. status lifecycle
352
+ -- pending → produced|failed|discarded.
353
+ CREATE TABLE IF NOT EXISTS ${schema}.artifacts (
354
+ id BIGSERIAL PRIMARY KEY,
355
+ tenant_id TEXT NOT NULL DEFAULT 'default',
356
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
357
+ source_session_id TEXT,
358
+ agent_id TEXT NOT NULL DEFAULT 'main',
359
+ consumer_profile_id TEXT NOT NULL,
360
+ consumer_profile_version INT NOT NULL,
361
+ consumer_schema_hash TEXT NOT NULL,
362
+ idempotency_key TEXT UNIQUE,
363
+ producer_id TEXT NOT NULL,
364
+ artifact_type TEXT NOT NULL,
365
+ trigger_phase TEXT,
366
+ format TEXT NOT NULL,
367
+ destination TEXT NOT NULL,
368
+ status TEXT NOT NULL DEFAULT 'pending'
369
+ CHECK (status IN ('pending', 'produced', 'failed', 'discarded')),
370
+ content_ref TEXT,
371
+ payload JSONB NOT NULL DEFAULT '{}'::jsonb,
372
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
373
+ produced_at TIMESTAMPTZ,
374
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
375
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
376
+ );
377
+
378
+ CREATE INDEX IF NOT EXISTS idx_artifacts_lookup
379
+ ON ${schema}.artifacts (tenant_id, agent_id, producer_id, created_at DESC);
380
+
381
+ CREATE INDEX IF NOT EXISTS idx_artifacts_session
382
+ ON ${schema}.artifacts (tenant_id, source_session_id, created_at DESC);
383
+
384
+ CREATE INDEX IF NOT EXISTS idx_artifacts_status
385
+ ON ${schema}.artifacts (tenant_id, status, created_at DESC);
386
+
387
+ DROP TRIGGER IF EXISTS trg_artifacts_updated_at ON ${schema}.artifacts;
388
+ CREATE TRIGGER trg_artifacts_updated_at
389
+ BEFORE UPDATE ON ${schema}.artifacts
390
+ FOR EACH ROW
391
+ EXECUTE FUNCTION ${schema}.set_updated_at();
@@ -0,0 +1,87 @@
1
+ -- entity_state_history: temporal state-change tracking on entities.
2
+ --
3
+ -- Captures discrete attribute transitions (e.g. version.stable=1.2.1 -> 1.3.0,
4
+ -- editor.preference=vim -> nvim). Designed as additive overlay on the entities
5
+ -- table; DROP-clean — no triggers/functions/views, removing this table leaves
6
+ -- the rest of Aquifer untouched.
7
+ --
8
+ -- See spec.md Q3 and ~/.claude/develop-runs/20260419-142432-aquifer-memory-routes/.
9
+
10
+ CREATE TABLE IF NOT EXISTS ${schema}.entity_state_history (
11
+ id BIGSERIAL PRIMARY KEY,
12
+ tenant_id TEXT NOT NULL DEFAULT 'default',
13
+ agent_id TEXT NOT NULL DEFAULT 'main',
14
+ entity_id BIGINT NOT NULL
15
+ REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
16
+ session_row_id BIGINT
17
+ REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
18
+ evidence_session_id TEXT,
19
+ attribute TEXT NOT NULL CHECK (btrim(attribute) <> ''),
20
+ value JSONB NOT NULL,
21
+ valid_from TIMESTAMPTZ NOT NULL,
22
+ valid_to TIMESTAMPTZ,
23
+ evidence_text TEXT NOT NULL DEFAULT '',
24
+ confidence NUMERIC(4,3) NOT NULL DEFAULT 0.7
25
+ CHECK (confidence >= 0 AND confidence <= 1),
26
+ source TEXT NOT NULL DEFAULT 'llm'
27
+ CHECK (source IN ('llm', 'manual', 'infra')),
28
+ idempotency_key TEXT,
29
+ supersedes_state_id BIGINT
30
+ REFERENCES ${schema}.entity_state_history(id) ON DELETE SET NULL,
31
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
32
+ CHECK (valid_to IS NULL OR valid_to > valid_from)
33
+ );
34
+
35
+ -- Partial UNIQUE: only one "current" (valid_to IS NULL) row per
36
+ -- (tenant, agent, entity, attribute). This is the temporal invariant —
37
+ -- two open intervals on the same key would mean the table is corrupt.
38
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_state_history_current
39
+ ON ${schema}.entity_state_history (tenant_id, agent_id, entity_id, attribute)
40
+ WHERE valid_to IS NULL;
41
+
42
+ -- Idempotency: same caller-supplied key writes once. Partial allows NULL keys
43
+ -- (manual writes don't always need them).
44
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_state_history_idempotency
45
+ ON ${schema}.entity_state_history (idempotency_key)
46
+ WHERE idempotency_key IS NOT NULL;
47
+
48
+ -- Hot path: history-by-attribute timeline scan, newest-first.
49
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_entity_attr_time
50
+ ON ${schema}.entity_state_history
51
+ (tenant_id, agent_id, entity_id, attribute, valid_from DESC, id DESC);
52
+
53
+ -- Hot path: full history for an entity (no attribute filter).
54
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_entity_time
55
+ ON ${schema}.entity_state_history
56
+ (tenant_id, agent_id, entity_id, valid_from DESC, id DESC);
57
+
58
+ -- Diagnostic: trace all state changes captured from a single session.
59
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_evidence_session
60
+ ON ${schema}.entity_state_history
61
+ (tenant_id, agent_id, evidence_session_id, created_at DESC)
62
+ WHERE evidence_session_id IS NOT NULL;
63
+
64
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_session_row
65
+ ON ${schema}.entity_state_history (session_row_id)
66
+ WHERE session_row_id IS NOT NULL;
67
+
68
+ COMMENT ON TABLE ${schema}.entity_state_history IS
69
+ 'Valid-time temporal state changes on entities. Each row = one (entity, attribute) value valid over [valid_from, valid_to). NULL valid_to = current. supersedes_state_id chains supersession history.';
70
+
71
+ COMMENT ON COLUMN ${schema}.entity_state_history.attribute IS
72
+ 'Stable snake_case path identifying what changed (e.g. version.stable, editor.preference, runtime.node.version). Caller-defined; treat as opaque key.';
73
+
74
+ COMMENT ON COLUMN ${schema}.entity_state_history.valid_from IS
75
+ 'When the new value became true in the real world (not when it was observed). Use evidence anchor; fall back to session started_at if unspecified.';
76
+
77
+ COMMENT ON COLUMN ${schema}.entity_state_history.valid_to IS
78
+ 'NULL = currently valid. Otherwise, the timestamp at which a successor row took over. Closed intervals must satisfy valid_to > valid_from.';
79
+
80
+ COMMENT ON COLUMN ${schema}.entity_state_history.idempotency_key IS
81
+ 'Caller-supplied dedupe key. Default: sha256(tenant, agent, entity, attribute, canonical_json(value), valid_from, source). Replay safe.';
82
+
83
+ COMMENT ON COLUMN ${schema}.entity_state_history.supersedes_state_id IS
84
+ 'Chain pointer to the row this one closed (set valid_to on). NULL if this is the first known value for (entity, attribute).';
85
+
86
+ COMMENT ON COLUMN ${schema}.entity_state_history.evidence_session_id IS
87
+ 'Session that produced this evidence (text-level session_id, not session_row_id). For audit / re-extraction.';
@@ -0,0 +1,138 @@
1
+ -- insights: higher-order reflection from session content (Q4).
2
+ --
3
+ -- Holds preferences, recurring patterns, frustrations, and successful
4
+ -- workflows distilled from session_summaries over a window. Vector-indexed
5
+ -- for natural-language recall via aquifer.recallInsights().
6
+ --
7
+ -- DROP-clean: no triggers/functions, no FK from anywhere else into this table.
8
+ -- See scripts/drop-insights.sql.
+ -- NOTE(review): the `vector(1024)` column below requires the pgvector extension;
+ -- this file never runs CREATE EXTENSION vector — presumably an earlier migration
+ -- does. Confirm before running this file standalone.
9
+
10
+ CREATE TABLE IF NOT EXISTS ${schema}.insights (
11
+ id BIGSERIAL PRIMARY KEY,
12
+ tenant_id TEXT NOT NULL DEFAULT 'default',
13
+ agent_id TEXT NOT NULL,
14
+ insight_type TEXT NOT NULL
15
+ CHECK (insight_type IN ('preference', 'pattern', 'frustration', 'workflow')),
16
+ title TEXT NOT NULL CHECK (btrim(title) <> ''),
17
+ body TEXT NOT NULL CHECK (btrim(body) <> ''),
18
+ source_session_ids TEXT[] NOT NULL DEFAULT '{}',
19
+ evidence_window TSTZRANGE NOT NULL,
20
+ -- embedding: sized vector so HNSW can be built at migrate time. 1024 matches
21
+ -- the autodetect default (ollama bge-m3). Operators using a provider with
22
+ -- different dimensions (e.g. openai text-embedding-3-small = 1536) should
23
+ -- set `aquifer.embedding_dim` via GUC before running migrate(). Note the
24
+ -- coerce block below reads that GUC only when upgrading a pre-1.5.1 unsized
+ -- column; a fresh install always gets vector(1024) from this DDL regardless
+ -- of the GUC.
25
+ embedding vector(1024),
26
+ importance REAL NOT NULL DEFAULT 0.5
27
+ CHECK (importance >= 0 AND importance <= 1),
28
+ status TEXT NOT NULL DEFAULT 'active'
29
+ CHECK (status IN ('active', 'stale', 'superseded')),
30
+ superseded_by BIGINT REFERENCES ${schema}.insights(id) ON DELETE SET NULL,
31
+ idempotency_key TEXT,
32
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
33
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
34
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
35
+ );
36
+
37
+ -- Phase 2 C1: canonical_key_v2 identifies the CLAIM (type + canonicalClaim +
38
+ -- entitySet). idempotency_key keeps its revision-level role. Old rows have
39
+ -- canonical_key_v2 = NULL and are not retrofitted; new writes populate it.
40
+ ALTER TABLE ${schema}.insights
41
+ ADD COLUMN IF NOT EXISTS canonical_key_v2 TEXT;
42
+
43
+ -- Hot path: recall by agent + type, importance-ranked. Partial idx keeps
44
+ -- the index small by skipping stale/superseded rows.
45
+ CREATE INDEX IF NOT EXISTS idx_insights_active
46
+ ON ${schema}.insights (tenant_id, agent_id, insight_type, importance DESC, created_at DESC)
47
+ WHERE status = 'active';
48
+
49
+ -- Idempotency: caller-supplied key writes once. Partial allows NULL keys.
50
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_insights_idempotency
51
+ ON ${schema}.insights (idempotency_key)
52
+ WHERE idempotency_key IS NOT NULL;
53
+
54
+ -- Phase 2 C1: preflight lookup for canonical_key_v2 active row.
55
+ -- NOT unique — canonical identity can have multiple revisions (legacy as
56
+ -- 'superseded'); only the latest stays 'active'. Partial keeps index small.
57
+ CREATE INDEX IF NOT EXISTS idx_insights_canonical_v2_active
58
+ ON ${schema}.insights (tenant_id, agent_id, insight_type, canonical_key_v2, created_at DESC)
59
+ WHERE status = 'active' AND canonical_key_v2 IS NOT NULL;
60
+
61
+ -- Coerce pre-1.5.1 unsized `vector` column to a sized type so HNSW can be
62
+ -- built. Pre-1.5.1 declared `embedding vector` (no dim) which makes HNSW
63
+ -- creation permanently impossible — the "defer until first row" pattern
64
+ -- was a broken diagnosis of the real problem (pgvector needs a dim on the
65
+ -- COLUMN, not just the data). Idempotent: skipped if already sized.
66
+ -- Dim priority: existing row dim > `aquifer.embedding_dim` GUC > 1024 default.
67
+ -- Note: ${schema} is substituted to a quoted identifier by the loader, so
68
+ -- we string-concat rather than format(%I, ...) to avoid double-quoting.
69
+ DO $$
70
+ DECLARE
71
+ is_unsized BOOLEAN;
72
+ existing_dim INT;
73
+ target_dim INT;
74
+ BEGIN
75
+ SELECT format_type(atttypid, atttypmod) = 'vector'
76
+ INTO is_unsized
77
+ FROM pg_attribute
78
+ WHERE attrelid = '${schema}.insights'::regclass
79
+ AND attname = 'embedding';
80
+
81
+ IF is_unsized THEN
82
+ EXECUTE 'SELECT vector_dims(embedding) FROM ${schema}.insights WHERE embedding IS NOT NULL LIMIT 1'
83
+ INTO existing_dim;
84
+ target_dim := COALESCE(
85
+ existing_dim,
86
+ NULLIF(current_setting('aquifer.embedding_dim', true), '')::int,
87
+ 1024
88
+ );
89
+ EXECUTE 'ALTER TABLE ${schema}.insights ALTER COLUMN embedding TYPE vector('
90
+ || target_dim::text
91
+ || ') USING embedding::vector('
92
+ || target_dim::text
93
+ || ')';
94
+ RAISE NOTICE '[aquifer] insights.embedding coerced from unsized vector to vector(%)', target_dim;
95
+ END IF;
96
+ END$$;
97
+
98
+ -- Vector index: HNSW for cosine distance, only over active insights with
99
+ -- embeddings. Column is now sized so this builds on fresh installs too.
100
+ -- Defer / out-of-memory / unavailable handlers kept as safety nets.
101
+ DO $$
102
+ BEGIN
103
+ EXECUTE 'CREATE INDEX IF NOT EXISTS idx_insights_embedding
104
+ ON ${schema}.insights USING hnsw (embedding vector_cosine_ops)
105
+ WHERE status = ''active'' AND embedding IS NOT NULL';
106
+ EXCEPTION
107
+ WHEN undefined_object THEN
108
+ RAISE NOTICE '[aquifer] pgvector hnsw operator not available; skipping HNSW index on insights';
109
+ WHEN feature_not_supported THEN
110
+ RAISE NOTICE '[aquifer] HNSW not available on this pgvector; upgrade to >= 0.5.0 for index-accelerated insights recall';
111
+ WHEN out_of_memory THEN
112
+ RAISE WARNING '[aquifer] HNSW build on insights.embedding ran out of memory; raise maintenance_work_mem and re-run migrate()';
113
+ WHEN program_limit_exceeded THEN
114
+ RAISE WARNING '[aquifer] HNSW build on insights.embedding exceeded an internal limit; inspect pgvector logs';
115
+ END$$;
116
+
117
+ -- Diagnostic: who-references-which-session, for audit / re-extraction.
118
+ CREATE INDEX IF NOT EXISTS idx_insights_source_sessions
119
+ ON ${schema}.insights USING GIN (source_session_ids)
120
+ WHERE status = 'active';
121
+
122
+ COMMENT ON TABLE ${schema}.insights IS
123
+ 'Higher-order observations distilled from sessions. NOT facts (use entity_state_history). NOT raw recap (use session_summaries). Reflection / skill memory.';
124
+
125
+ COMMENT ON COLUMN ${schema}.insights.insight_type IS
126
+ 'preference = stable user preference; pattern = recurring behaviour/decision; frustration = repeated pain point; workflow = reusable procedure that worked.';
127
+
128
+ COMMENT ON COLUMN ${schema}.insights.evidence_window IS
129
+ 'Time range of source sessions used to derive this insight. Half-open by convention.';
130
+
131
+ COMMENT ON COLUMN ${schema}.insights.importance IS
132
+ 'Caller-supplied [0,1]; recall ranking blends with semantic score and recency.';
133
+
134
+ COMMENT ON COLUMN ${schema}.insights.canonical_key_v2 IS
135
+ 'Phase 2 C1: stable claim identity = sha256(tenant|agent|type|normalizeCanonicalClaim(claim)|normalizeEntitySet(entities)). Survives LLM title drift. idempotency_key tracks revisions within a claim.';
136
+
137
+ COMMENT ON COLUMN ${schema}.insights.idempotency_key IS
138
+ 'Revision-level dedupe key. Default in writer: sha256(canonical_key_v2, normalized_body, sorted_session_ids, window). Same claim in same window with same body = duplicate; body change or window extend = new revision (old superseded).';