@shadowforge0/aquifer-memory 1.2.1 → 1.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -9
- package/consumers/cli.js +11 -1
- package/consumers/default/index.js +17 -4
- package/consumers/mcp.js +21 -0
- package/consumers/miranda/index.js +15 -4
- package/consumers/miranda/profile.json +145 -0
- package/consumers/miranda/recall-format.js +5 -3
- package/consumers/miranda/render-daily-md.js +186 -0
- package/consumers/shared/config.js +8 -0
- package/consumers/shared/factory.js +2 -1
- package/consumers/shared/llm.js +1 -1
- package/consumers/shared/recall-format.js +21 -1
- package/core/aquifer.js +693 -87
- package/core/artifacts.js +174 -0
- package/core/bundles.js +400 -0
- package/core/consolidation.js +340 -0
- package/core/decisions.js +164 -0
- package/core/entity-state.js +483 -0
- package/core/errors.js +97 -0
- package/core/handoff.js +153 -0
- package/core/insights.js +499 -0
- package/core/mcp-manifest.js +131 -0
- package/core/narratives.js +212 -0
- package/core/profiles.js +171 -0
- package/core/state.js +163 -0
- package/core/storage.js +82 -5
- package/core/timeline.js +152 -0
- package/index.js +14 -0
- package/package.json +1 -1
- package/pipeline/extract-state-changes.js +205 -0
- package/schema/001-base.sql +186 -16
- package/schema/002-entities.sql +35 -1
- package/schema/004-completion.sql +391 -0
- package/schema/005-entity-state-history.sql +87 -0
- package/schema/006-insights.sql +138 -0
- package/scripts/diagnose-fts-zh.js +37 -4
- package/scripts/drop-entity-state-history.sql +17 -0
- package/scripts/drop-insights.sql +12 -0
- package/scripts/extract-insights-from-recent-sessions.js +315 -0
- package/scripts/find-dburl-hints.js +29 -0
- package/scripts/queries.json +45 -0
- package/scripts/retro-recall-bench.js +409 -0
- package/scripts/sample-bench-queries.sql +75 -0
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
-- 004-completion.sql — cross-session completion schema (P1 foundation)
|
|
2
|
+
--
|
|
3
|
+
-- Adds the minimal DDL needed for the aquifer-completion capability surface:
|
|
4
|
+
-- * shared set_updated_at() trigger function (reused by narratives, consumer_profiles,
|
|
5
|
+
-- and future completion tables)
|
|
6
|
+
-- * sessions.consolidation_phases JSONB (per-phase state map; see consolidation
|
|
7
|
+
-- orchestration spec)
|
|
8
|
+
-- * narratives table — cross-session state snapshot with supersede chain
|
|
9
|
+
-- * consumer_profiles table — consumer schema registry with composite primary key
|
|
10
|
+
-- (tenant_id, consumer_id, version) for future multi-tenant safety
|
|
11
|
+
--
|
|
12
|
+
-- All identifiers stay parameterised on ${schema} so P4 schema rename
|
|
13
|
+
-- (miranda → aquifer) is a one-line config change rather than a DDL rewrite.
|
|
14
|
+
|
|
15
|
+
-- Ensure pg_trgm available (used by existing migrations; re-declared for independent
|
|
16
|
+
-- run safety).
|
|
17
|
+
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
18
|
+
|
|
19
|
+
-- Shared row-level trigger function: stamps updated_at on the row being written.
-- Attached as a BEFORE UPDATE trigger by the tables in this migration that carry
-- an updated_at column (narratives, consumer_profiles, timeline_events,
-- decisions, artifacts).
CREATE OR REPLACE FUNCTION ${schema}.set_updated_at()
RETURNS trigger
AS $set_updated_at$
BEGIN
  -- Overwrite whatever value the statement supplied so the column always
  -- reflects the time of the latest modification.
  NEW.updated_at = now();
  RETURN NEW;
END;
$set_updated_at$
LANGUAGE plpgsql;
|
|
29
|
+
|
|
30
|
+
-- sessions.consolidation_phases: per-phase state map keyed by phase name.
|
|
31
|
+
-- Shape (documented in spec, enforced at application layer):
|
|
32
|
+
-- {
|
|
33
|
+
-- "<phase>": {
|
|
34
|
+
-- "status": "pending|claimed|running|succeeded|failed|skipped",
|
|
35
|
+
-- "attempts": int,
|
|
36
|
+
-- "idempotencyKey": string?, "claimToken": string?, "workerId": string?,
|
|
37
|
+
-- "startedAt": iso?, "finishedAt": iso?, "retryAfter": iso?,
|
|
38
|
+
-- "errorCode": string?, "errorMessage": string?,
|
|
39
|
+
-- "outputRef": { ... }?
|
|
40
|
+
-- }
|
|
41
|
+
-- }
|
|
42
|
+
ALTER TABLE ${schema}.sessions
|
|
43
|
+
ADD COLUMN IF NOT EXISTS consolidation_phases JSONB NOT NULL DEFAULT '{}'::jsonb;
|
|
44
|
+
|
|
45
|
+
-- narratives: cross-session state snapshots with scope-based addressing and
|
|
46
|
+
-- supersede chain. Only one 'active' row per (tenant, agent, scope, scope_key).
|
|
47
|
+
CREATE TABLE IF NOT EXISTS ${schema}.narratives (
|
|
48
|
+
id BIGSERIAL PRIMARY KEY,
|
|
49
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
50
|
+
session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
|
|
51
|
+
source_session_id TEXT,
|
|
52
|
+
agent_id TEXT NOT NULL DEFAULT 'main',
|
|
53
|
+
consumer_profile_id TEXT NOT NULL,
|
|
54
|
+
consumer_profile_version INT NOT NULL,
|
|
55
|
+
consumer_schema_hash TEXT NOT NULL,
|
|
56
|
+
idempotency_key TEXT UNIQUE,
|
|
57
|
+
scope TEXT NOT NULL DEFAULT 'agent'
|
|
58
|
+
CHECK (scope IN ('agent', 'workspace', 'project', 'custom')),
|
|
59
|
+
scope_key TEXT NOT NULL,
|
|
60
|
+
text TEXT NOT NULL,
|
|
61
|
+
status TEXT NOT NULL DEFAULT 'active'
|
|
62
|
+
CHECK (status IN ('active', 'archived', 'superseded')),
|
|
63
|
+
based_on_fact_ids BIGINT[] NOT NULL DEFAULT '{}',
|
|
64
|
+
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
65
|
+
superseded_by_narrative_id BIGINT REFERENCES ${schema}.narratives(id) ON DELETE SET NULL,
|
|
66
|
+
effective_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
67
|
+
search_tsv TSVECTOR,
|
|
68
|
+
search_text TEXT,
|
|
69
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
70
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
71
|
+
);
|
|
72
|
+
|
|
73
|
+
-- Only one active narrative per (tenant, agent, scope, scope_key).
|
|
74
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_narratives_active_scope
|
|
75
|
+
ON ${schema}.narratives (tenant_id, agent_id, scope, scope_key)
|
|
76
|
+
WHERE status = 'active';
|
|
77
|
+
|
|
78
|
+
CREATE INDEX IF NOT EXISTS idx_narratives_effective_at
|
|
79
|
+
ON ${schema}.narratives (tenant_id, agent_id, effective_at DESC);
|
|
80
|
+
|
|
81
|
+
CREATE INDEX IF NOT EXISTS idx_narratives_search_tsv
|
|
82
|
+
ON ${schema}.narratives USING GIN (search_tsv);
|
|
83
|
+
|
|
84
|
+
CREATE INDEX IF NOT EXISTS idx_narratives_search_text_trgm
|
|
85
|
+
ON ${schema}.narratives USING GIN (search_text gin_trgm_ops);
|
|
86
|
+
|
|
87
|
+
-- Trigger function for ${schema}.narratives: rebuilds the derived search columns
-- from the row being written.
--   search_text : text followed by the metadata JSON rendered as text
--                 (input of the trigram index)
--   search_tsv  : weight-A tsvector of text only
-- The 'zhcfg' text search configuration is used when it can actually be resolved
-- by unqualified name; otherwise the built-in 'simple' configuration is used.
CREATE OR REPLACE FUNCTION ${schema}.narratives_search_tsv_update()
RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
  tsv_config regconfig := 'simple';
BEGIN
  NEW.search_text := COALESCE(NEW.text, '') || ' ' || COALESCE(NEW.metadata::text, '');

  -- to_tsvector('zhcfg', ...) resolves the name through search_path, so the
  -- probe must also require visibility. A zhcfg that lives in a schema outside
  -- search_path would otherwise pass the check and then make every write fail.
  IF EXISTS (
    SELECT 1
    FROM pg_catalog.pg_ts_config AS cfg
    WHERE cfg.cfgname = 'zhcfg'
      AND pg_catalog.pg_ts_config_is_visible(cfg.oid)
  ) THEN
    tsv_config := 'zhcfg';
  END IF;

  NEW.search_tsv := setweight(to_tsvector(tsv_config, COALESCE(NEW.text, '')), 'A');
  RETURN NEW;
END;
$$;
|
|
101
|
+
|
|
102
|
+
DROP TRIGGER IF EXISTS trg_narratives_search_tsv ON ${schema}.narratives;
|
|
103
|
+
CREATE TRIGGER trg_narratives_search_tsv
|
|
104
|
+
BEFORE INSERT OR UPDATE OF text, metadata
|
|
105
|
+
ON ${schema}.narratives
|
|
106
|
+
FOR EACH ROW
|
|
107
|
+
EXECUTE FUNCTION ${schema}.narratives_search_tsv_update();
|
|
108
|
+
|
|
109
|
+
DROP TRIGGER IF EXISTS trg_narratives_updated_at ON ${schema}.narratives;
|
|
110
|
+
CREATE TRIGGER trg_narratives_updated_at
|
|
111
|
+
BEFORE UPDATE ON ${schema}.narratives
|
|
112
|
+
FOR EACH ROW
|
|
113
|
+
EXECUTE FUNCTION ${schema}.set_updated_at();
|
|
114
|
+
|
|
115
|
+
-- consumer_profiles: registry for consumer output contracts.
|
|
116
|
+
-- Composite primary key (tenant_id, consumer_id, version) future-proofs multi-tenant.
|
|
117
|
+
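-- UNIQUE (consumer_id, version, profile_hash) is meant to catch accidental hash
-- drift within a consumer version. NOTE(review): the constraint omits tenant_id,
-- so two tenants cannot both register the same (consumer_id, version, profile_hash)
-- triple, while within one tenant the primary key already allows only one row per
-- version. Confirm the intended scope before relying on multi-tenant registration.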
|
|
119
|
+
CREATE TABLE IF NOT EXISTS ${schema}.consumer_profiles (
|
|
120
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
121
|
+
consumer_id TEXT NOT NULL,
|
|
122
|
+
version INT NOT NULL,
|
|
123
|
+
profile_hash TEXT NOT NULL,
|
|
124
|
+
profile_json JSONB NOT NULL,
|
|
125
|
+
loaded_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
126
|
+
deprecated_at TIMESTAMPTZ,
|
|
127
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
128
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
129
|
+
PRIMARY KEY (tenant_id, consumer_id, version),
|
|
130
|
+
UNIQUE (consumer_id, version, profile_hash)
|
|
131
|
+
);
|
|
132
|
+
|
|
133
|
+
CREATE INDEX IF NOT EXISTS idx_consumer_profiles_active
|
|
134
|
+
ON ${schema}.consumer_profiles (tenant_id, consumer_id, version DESC)
|
|
135
|
+
WHERE deprecated_at IS NULL;
|
|
136
|
+
|
|
137
|
+
DROP TRIGGER IF EXISTS trg_consumer_profiles_updated_at ON ${schema}.consumer_profiles;
|
|
138
|
+
CREATE TRIGGER trg_consumer_profiles_updated_at
|
|
139
|
+
BEFORE UPDATE ON ${schema}.consumer_profiles
|
|
140
|
+
FOR EACH ROW
|
|
141
|
+
EXECUTE FUNCTION ${schema}.set_updated_at();
|
|
142
|
+
|
|
143
|
+
-- timeline_events: append-only event log keyed by (tenant, agent, occurred_at).
|
|
144
|
+
-- category vocabulary is consumer-owned (focus/todo/mood/handoff/narrative/cli
|
|
145
|
+
-- for Miranda default), event shape is strict core. idempotency_key UNIQUE
|
|
146
|
+
-- across the table to make caller-driven dedupe safe.
|
|
147
|
+
CREATE TABLE IF NOT EXISTS ${schema}.timeline_events (
|
|
148
|
+
id BIGSERIAL PRIMARY KEY,
|
|
149
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
150
|
+
session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
|
|
151
|
+
source_session_id TEXT,
|
|
152
|
+
agent_id TEXT NOT NULL DEFAULT 'main',
|
|
153
|
+
consumer_profile_id TEXT NOT NULL,
|
|
154
|
+
consumer_profile_version INT NOT NULL,
|
|
155
|
+
consumer_schema_hash TEXT NOT NULL,
|
|
156
|
+
idempotency_key TEXT UNIQUE,
|
|
157
|
+
occurred_at TIMESTAMPTZ NOT NULL,
|
|
158
|
+
source TEXT NOT NULL,
|
|
159
|
+
session_ref TEXT,
|
|
160
|
+
category TEXT NOT NULL,
|
|
161
|
+
text TEXT NOT NULL,
|
|
162
|
+
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
163
|
+
search_tsv TSVECTOR,
|
|
164
|
+
search_text TEXT,
|
|
165
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
166
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
CREATE INDEX IF NOT EXISTS idx_timeline_events_occurred_at
|
|
170
|
+
ON ${schema}.timeline_events (tenant_id, agent_id, occurred_at DESC);
|
|
171
|
+
|
|
172
|
+
CREATE INDEX IF NOT EXISTS idx_timeline_events_category
|
|
173
|
+
ON ${schema}.timeline_events (tenant_id, agent_id, category, occurred_at DESC);
|
|
174
|
+
|
|
175
|
+
CREATE INDEX IF NOT EXISTS idx_timeline_events_search_tsv
|
|
176
|
+
ON ${schema}.timeline_events USING GIN (search_tsv);
|
|
177
|
+
|
|
178
|
+
CREATE INDEX IF NOT EXISTS idx_timeline_events_search_text_trgm
|
|
179
|
+
ON ${schema}.timeline_events USING GIN (search_text gin_trgm_ops);
|
|
180
|
+
|
|
181
|
+
-- Trigger function for ${schema}.timeline_events: rebuilds the derived search
-- columns from the row being written.
--   search_text : category, text and the metadata JSON rendered as text,
--                 space-separated (input of the trigram index)
--   search_tsv  : category at weight B concatenated with text at weight A
-- The 'zhcfg' text search configuration is used when it can actually be resolved
-- by unqualified name; otherwise the built-in 'simple' configuration is used.
CREATE OR REPLACE FUNCTION ${schema}.timeline_events_search_tsv_update()
RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
  tsv_config regconfig := 'simple';
BEGIN
  NEW.search_text :=
    COALESCE(NEW.category, '') || ' ' ||
    COALESCE(NEW.text, '') || ' ' ||
    COALESCE(NEW.metadata::text, '');

  -- to_tsvector('zhcfg', ...) resolves the name through search_path, so the
  -- probe must also require visibility. A zhcfg that lives in a schema outside
  -- search_path would otherwise pass the check and then make every write fail.
  IF EXISTS (
    SELECT 1
    FROM pg_catalog.pg_ts_config AS cfg
    WHERE cfg.cfgname = 'zhcfg'
      AND pg_catalog.pg_ts_config_is_visible(cfg.oid)
  ) THEN
    tsv_config := 'zhcfg';
  END IF;

  NEW.search_tsv :=
    setweight(to_tsvector(tsv_config, COALESCE(NEW.category, '')), 'B') ||
    setweight(to_tsvector(tsv_config, COALESCE(NEW.text, '')), 'A');

  RETURN NEW;
END;
$$;
|
|
204
|
+
|
|
205
|
+
DROP TRIGGER IF EXISTS trg_timeline_events_search_tsv ON ${schema}.timeline_events;
|
|
206
|
+
CREATE TRIGGER trg_timeline_events_search_tsv
|
|
207
|
+
BEFORE INSERT OR UPDATE OF category, text, metadata
|
|
208
|
+
ON ${schema}.timeline_events
|
|
209
|
+
FOR EACH ROW
|
|
210
|
+
EXECUTE FUNCTION ${schema}.timeline_events_search_tsv_update();
|
|
211
|
+
|
|
212
|
+
DROP TRIGGER IF EXISTS trg_timeline_events_updated_at ON ${schema}.timeline_events;
|
|
213
|
+
CREATE TRIGGER trg_timeline_events_updated_at
|
|
214
|
+
BEFORE UPDATE ON ${schema}.timeline_events
|
|
215
|
+
FOR EACH ROW
|
|
216
|
+
EXECUTE FUNCTION ${schema}.set_updated_at();
|
|
217
|
+
|
|
218
|
+
-- session_states: latest-snapshot-per-scope with supersede chain.
|
|
219
|
+
-- is_latest + partial unique index enforces at-most-one latest per
|
|
220
|
+
-- (tenant, agent, scope_key); writer supersedes prior latest atomically.
|
|
221
|
+
CREATE TABLE IF NOT EXISTS ${schema}.session_states (
|
|
222
|
+
id BIGSERIAL PRIMARY KEY,
|
|
223
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
224
|
+
session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
|
|
225
|
+
source_session_id TEXT,
|
|
226
|
+
agent_id TEXT NOT NULL DEFAULT 'main',
|
|
227
|
+
scope_key TEXT NOT NULL,
|
|
228
|
+
consumer_profile_id TEXT NOT NULL,
|
|
229
|
+
consumer_profile_version INT NOT NULL,
|
|
230
|
+
consumer_schema_hash TEXT NOT NULL,
|
|
231
|
+
idempotency_key TEXT UNIQUE,
|
|
232
|
+
goal TEXT,
|
|
233
|
+
active_work JSONB NOT NULL DEFAULT '[]'::jsonb,
|
|
234
|
+
blockers JSONB NOT NULL DEFAULT '[]'::jsonb,
|
|
235
|
+
affect JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
236
|
+
payload JSONB NOT NULL,
|
|
237
|
+
is_latest BOOLEAN NOT NULL DEFAULT true,
|
|
238
|
+
supersedes_state_id BIGINT REFERENCES ${schema}.session_states(id) ON DELETE SET NULL,
|
|
239
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
240
|
+
);
|
|
241
|
+
|
|
242
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_session_states_latest
|
|
243
|
+
ON ${schema}.session_states (tenant_id, agent_id, scope_key)
|
|
244
|
+
WHERE is_latest = true;
|
|
245
|
+
|
|
246
|
+
CREATE INDEX IF NOT EXISTS idx_session_states_agent
|
|
247
|
+
ON ${schema}.session_states (tenant_id, agent_id, created_at DESC);
|
|
248
|
+
|
|
249
|
+
-- session_handoffs: append-only handoff log. getLatest by (agent) or (agent, session).
|
|
250
|
+
-- No latest-enforcement — every write is a row; retrieval sorts by created_at DESC.
|
|
251
|
+
CREATE TABLE IF NOT EXISTS ${schema}.session_handoffs (
|
|
252
|
+
id BIGSERIAL PRIMARY KEY,
|
|
253
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
254
|
+
session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
|
|
255
|
+
source_session_id TEXT NOT NULL,
|
|
256
|
+
agent_id TEXT NOT NULL DEFAULT 'main',
|
|
257
|
+
consumer_profile_id TEXT NOT NULL,
|
|
258
|
+
consumer_profile_version INT NOT NULL,
|
|
259
|
+
consumer_schema_hash TEXT NOT NULL,
|
|
260
|
+
idempotency_key TEXT UNIQUE,
|
|
261
|
+
status TEXT NOT NULL,
|
|
262
|
+
last_step TEXT,
|
|
263
|
+
next_step TEXT,
|
|
264
|
+
blockers JSONB NOT NULL DEFAULT '[]'::jsonb,
|
|
265
|
+
decided JSONB NOT NULL DEFAULT '[]'::jsonb,
|
|
266
|
+
open_loops JSONB NOT NULL DEFAULT '[]'::jsonb,
|
|
267
|
+
payload JSONB NOT NULL,
|
|
268
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
269
|
+
);
|
|
270
|
+
|
|
271
|
+
CREATE INDEX IF NOT EXISTS idx_session_handoffs_agent
|
|
272
|
+
ON ${schema}.session_handoffs (tenant_id, agent_id, created_at DESC);
|
|
273
|
+
|
|
274
|
+
CREATE INDEX IF NOT EXISTS idx_session_handoffs_session
|
|
275
|
+
ON ${schema}.session_handoffs (tenant_id, source_session_id, created_at DESC);
|
|
276
|
+
|
|
277
|
+
-- decisions: append-only decision log. status vocabulary
|
|
278
|
+
-- (proposed/committed/reversed) lives in a CHECK constraint so bad writes
|
|
279
|
+
-- fail at DB boundary. reversed_by_decision_id forms a supersede chain.
|
|
280
|
+
CREATE TABLE IF NOT EXISTS ${schema}.decisions (
|
|
281
|
+
id BIGSERIAL PRIMARY KEY,
|
|
282
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
283
|
+
session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
|
|
284
|
+
source_session_id TEXT,
|
|
285
|
+
agent_id TEXT NOT NULL DEFAULT 'main',
|
|
286
|
+
consumer_profile_id TEXT NOT NULL,
|
|
287
|
+
consumer_profile_version INT NOT NULL,
|
|
288
|
+
consumer_schema_hash TEXT NOT NULL,
|
|
289
|
+
idempotency_key TEXT UNIQUE,
|
|
290
|
+
payload JSONB NOT NULL,
|
|
291
|
+
status TEXT NOT NULL
|
|
292
|
+
CHECK (status IN ('proposed', 'committed', 'reversed')),
|
|
293
|
+
decision_text TEXT NOT NULL,
|
|
294
|
+
reason_text TEXT,
|
|
295
|
+
decided_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
296
|
+
reversed_by_decision_id BIGINT REFERENCES ${schema}.decisions(id) ON DELETE SET NULL,
|
|
297
|
+
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
298
|
+
search_tsv TSVECTOR,
|
|
299
|
+
search_text TEXT,
|
|
300
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
301
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
302
|
+
);
|
|
303
|
+
|
|
304
|
+
CREATE INDEX IF NOT EXISTS idx_decisions_status
|
|
305
|
+
ON ${schema}.decisions (tenant_id, agent_id, status, decided_at DESC);
|
|
306
|
+
|
|
307
|
+
CREATE INDEX IF NOT EXISTS idx_decisions_session
|
|
308
|
+
ON ${schema}.decisions (tenant_id, source_session_id);
|
|
309
|
+
|
|
310
|
+
CREATE INDEX IF NOT EXISTS idx_decisions_search_tsv
|
|
311
|
+
ON ${schema}.decisions USING GIN (search_tsv);
|
|
312
|
+
|
|
313
|
+
-- Trigger function for ${schema}.decisions: rebuilds the derived search columns
-- from the row being written.
--   search_text : decision_text, reason_text and the metadata JSON rendered as
--                 text, space-separated
--   search_tsv  : decision_text at weight A concatenated with reason_text at
--                 weight B (reason_text is nullable, hence the COALESCE)
-- The 'zhcfg' text search configuration is used when it can actually be resolved
-- by unqualified name; otherwise the built-in 'simple' configuration is used.
CREATE OR REPLACE FUNCTION ${schema}.decisions_search_tsv_update()
RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
  tsv_config regconfig := 'simple';
BEGIN
  NEW.search_text :=
    COALESCE(NEW.decision_text, '') || ' ' ||
    COALESCE(NEW.reason_text, '') || ' ' ||
    COALESCE(NEW.metadata::text, '');

  -- to_tsvector('zhcfg', ...) resolves the name through search_path, so the
  -- probe must also require visibility. A zhcfg that lives in a schema outside
  -- search_path would otherwise pass the check and then make every write fail.
  IF EXISTS (
    SELECT 1
    FROM pg_catalog.pg_ts_config AS cfg
    WHERE cfg.cfgname = 'zhcfg'
      AND pg_catalog.pg_ts_config_is_visible(cfg.oid)
  ) THEN
    tsv_config := 'zhcfg';
  END IF;

  NEW.search_tsv :=
    setweight(to_tsvector(tsv_config, COALESCE(NEW.decision_text, '')), 'A') ||
    setweight(to_tsvector(tsv_config, COALESCE(NEW.reason_text, '')), 'B');

  RETURN NEW;
END;
$$;
|
|
336
|
+
|
|
337
|
+
DROP TRIGGER IF EXISTS trg_decisions_search_tsv ON ${schema}.decisions;
|
|
338
|
+
CREATE TRIGGER trg_decisions_search_tsv
|
|
339
|
+
BEFORE INSERT OR UPDATE OF decision_text, reason_text, metadata
|
|
340
|
+
ON ${schema}.decisions
|
|
341
|
+
FOR EACH ROW
|
|
342
|
+
EXECUTE FUNCTION ${schema}.decisions_search_tsv_update();
|
|
343
|
+
|
|
344
|
+
DROP TRIGGER IF EXISTS trg_decisions_updated_at ON ${schema}.decisions;
|
|
345
|
+
CREATE TRIGGER trg_decisions_updated_at
|
|
346
|
+
BEFORE UPDATE ON ${schema}.decisions
|
|
347
|
+
FOR EACH ROW
|
|
348
|
+
EXECUTE FUNCTION ${schema}.set_updated_at();
|
|
349
|
+
|
|
350
|
+
-- artifacts: records producer-declared outputs (daily md, render, export).
|
|
351
|
+
-- Aquifer doesn't interpret payload — producers own shape. status lifecycle
|
|
352
|
+
-- pending → produced|failed|discarded.
|
|
353
|
+
CREATE TABLE IF NOT EXISTS ${schema}.artifacts (
|
|
354
|
+
id BIGSERIAL PRIMARY KEY,
|
|
355
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
356
|
+
session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
|
|
357
|
+
source_session_id TEXT,
|
|
358
|
+
agent_id TEXT NOT NULL DEFAULT 'main',
|
|
359
|
+
consumer_profile_id TEXT NOT NULL,
|
|
360
|
+
consumer_profile_version INT NOT NULL,
|
|
361
|
+
consumer_schema_hash TEXT NOT NULL,
|
|
362
|
+
idempotency_key TEXT UNIQUE,
|
|
363
|
+
producer_id TEXT NOT NULL,
|
|
364
|
+
artifact_type TEXT NOT NULL,
|
|
365
|
+
trigger_phase TEXT,
|
|
366
|
+
format TEXT NOT NULL,
|
|
367
|
+
destination TEXT NOT NULL,
|
|
368
|
+
status TEXT NOT NULL DEFAULT 'pending'
|
|
369
|
+
CHECK (status IN ('pending', 'produced', 'failed', 'discarded')),
|
|
370
|
+
content_ref TEXT,
|
|
371
|
+
payload JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
372
|
+
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
373
|
+
produced_at TIMESTAMPTZ,
|
|
374
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
375
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
376
|
+
);
|
|
377
|
+
|
|
378
|
+
CREATE INDEX IF NOT EXISTS idx_artifacts_lookup
|
|
379
|
+
ON ${schema}.artifacts (tenant_id, agent_id, producer_id, created_at DESC);
|
|
380
|
+
|
|
381
|
+
CREATE INDEX IF NOT EXISTS idx_artifacts_session
|
|
382
|
+
ON ${schema}.artifacts (tenant_id, source_session_id, created_at DESC);
|
|
383
|
+
|
|
384
|
+
CREATE INDEX IF NOT EXISTS idx_artifacts_status
|
|
385
|
+
ON ${schema}.artifacts (tenant_id, status, created_at DESC);
|
|
386
|
+
|
|
387
|
+
DROP TRIGGER IF EXISTS trg_artifacts_updated_at ON ${schema}.artifacts;
|
|
388
|
+
CREATE TRIGGER trg_artifacts_updated_at
|
|
389
|
+
BEFORE UPDATE ON ${schema}.artifacts
|
|
390
|
+
FOR EACH ROW
|
|
391
|
+
EXECUTE FUNCTION ${schema}.set_updated_at();
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
-- entity_state_history: temporal state-change tracking on entities.
|
|
2
|
+
--
|
|
3
|
+
-- Captures discrete attribute transitions (e.g. version.stable=1.2.1 -> 1.3.0,
|
|
4
|
+
-- editor.preference=vim -> nvim). Designed as additive overlay on the entities
|
|
5
|
+
-- table; DROP-clean — no triggers/functions/views, removing this table leaves
|
|
6
|
+
-- the rest of Aquifer untouched.
|
|
7
|
+
--
|
|
8
|
+
-- See spec.md Q3 and ~/.claude/develop-runs/20260419-142432-aquifer-memory-routes/.
|
|
9
|
+
|
|
10
|
+
CREATE TABLE IF NOT EXISTS ${schema}.entity_state_history (
|
|
11
|
+
id BIGSERIAL PRIMARY KEY,
|
|
12
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
13
|
+
agent_id TEXT NOT NULL DEFAULT 'main',
|
|
14
|
+
entity_id BIGINT NOT NULL
|
|
15
|
+
REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
|
|
16
|
+
session_row_id BIGINT
|
|
17
|
+
REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
|
|
18
|
+
evidence_session_id TEXT,
|
|
19
|
+
attribute TEXT NOT NULL CHECK (btrim(attribute) <> ''),
|
|
20
|
+
value JSONB NOT NULL,
|
|
21
|
+
valid_from TIMESTAMPTZ NOT NULL,
|
|
22
|
+
valid_to TIMESTAMPTZ,
|
|
23
|
+
evidence_text TEXT NOT NULL DEFAULT '',
|
|
24
|
+
confidence NUMERIC(4,3) NOT NULL DEFAULT 0.7
|
|
25
|
+
CHECK (confidence >= 0 AND confidence <= 1),
|
|
26
|
+
source TEXT NOT NULL DEFAULT 'llm'
|
|
27
|
+
CHECK (source IN ('llm', 'manual', 'infra')),
|
|
28
|
+
idempotency_key TEXT,
|
|
29
|
+
supersedes_state_id BIGINT
|
|
30
|
+
REFERENCES ${schema}.entity_state_history(id) ON DELETE SET NULL,
|
|
31
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
32
|
+
CHECK (valid_to IS NULL OR valid_to > valid_from)
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
-- Partial UNIQUE: only one "current" (valid_to IS NULL) row per
|
|
36
|
+
-- (tenant, agent, entity, attribute). This is the temporal invariant —
|
|
37
|
+
-- two open intervals on the same key would mean the table is corrupt.
|
|
38
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_state_history_current
|
|
39
|
+
ON ${schema}.entity_state_history (tenant_id, agent_id, entity_id, attribute)
|
|
40
|
+
WHERE valid_to IS NULL;
|
|
41
|
+
|
|
42
|
+
-- Idempotency: same caller-supplied key writes once. Partial allows NULL keys
|
|
43
|
+
-- (manual writes don't always need them).
|
|
44
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_state_history_idempotency
|
|
45
|
+
ON ${schema}.entity_state_history (idempotency_key)
|
|
46
|
+
WHERE idempotency_key IS NOT NULL;
|
|
47
|
+
|
|
48
|
+
-- Hot path: history-by-attribute timeline scan, newest-first.
|
|
49
|
+
CREATE INDEX IF NOT EXISTS idx_entity_state_history_entity_attr_time
|
|
50
|
+
ON ${schema}.entity_state_history
|
|
51
|
+
(tenant_id, agent_id, entity_id, attribute, valid_from DESC, id DESC);
|
|
52
|
+
|
|
53
|
+
-- Hot path: full history for an entity (no attribute filter).
|
|
54
|
+
CREATE INDEX IF NOT EXISTS idx_entity_state_history_entity_time
|
|
55
|
+
ON ${schema}.entity_state_history
|
|
56
|
+
(tenant_id, agent_id, entity_id, valid_from DESC, id DESC);
|
|
57
|
+
|
|
58
|
+
-- Diagnostic: trace all state changes captured from a single session.
|
|
59
|
+
CREATE INDEX IF NOT EXISTS idx_entity_state_history_evidence_session
|
|
60
|
+
ON ${schema}.entity_state_history
|
|
61
|
+
(tenant_id, agent_id, evidence_session_id, created_at DESC)
|
|
62
|
+
WHERE evidence_session_id IS NOT NULL;
|
|
63
|
+
|
|
64
|
+
CREATE INDEX IF NOT EXISTS idx_entity_state_history_session_row
|
|
65
|
+
ON ${schema}.entity_state_history (session_row_id)
|
|
66
|
+
WHERE session_row_id IS NOT NULL;
|
|
67
|
+
|
|
68
|
+
COMMENT ON TABLE ${schema}.entity_state_history IS
|
|
69
|
+
'Bi-temporal state changes on entities. Each row = one (entity, attribute) value valid over [valid_from, valid_to). NULL valid_to = current. supersedes_state_id chains supersession history.';
|
|
70
|
+
|
|
71
|
+
COMMENT ON COLUMN ${schema}.entity_state_history.attribute IS
|
|
72
|
+
'Stable snake_case path identifying what changed (e.g. version.stable, editor.preference, runtime.node.version). Caller-defined; treat as opaque key.';
|
|
73
|
+
|
|
74
|
+
COMMENT ON COLUMN ${schema}.entity_state_history.valid_from IS
|
|
75
|
+
'When the new value became true in the real world (not when it was observed). Use evidence anchor; fall back to session started_at if unspecified.';
|
|
76
|
+
|
|
77
|
+
COMMENT ON COLUMN ${schema}.entity_state_history.valid_to IS
|
|
78
|
+
'NULL = currently valid. Otherwise, the timestamp at which a successor row took over. Closed intervals must satisfy valid_to > valid_from.';
|
|
79
|
+
|
|
80
|
+
COMMENT ON COLUMN ${schema}.entity_state_history.idempotency_key IS
|
|
81
|
+
'Caller-supplied dedupe key. Default: sha256(tenant, agent, entity, attribute, canonical_json(value), valid_from, source). Replay safe.';
|
|
82
|
+
|
|
83
|
+
COMMENT ON COLUMN ${schema}.entity_state_history.supersedes_state_id IS
|
|
84
|
+
'Chain pointer to the row this one closed (set valid_to on). NULL if this is the first known value for (entity, attribute).';
|
|
85
|
+
|
|
86
|
+
COMMENT ON COLUMN ${schema}.entity_state_history.evidence_session_id IS
|
|
87
|
+
'Session that produced this evidence (text-level session_id, not session_row_id). For audit / re-extraction.';
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
-- insights: higher-order reflection from session content (Q4).
|
|
2
|
+
--
|
|
3
|
+
-- Holds preferences, recurring patterns, frustrations, and successful
|
|
4
|
+
-- workflows distilled from session_summaries over a window. Vector-indexed
|
|
5
|
+
-- for natural-language recall via aquifer.recallInsights().
|
|
6
|
+
--
|
|
7
|
+
-- DROP-clean: no triggers/functions, no FK from anywhere else into this table.
|
|
8
|
+
-- See scripts/drop-insights.sql.
|
|
9
|
+
|
|
10
|
+
CREATE TABLE IF NOT EXISTS ${schema}.insights (
|
|
11
|
+
id BIGSERIAL PRIMARY KEY,
|
|
12
|
+
tenant_id TEXT NOT NULL DEFAULT 'default',
|
|
13
|
+
agent_id TEXT NOT NULL,
|
|
14
|
+
insight_type TEXT NOT NULL
|
|
15
|
+
CHECK (insight_type IN ('preference', 'pattern', 'frustration', 'workflow')),
|
|
16
|
+
title TEXT NOT NULL CHECK (btrim(title) <> ''),
|
|
17
|
+
body TEXT NOT NULL CHECK (btrim(body) <> ''),
|
|
18
|
+
source_session_ids TEXT[] NOT NULL DEFAULT '{}',
|
|
19
|
+
evidence_window TSTZRANGE NOT NULL,
|
|
20
|
+
-- embedding: sized vector so HNSW can be built at migrate time. 1024 matches
-- the autodetect default (ollama bge-m3). This CREATE TABLE always declares
-- vector(1024); the `aquifer.embedding_dim` GUC is read only by the coerce
-- block below, which runs solely when an existing column is still unsized.
-- NOTE(review): operators using a provider with different dimensions (e.g.
-- openai text-embedding-3-small = 1536) on a fresh install presumably need to
-- alter the column type themselves — confirm against migrate().
|
|
25
|
+
embedding vector(1024),
|
|
26
|
+
importance REAL NOT NULL DEFAULT 0.5
|
|
27
|
+
CHECK (importance >= 0 AND importance <= 1),
|
|
28
|
+
status TEXT NOT NULL DEFAULT 'active'
|
|
29
|
+
CHECK (status IN ('active', 'stale', 'superseded')),
|
|
30
|
+
superseded_by BIGINT REFERENCES ${schema}.insights(id) ON DELETE SET NULL,
|
|
31
|
+
idempotency_key TEXT,
|
|
32
|
+
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
33
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
34
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
35
|
+
);
|
|
36
|
+
|
|
37
|
+
-- Phase 2 C1: canonical_key_v2 identifies the CLAIM (type + canonicalClaim +
|
|
38
|
+
-- entitySet). idempotency_key keeps its revision-level role. Old rows have
|
|
39
|
+
-- canonical_key_v2 = NULL and are not retrofitted; new writes populate it.
|
|
40
|
+
ALTER TABLE ${schema}.insights
|
|
41
|
+
ADD COLUMN IF NOT EXISTS canonical_key_v2 TEXT;
|
|
42
|
+
|
|
43
|
+
-- Hot path: recall by agent + type, importance-ranked. Partial idx keeps
|
|
44
|
+
-- the index small by skipping stale/superseded rows.
|
|
45
|
+
CREATE INDEX IF NOT EXISTS idx_insights_active
|
|
46
|
+
ON ${schema}.insights (tenant_id, agent_id, insight_type, importance DESC, created_at DESC)
|
|
47
|
+
WHERE status = 'active';
|
|
48
|
+
|
|
49
|
+
-- Idempotency: caller-supplied key writes once. Partial allows NULL keys.
|
|
50
|
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_insights_idempotency
|
|
51
|
+
ON ${schema}.insights (idempotency_key)
|
|
52
|
+
WHERE idempotency_key IS NOT NULL;
|
|
53
|
+
|
|
54
|
+
-- Phase 2 C1: preflight lookup for canonical_key_v2 active row.
|
|
55
|
+
-- NOT unique — canonical identity can have multiple revisions (legacy as
|
|
56
|
+
-- 'superseded'); only the latest stays 'active'. Partial keeps index small.
|
|
57
|
+
CREATE INDEX IF NOT EXISTS idx_insights_canonical_v2_active
|
|
58
|
+
ON ${schema}.insights (tenant_id, agent_id, insight_type, canonical_key_v2, created_at DESC)
|
|
59
|
+
WHERE status = 'active' AND canonical_key_v2 IS NOT NULL;
|
|
60
|
+
|
|
61
|
+
-- Coerce pre-1.5.1 unsized `vector` column to a sized type so HNSW can be
|
|
62
|
+
-- built. Pre-1.5.1 declared `embedding vector` (no dim) which makes HNSW
|
|
63
|
+
-- creation permanently impossible — the "defer until first row" pattern
|
|
64
|
+
-- was a broken diagnosis of the real problem (pgvector needs a dim on the
|
|
65
|
+
-- COLUMN, not just the data). Idempotent: skipped if already sized.
|
|
66
|
+
-- Dim priority: existing row dim > `aquifer.embedding_dim` GUC > 1024 default.
|
|
67
|
+
-- Note: ${schema} is substituted to a quoted identifier by the loader, so
|
|
68
|
+
-- we string-concat rather than format(%I, ...) to avoid double-quoting.
|
|
69
|
+
DO $$
DECLARE
  column_is_unsized BOOLEAN;
  sampled_dim       INT;
  new_dim           INT;
  dim_text          TEXT;
BEGIN
  -- An unsized column reports its type as plain 'vector'; a sized one reports
  -- 'vector(N)'. No matching attribute row leaves the flag NULL.
  SELECT format_type(atttypid, atttypmod) = 'vector'
    INTO column_is_unsized
    FROM pg_attribute
   WHERE attrelid = '${schema}.insights'::regclass
     AND attname = 'embedding';

  -- Nothing to do when the column is already sized (or was not found).
  IF column_is_unsized IS NOT TRUE THEN
    RETURN;
  END IF;

  -- Dimension priority: a stored embedding, then the aquifer.embedding_dim
  -- setting (missing or empty is ignored), then 1024.
  EXECUTE 'SELECT vector_dims(embedding) FROM ${schema}.insights WHERE embedding IS NOT NULL LIMIT 1'
     INTO sampled_dim;

  new_dim := COALESCE(
    sampled_dim,
    NULLIF(current_setting('aquifer.embedding_dim', true), '')::int,
    1024
  );
  dim_text := new_dim::text;

  -- Plain concatenation (not format('%I')) because ${schema} is already
  -- substituted as a quoted identifier by the loader.
  EXECUTE 'ALTER TABLE ${schema}.insights ALTER COLUMN embedding TYPE vector('
       || dim_text
       || ') USING embedding::vector('
       || dim_text
       || ')';

  RAISE NOTICE '[aquifer] insights.embedding coerced from unsized vector to vector(%)', new_dim;
END$$;
|
|
97
|
+
|
|
98
|
+
-- Vector index: HNSW for cosine distance, only over active insights with
|
|
99
|
+
-- embeddings. Column is now sized so this builds on fresh installs too.
|
|
100
|
+
-- Defer / out-of-memory / unavailable handlers kept as safety nets.
|
|
101
|
+
DO $$
DECLARE
  -- Partial HNSW index for cosine distance over active rows that have an embedding.
  hnsw_ddl CONSTANT TEXT := 'CREATE INDEX IF NOT EXISTS idx_insights_embedding
    ON ${schema}.insights USING hnsw (embedding vector_cosine_ops)
    WHERE status = ''active'' AND embedding IS NOT NULL';
BEGIN
  EXECUTE hnsw_ddl;
EXCEPTION
  -- Each handler downgrades a failed index build to a message so the
  -- surrounding migration can continue without the index.
  WHEN undefined_object THEN
    RAISE NOTICE '[aquifer] pgvector hnsw operator not available; skipping HNSW index on insights';
  WHEN feature_not_supported THEN
    RAISE NOTICE '[aquifer] HNSW not available on this pgvector; upgrade to >= 0.5.0 for index-accelerated insights recall';
  WHEN out_of_memory THEN
    RAISE WARNING '[aquifer] HNSW build on insights.embedding ran out of memory; raise maintenance_work_mem and re-run migrate()';
  WHEN program_limit_exceeded THEN
    RAISE WARNING '[aquifer] HNSW build on insights.embedding exceeded an internal limit; inspect pgvector logs';
END$$;
|
|
116
|
+
|
|
117
|
+
-- Diagnostic: who-references-which-session, for audit / re-extraction.
|
|
118
|
+
CREATE INDEX IF NOT EXISTS idx_insights_source_sessions
|
|
119
|
+
ON ${schema}.insights USING GIN (source_session_ids)
|
|
120
|
+
WHERE status = 'active';
|
|
121
|
+
|
|
122
|
+
COMMENT ON TABLE ${schema}.insights IS
|
|
123
|
+
'Higher-order observations distilled from sessions. NOT facts (use entity_state_history). NOT raw recap (use session_summaries). Reflection / skill memory.';
|
|
124
|
+
|
|
125
|
+
COMMENT ON COLUMN ${schema}.insights.insight_type IS
|
|
126
|
+
'preference = stable user preference; pattern = recurring behaviour/decision; frustration = repeated pain point; workflow = reusable procedure that worked.';
|
|
127
|
+
|
|
128
|
+
COMMENT ON COLUMN ${schema}.insights.evidence_window IS
|
|
129
|
+
'Time range of source sessions used to derive this insight. Half-open by convention.';
|
|
130
|
+
|
|
131
|
+
COMMENT ON COLUMN ${schema}.insights.importance IS
|
|
132
|
+
'Caller-supplied [0,1]; recall ranking blends with semantic score and recency.';
|
|
133
|
+
|
|
134
|
+
COMMENT ON COLUMN ${schema}.insights.canonical_key_v2 IS
|
|
135
|
+
'Phase 2 C1: stable claim identity = sha256(tenant|agent|type|normalizeCanonicalClaim(claim)|normalizeEntitySet(entities)). Survives LLM title drift. idempotency_key tracks revisions within a claim.';
|
|
136
|
+
|
|
137
|
+
COMMENT ON COLUMN ${schema}.insights.idempotency_key IS
|
|
138
|
+
'Revision-level dedupe key. Default in writer: sha256(canonical_key_v2, normalized_body, sorted_session_ids, window). Same claim in same window with same body = duplicate; body change or window extend = new revision (old superseded).';
|