@shadowforge0/aquifer-memory 1.2.1 → 1.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. package/README.md +8 -9
  2. package/consumers/cli.js +11 -1
  3. package/consumers/default/index.js +17 -4
  4. package/consumers/mcp.js +21 -0
  5. package/consumers/miranda/index.js +15 -4
  6. package/consumers/miranda/profile.json +145 -0
  7. package/consumers/miranda/recall-format.js +5 -3
  8. package/consumers/miranda/render-daily-md.js +186 -0
  9. package/consumers/shared/config.js +8 -0
  10. package/consumers/shared/factory.js +2 -1
  11. package/consumers/shared/llm.js +1 -1
  12. package/consumers/shared/recall-format.js +21 -1
  13. package/core/aquifer.js +693 -87
  14. package/core/artifacts.js +174 -0
  15. package/core/bundles.js +400 -0
  16. package/core/consolidation.js +340 -0
  17. package/core/decisions.js +164 -0
  18. package/core/entity-state.js +483 -0
  19. package/core/errors.js +97 -0
  20. package/core/handoff.js +153 -0
  21. package/core/insights.js +499 -0
  22. package/core/mcp-manifest.js +131 -0
  23. package/core/narratives.js +212 -0
  24. package/core/profiles.js +171 -0
  25. package/core/state.js +163 -0
  26. package/core/storage.js +82 -5
  27. package/core/timeline.js +152 -0
  28. package/index.js +14 -0
  29. package/package.json +1 -1
  30. package/pipeline/extract-state-changes.js +205 -0
  31. package/schema/001-base.sql +186 -16
  32. package/schema/002-entities.sql +35 -1
  33. package/schema/004-completion.sql +391 -0
  34. package/schema/005-entity-state-history.sql +87 -0
  35. package/schema/006-insights.sql +138 -0
  36. package/scripts/diagnose-fts-zh.js +37 -4
  37. package/scripts/drop-entity-state-history.sql +17 -0
  38. package/scripts/drop-insights.sql +12 -0
  39. package/scripts/extract-insights-from-recent-sessions.js +315 -0
  40. package/scripts/find-dburl-hints.js +29 -0
  41. package/scripts/queries.json +45 -0
  42. package/scripts/retro-recall-bench.js +409 -0
  43. package/scripts/sample-bench-queries.sql +75 -0
@@ -3,6 +3,95 @@
3
3
 
4
4
  CREATE EXTENSION IF NOT EXISTS vector;
5
5
  CREATE EXTENSION IF NOT EXISTS pg_trgm;
6
+
7
+ -- Chinese text search: prefer pg_jieba (dict.txt.big Traditional-aware, proper
8
+ -- word segmentation via jiebaqry search-engine mode that expands compounds into
9
+ -- multi-granularity tokens). Fall back to zhparser if jieba not installed; else
10
+ -- migration silently uses the simple tokenizer (trigram primary path unaffected).
11
+ -- Extension install errors (missing .so, non-superuser, OOM, etc.) are caught
12
+ -- per-extension so one failure doesn't prevent the other from being tried.
13
+ DO $$
14
+ BEGIN
15
+ BEGIN
16
+ CREATE EXTENSION IF NOT EXISTS pg_jieba;
17
+ EXCEPTION WHEN OTHERS THEN
18
+ RAISE NOTICE '[aquifer] pg_jieba install skipped (%); trying zhparser', SQLERRM;
19
+ END;
20
+ BEGIN
21
+ CREATE EXTENSION IF NOT EXISTS zhparser;
22
+ EXCEPTION WHEN OTHERS THEN
23
+ RAISE NOTICE '[aquifer] zhparser install skipped (%); Chinese FTS will use simple tokenizer', SQLERRM;
24
+ END;
25
+ END$$;
26
+
27
+ -- Build/upgrade zhcfg in the public namespace (where Aquifer consumers resolve
28
+ -- `to_tsvector('zhcfg', ...)` from). State machine:
29
+ -- S1: jieba present, no zhcfg in public -> CREATE zhcfg (COPY = jiebaqry)
30
+ -- S2: jieba absent, zhparser present, no zhcfg -> CREATE zhcfg zhparser + simple mapping
31
+ -- S3: jieba present, zhcfg backed by zhparser -> DROP + CREATE (COPY = jiebaqry)
32
+ -- S4: zhcfg already jieba-backed -> noop
33
+ -- S9: no backing extension but zhcfg still there -> rebuild against best available, or drop
34
+ --
35
+ -- zhcfg is a database-wide object; acquire a transaction-scoped global advisory
36
+ -- lock so concurrent migrate() calls on different Aquifer schemas in the same
37
+ -- database don't race on the DROP/CREATE. The lock auto-releases at COMMIT.
38
+ -- Key: hash of 'aquifer:zhcfg' truncated to PG advisory-lock int4 range.
39
+ --
40
+ -- Queries restrict to the public namespace to avoid ambiguity if operators have
41
+ -- created same-named text search configs elsewhere.
42
+ DO $$
43
+ DECLARE
44
+ have_jieba boolean := EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_jieba');
45
+ have_zhparser boolean := EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'zhparser');
46
+ public_oid oid := (SELECT oid FROM pg_namespace WHERE nspname = 'public');
47
+ zhcfg_parser text := NULL;
48
+ BEGIN
49
+ PERFORM pg_advisory_xact_lock(1434531247); -- stable global key
50
+
51
+ IF public_oid IS NOT NULL THEN
52
+ SELECT p.prsname INTO zhcfg_parser
53
+ FROM pg_ts_config c JOIN pg_ts_parser p ON c.cfgparser = p.oid
54
+ WHERE c.cfgname = 'zhcfg' AND c.cfgnamespace = public_oid
55
+ LIMIT 1;
56
+ END IF;
57
+
58
+ BEGIN
59
+ IF have_jieba AND (zhcfg_parser IS NULL OR zhcfg_parser = 'zhparser') THEN
60
+ -- S1 / S3: promote to jieba
61
+ IF zhcfg_parser = 'zhparser' THEN
62
+ EXECUTE 'DROP TEXT SEARCH CONFIGURATION public.zhcfg';
63
+ END IF;
64
+ EXECUTE 'CREATE TEXT SEARCH CONFIGURATION public.zhcfg ( COPY = public.jiebaqry )';
65
+
66
+ ELSIF have_zhparser AND zhcfg_parser IS NULL THEN
67
+ -- S2: zhparser-only new install. `eng` covers English tokens that zhparser
68
+ -- emits for Latin words in mixed-language text; without it they'd be dropped.
69
+ EXECUTE 'CREATE TEXT SEARCH CONFIGURATION public.zhcfg (PARSER = zhparser)';
70
+ EXECUTE 'ALTER TEXT SEARCH CONFIGURATION public.zhcfg
71
+ ADD MAPPING FOR n,v,a,i,e,l,j,nr,ns,nt,nz,vd,vn,m,r,t,c,p,u,d,o,y,w,x,q,b,k,s,f,h,g,eng WITH simple';
72
+
73
+ ELSIF NOT have_jieba AND NOT have_zhparser AND zhcfg_parser IS NOT NULL THEN
74
+ -- S9: backing extension dropped but zhcfg stayed; any `to_tsvector('zhcfg',...)`
75
+ -- would throw "parser does not exist" and break the FTS trigger.
76
+ -- Safer to remove zhcfg and let consumers fall back to 'simple'.
77
+ EXECUTE 'DROP TEXT SEARCH CONFIGURATION public.zhcfg';
78
+ RAISE WARNING '[aquifer] zhcfg removed: neither pg_jieba nor zhparser is installed; Chinese FTS falls back to simple';
79
+
80
+ ELSIF NOT have_jieba AND have_zhparser AND zhcfg_parser NOT IN ('zhparser') THEN
81
+ -- S9 partial: jieba gone but zhparser available; rebuild on zhparser.
82
+ EXECUTE 'DROP TEXT SEARCH CONFIGURATION public.zhcfg';
83
+ EXECUTE 'CREATE TEXT SEARCH CONFIGURATION public.zhcfg (PARSER = zhparser)';
84
+ EXECUTE 'ALTER TEXT SEARCH CONFIGURATION public.zhcfg
85
+ ADD MAPPING FOR n,v,a,i,e,l,j,nr,ns,nt,nz,vd,vn,m,r,t,c,p,u,d,o,y,w,x,q,b,k,s,f,h,g,eng WITH simple';
86
+ RAISE WARNING '[aquifer] zhcfg rebuilt on zhparser: pg_jieba no longer installed';
87
+ END IF;
88
+ EXCEPTION WHEN OTHERS THEN
89
+ -- Ownership mismatch, concurrent-modify race, dependency blocking DROP, etc.
90
+ -- Don't abort the entire migrate(); leave zhcfg as-is and warn.
91
+ RAISE WARNING '[aquifer] zhcfg (re)build skipped (%); existing config left untouched', SQLERRM;
92
+ END;
93
+ END$$;
94
+
6
95
  CREATE SCHEMA IF NOT EXISTS ${schema};
7
96
 
8
97
  -- =========================================================================
@@ -61,7 +150,9 @@ CREATE TABLE IF NOT EXISTS ${schema}.session_summaries (
61
150
  ended_at TIMESTAMPTZ,
62
151
  summary_text TEXT,
63
152
  structured_summary JSONB NOT NULL DEFAULT '{}',
64
- embedding vector,
153
+ -- Sized so HNSW can build at migrate time; 1024 matches ollama bge-m3 default.
154
+ -- Coerce DO block below upgrades pre-1.5.2 unsized columns.
155
+ embedding vector(1024),
65
156
  search_tsv TSVECTOR,
66
157
  search_text TEXT,
67
158
  access_count INT NOT NULL DEFAULT 0,
@@ -99,18 +190,48 @@ CREATE INDEX IF NOT EXISTS idx_summaries_embedding
99
190
  ON ${schema}.session_summaries (session_row_id)
100
191
  WHERE embedding IS NOT NULL;
101
192
 
193
+ -- Coerce pre-1.5.2 unsized `vector` column to sized so HNSW can be built.
194
+ -- pgvector requires a dim on the COLUMN, not just the data. Dim priority:
195
+ -- existing row dim > `aquifer.embedding_dim` GUC > 1024 default.
196
+ DO $$
197
+ DECLARE
198
+ is_unsized BOOLEAN;
199
+ existing_dim INT;
200
+ target_dim INT;
201
+ BEGIN
202
+ SELECT format_type(atttypid, atttypmod) = 'vector'
203
+ INTO is_unsized
204
+ FROM pg_attribute
205
+ WHERE attrelid = '${schema}.session_summaries'::regclass
206
+ AND attname = 'embedding';
207
+
208
+ IF is_unsized THEN
209
+ EXECUTE 'SELECT vector_dims(embedding) FROM ${schema}.session_summaries WHERE embedding IS NOT NULL LIMIT 1'
210
+ INTO existing_dim;
211
+ target_dim := COALESCE(
212
+ existing_dim,
213
+ NULLIF(current_setting('aquifer.embedding_dim', true), '')::int,
214
+ 1024
215
+ );
216
+ EXECUTE 'ALTER TABLE ${schema}.session_summaries ALTER COLUMN embedding TYPE vector('
217
+ || target_dim::text
218
+ || ') USING embedding::vector('
219
+ || target_dim::text
220
+ || ')';
221
+ RAISE NOTICE '[aquifer] session_summaries.embedding coerced from unsized vector to vector(%)', target_dim;
222
+ END IF;
223
+ END$$;
224
+
102
225
  -- HNSW approximate nearest-neighbor index for cosine-distance vector search.
103
- -- Without this, ORDER BY embedding <=> $vec degrades to seq scan at scale.
104
- -- Requires pgvector >= 0.5.0. HNSW cannot build on an empty unsized `vector`
105
- -- column (can't infer dim), so we defer on failure — re-running migrate()
106
- -- after the first insert will finish the job.
226
+ -- Column is sized via CREATE TABLE or the coerce block above, so the index
227
+ -- builds on fresh installs too. Safety-net EXCEPTION handlers stay for the
228
+ -- genuine recoverable failures; invalid_parameter_value is intentionally
229
+ -- NOT caught — it used to mask the unsized-column schema bug.
107
230
  DO $$
108
231
  BEGIN
109
232
  BEGIN
110
233
  EXECUTE 'CREATE INDEX IF NOT EXISTS idx_summaries_embedding_hnsw ON ${schema}.session_summaries USING hnsw (embedding vector_cosine_ops)';
111
234
  EXCEPTION
112
- WHEN invalid_parameter_value THEN
113
- RAISE NOTICE '[aquifer] HNSW index on session_summaries.embedding deferred; re-run migrate() after the first embedded row';
114
235
  WHEN feature_not_supported THEN
115
236
  RAISE NOTICE '[aquifer] HNSW not available on this pgvector; upgrade to >= 0.5.0 for index-accelerated vector search';
116
237
  WHEN out_of_memory THEN
@@ -155,11 +276,28 @@ BEGIN
155
276
  INTO facts_text
156
277
  FROM jsonb_array_elements(COALESCE(ss->'important_facts', '[]'::jsonb)) AS elem;
157
278
 
158
- NEW.search_tsv :=
159
- setweight(to_tsvector('simple', title_text), 'A') ||
160
- setweight(to_tsvector('simple', overview_text || ' ' || topics_text || ' ' || decisions_text), 'B') ||
161
- setweight(to_tsvector('simple', COALESCE(NEW.summary_text, '')), 'C') ||
162
- setweight(to_tsvector('simple', open_loops_text || ' ' || facts_text), 'D');
279
+ -- Use zhcfg if available (Chinese segmentation — pg_jieba jiebaqry on new
280
+ -- installs, zhparser as legacy fallback; zhcfg name is a stable indirection
281
+ -- managed by the DO block above). Else fall back to simple tokenizer.
282
+ -- The per-row IF EXISTS lookup hits a tiny fully-cached system catalog
283
+ -- (pg_ts_config, ~12 rows) — effectively free. Chose this over migrate-time
284
+ -- codegen because installing pg_jieba POST-install immediately benefits new
285
+ -- inserts without requiring a manual re-migrate.
286
+ IF EXISTS (SELECT 1 FROM pg_ts_config
287
+ WHERE cfgname = 'zhcfg'
288
+ AND cfgnamespace = 'public'::regnamespace) THEN
289
+ NEW.search_tsv :=
290
+ setweight(to_tsvector('zhcfg', title_text), 'A') ||
291
+ setweight(to_tsvector('zhcfg', overview_text || ' ' || topics_text || ' ' || decisions_text), 'B') ||
292
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.summary_text, '')), 'C') ||
293
+ setweight(to_tsvector('zhcfg', open_loops_text || ' ' || facts_text), 'D');
294
+ ELSE
295
+ NEW.search_tsv :=
296
+ setweight(to_tsvector('simple', title_text), 'A') ||
297
+ setweight(to_tsvector('simple', overview_text || ' ' || topics_text || ' ' || decisions_text), 'B') ||
298
+ setweight(to_tsvector('simple', COALESCE(NEW.summary_text, '')), 'C') ||
299
+ setweight(to_tsvector('simple', open_loops_text || ' ' || facts_text), 'D');
300
+ END IF;
163
301
 
164
302
  NEW.search_text :=
165
303
  title_text || ' ' || overview_text || ' ' || topics_text || ' ' ||
@@ -198,7 +336,9 @@ CREATE TABLE IF NOT EXISTS ${schema}.turn_embeddings (
198
336
  role TEXT NOT NULL DEFAULT 'user' CHECK (role = 'user'),
199
337
  content_text TEXT NOT NULL,
200
338
  content_hash TEXT NOT NULL,
201
- embedding vector NOT NULL,
339
+ -- Sized so HNSW can build at migrate time. Coerce DO block below upgrades
340
+ -- pre-1.5.2 unsized columns.
341
+ embedding vector(1024) NOT NULL,
202
342
  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
203
343
  UNIQUE (session_row_id, message_index)
204
344
  );
@@ -209,15 +349,45 @@ CREATE INDEX IF NOT EXISTS idx_turn_emb_session_row
209
349
  CREATE INDEX IF NOT EXISTS idx_turn_emb_tenant_agent
210
350
  ON ${schema}.turn_embeddings (tenant_id, agent_id, source);
211
351
 
352
+ -- Coerce pre-1.5.2 unsized `vector` column for turn_embeddings.
353
+ -- NOT NULL so every row has a dim; existing_dim resolves unless the table is empty.
354
+ DO $$
355
+ DECLARE
356
+ is_unsized BOOLEAN;
357
+ existing_dim INT;
358
+ target_dim INT;
359
+ BEGIN
360
+ SELECT format_type(atttypid, atttypmod) = 'vector'
361
+ INTO is_unsized
362
+ FROM pg_attribute
363
+ WHERE attrelid = '${schema}.turn_embeddings'::regclass
364
+ AND attname = 'embedding';
365
+
366
+ IF is_unsized THEN
367
+ EXECUTE 'SELECT vector_dims(embedding) FROM ${schema}.turn_embeddings WHERE embedding IS NOT NULL LIMIT 1'
368
+ INTO existing_dim;
369
+ target_dim := COALESCE(
370
+ existing_dim,
371
+ NULLIF(current_setting('aquifer.embedding_dim', true), '')::int,
372
+ 1024
373
+ );
374
+ EXECUTE 'ALTER TABLE ${schema}.turn_embeddings ALTER COLUMN embedding TYPE vector('
375
+ || target_dim::text
376
+ || ') USING embedding::vector('
377
+ || target_dim::text
378
+ || ')';
379
+ RAISE NOTICE '[aquifer] turn_embeddings.embedding coerced from unsized vector to vector(%)', target_dim;
380
+ END IF;
381
+ END$$;
382
+
212
383
  -- HNSW approximate nearest-neighbor index for turn-level vector search.
213
- -- See notes on session_summaries.embedding HNSW above.
384
+ -- See notes on session_summaries.embedding HNSW above. invalid_parameter_value
385
+ -- intentionally NOT caught — it used to mask the unsized-column schema bug.
214
386
  DO $$
215
387
  BEGIN
216
388
  BEGIN
217
389
  EXECUTE 'CREATE INDEX IF NOT EXISTS idx_turn_emb_embedding_hnsw ON ${schema}.turn_embeddings USING hnsw (embedding vector_cosine_ops)';
218
390
  EXCEPTION
219
- WHEN invalid_parameter_value THEN
220
- RAISE NOTICE '[aquifer] HNSW index on turn_embeddings.embedding deferred; re-run migrate() after the first embedded row';
221
391
  WHEN feature_not_supported THEN
222
392
  RAISE NOTICE '[aquifer] HNSW not available on this pgvector; upgrade to >= 0.5.0 for index-accelerated vector search';
223
393
  WHEN out_of_memory THEN
@@ -23,7 +23,10 @@ CREATE TABLE IF NOT EXISTS ${schema}.entities (
23
23
  entity_scope TEXT NOT NULL DEFAULT 'default',
24
24
  created_by TEXT,
25
25
  metadata JSONB NOT NULL DEFAULT '{}',
26
- embedding vector,
26
+ -- Sized so future HNSW index on entities.embedding builds cleanly. No HNSW
27
+ -- currently — entity lookup is name-trgm, not vector. Coerce block below
28
+ -- upgrades pre-1.5.2 installs.
29
+ embedding vector(1024),
27
30
  first_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(),
28
31
  last_seen_at TIMESTAMPTZ NOT NULL DEFAULT now()
29
32
  );
@@ -48,6 +51,37 @@ BEGIN
48
51
  END$$;
49
52
  ALTER TABLE ${schema}.entities ALTER COLUMN entity_scope SET NOT NULL;
50
53
 
54
+ -- Coerce pre-1.5.2 unsized `vector` column to sized for HNSW-ready shape.
55
+ -- Mirrors the session_summaries / turn_embeddings / insights coerce blocks.
56
+ DO $$
57
+ DECLARE
58
+ is_unsized BOOLEAN;
59
+ existing_dim INT;
60
+ target_dim INT;
61
+ BEGIN
62
+ SELECT format_type(atttypid, atttypmod) = 'vector'
63
+ INTO is_unsized
64
+ FROM pg_attribute
65
+ WHERE attrelid = '${schema}.entities'::regclass
66
+ AND attname = 'embedding';
67
+
68
+ IF is_unsized THEN
69
+ EXECUTE 'SELECT vector_dims(embedding) FROM ${schema}.entities WHERE embedding IS NOT NULL LIMIT 1'
70
+ INTO existing_dim;
71
+ target_dim := COALESCE(
72
+ existing_dim,
73
+ NULLIF(current_setting('aquifer.embedding_dim', true), '')::int,
74
+ 1024
75
+ );
76
+ EXECUTE 'ALTER TABLE ${schema}.entities ALTER COLUMN embedding TYPE vector('
77
+ || target_dim::text
78
+ || ') USING embedding::vector('
79
+ || target_dim::text
80
+ || ')';
81
+ RAISE NOTICE '[aquifer] entities.embedding coerced from unsized vector to vector(%)', target_dim;
82
+ END IF;
83
+ END$$;
84
+
51
85
  -- Unique constraint: entity identity is (tenant, name, scope)
52
86
  -- Drop legacy agent-based constraint if it exists
53
87
  DROP INDEX IF EXISTS ${schema}.idx_entities_tenant_name_agent;