@jhizzard/termdeck 1.0.14 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19) hide show
  1. package/package.json +1 -1
  2. package/packages/cli/src/init-mnestra.js +50 -10
  3. package/packages/client/public/app.js +57 -0
  4. package/packages/server/src/index.js +19 -1
  5. package/packages/server/src/setup/migrations.js +514 -1
  6. package/packages/server/src/setup/mnestra-migrations/001_mnestra_tables.sql +2 -2
  7. package/packages/server/src/setup/mnestra-migrations/002_mnestra_search_function.sql +1 -1
  8. package/packages/server/src/setup/mnestra-migrations/009_memory_relationship_metadata.sql +1 -1
  9. package/packages/server/src/setup/mnestra-migrations/011_project_tag_backfill.sql +7 -3
  10. package/packages/server/src/setup/mnestra-migrations/012_project_tag_re_taxonomy.sql +2 -2
  11. package/packages/server/src/setup/mnestra-migrations/014_explicit_grants.sql +3 -3
  12. package/packages/server/src/setup/mnestra-migrations/016_mnestra_doctor_probes.sql +3 -3
  13. package/packages/server/src/setup/mnestra-migrations/017_memory_sessions_session_metadata.sql +5 -5
  14. package/packages/server/src/setup/mnestra-migrations/018_rumen_processed_at.sql +1 -1
  15. package/packages/server/src/setup/mnestra-migrations/019_security_hardening.sql +190 -0
  16. package/packages/server/src/setup/mnestra-migrations/020_migration_tracking.sql +57 -0
  17. package/packages/server/src/setup/mnestra-migrations/021_project_tag_canonicalize_claimguard.sql +175 -0
  18. package/packages/server/src/setup/mnestra-migrations/022_source_agent_backfill.sql +182 -0
  19. package/packages/server/src/setup/rumen/functions/rumen-tick/index.ts +0 -30
@@ -3,7 +3,7 @@
3
3
  -- Sprint 51.6 T3 (TermDeck v1.0.2 hotfix wave). Brings the canonical engram
4
4
  -- memory_sessions schema in line with the rag-system writer's column set so
5
5
  -- TermDeck's bundled session-end hook can write a uniform shape on both
6
- -- fresh-canonical installs and Joshua's daily-driver petvetbid (where the
6
+ -- fresh-canonical installs and Joshua's daily-driver reference Mnestra project (where the
7
7
  -- columns were already added by hand when rag-system bootstrap ran).
8
8
  --
9
9
  -- Why: until v1.0.2 the bundled hook only wrote memory_items. The actual
@@ -17,7 +17,7 @@
17
17
  -- the schema it expects exists everywhere.
18
18
  --
19
19
  -- Idempotent — safe on:
20
- -- 1. petvetbid (where these columns are already present from hand-applied
20
+ -- 1. the reference Mnestra project (where these columns are already present from hand-applied
21
21
  -- DDL Joshua ran when setting up rag-system; the IF NOT EXISTS guards
22
22
  -- no-op on every column).
23
23
  -- 2. Fresh canonical installs that ran migrations 001-016 only (the canonical
@@ -26,11 +26,11 @@
26
26
  --
27
27
  -- The unique constraint on session_id is wrapped in a do-block because
28
28
  -- ADD CONSTRAINT does not support IF NOT EXISTS in PostgreSQL. Joshua's
29
- -- petvetbid already has the constraint as memory_sessions_session_id_key
29
+ -- reference Mnestra project already has the constraint as memory_sessions_session_id_key
30
30
  -- (auto-named by the rag-system bootstrap); this block detects that name
31
31
  -- and skips re-adding.
32
32
  --
33
- -- session_id is added NULLABLE on canonical installs even though petvetbid's
33
+ -- session_id is added NULLABLE on canonical installs even though the reference Mnestra project's
34
34
  -- existing constraint is NOT NULL. Adding NOT NULL via ALTER TABLE on a
35
35
  -- table with existing rows would fail; the bundled hook always supplies
36
36
  -- session_id at write time, so nullability is non-blocking. A future sprint
@@ -56,7 +56,7 @@ alter table public.memory_sessions
56
56
  -- Unique constraint on session_id. Skip if any unique constraint on
57
57
  -- (session_id) is already in place — covers both the canonical name
58
58
  -- memory_sessions_session_id_key and any alternate name from a manual
59
- -- ALTER TABLE Joshua may have run on petvetbid.
59
+ -- ALTER TABLE Joshua may have run on the reference Mnestra project.
60
60
  do $$
61
61
  declare
62
62
  has_unique boolean;
@@ -19,7 +19,7 @@
19
19
  -- 1. Joshua's daily-driver (pre-Sprint-53; column will be added with
20
20
  -- every existing memory_sessions row at NULL → all become candidates
21
21
  -- on the first post-deploy tick, which is the desired bootstrap).
22
- -- 2. Brad's jizzard-brain (Linux SSH; same shape, same null-bootstrap).
22
+ -- 2. Linux SSH installs (same shape, same null-bootstrap).
23
23
  -- 3. Fresh canonical installs (post-mig-017 schema; column added on
24
24
  -- first run, no rows to backfill).
25
25
  -- 4. Re-runs (ADD COLUMN IF NOT EXISTS + CREATE INDEX IF NOT EXISTS).
@@ -0,0 +1,190 @@
1
+ -- Mnestra v0.4.6 — security hardening (revised from 0.4.4 / 0.4.5).
2
+ --
3
+ -- Source: external Supabase-advisor sweep by Brad Heath / Nacho Money LLC,
4
+ -- 2026-05-06. See docs/SECURITY-HARDENING-2026-05-06.md for the full flag
5
+ -- and root-cause analysis. The standing rule lives in the global Claude
6
+ -- Code instructions: "MANDATORY: Supabase RLS + privilege hygiene".
7
+ --
8
+ -- Two corrections folded into this revision:
9
+ --
10
+ -- A. **search_path must include `extensions`.** The 0.4.4/0.4.5 version of
11
+ -- this migration set search_path = public, pg_catalog on the memory_*
12
+ -- RPCs. Supabase >= 2024 installs pgvector in the `extensions` schema,
13
+ -- so the `<=>` cosine-distance operator becomes unreachable from those
14
+ -- RPCs after the alter — semantic recall fails with "operator does not
15
+ -- exist: extensions.vector <=> extensions.vector". Confirmed live
16
+ -- against the reference Mnestra project on 2026-05-06; fixed by
17
+ -- including `extensions` in search_path.
18
+ --
19
+ -- B. **Schema-generation-aware.** Some Mnestra installs are on the older
20
+ -- "memory_items-only" generation — they have memory_items /
21
+ -- memory_relationships / memory_sessions + the 6 memory_* RPCs, but
22
+ -- NOT the layered-memory tables (mnestra_session_memory,
23
+ -- mnestra_developer_memory, mnestra_project_memory, mnestra_commands)
24
+ -- and NOT the mnestra_doctor_* SECURITY DEFINER probes. The 0.4.4 / 0.4.5
25
+ -- migration body assumed the layered shape and threw "relation does
26
+ -- not exist" / "function does not exist" mid-migration on older
27
+ -- installs. Brad caught this on three of his projects (Structural,
28
+ -- aetheria-payroll, aetheria-phase1) and worked around with a
29
+ -- signature-agnostic DO-block subset.
30
+ --
31
+ -- This revision restructures every section as defensive lookups
32
+ -- against pg_class / pg_proc / pg_views, so each statement only fires
33
+ -- when its target exists. The migration runs cleanly on:
34
+ -- - layered-memory generation (Josh's reference project): full fix
35
+ -- - memory_items-only generation (Brad's three projects): function
36
+ -- hardening only; mnestra_*-targeting statements are skipped
37
+ -- - mixed generation: each statement applies to whatever exists
38
+ --
39
+ -- Closes four hole classes (where applicable to the install's schema
40
+ -- generation):
41
+ --
42
+ -- 1. Permissive PUBLIC INSERT RLS on mnestra_{commands,developer_memory,
43
+ -- project_memory,session_memory}. Created by Supabase Studio's
44
+ -- "Allow insert for all" default-policy template at table-creation
45
+ -- time. Anyone with the project's anon key could write directly to
46
+ -- memory tables, poisoning the corpus or session-id-squatting.
47
+ --
48
+ -- 2. PUBLIC EXECUTE on every Mnestra function. Postgres defaults
49
+ -- function EXECUTE to PUBLIC; the explicit `grant ... to service_role`
50
+ -- in earlier migrations is additive, not exclusive.
51
+ --
52
+ -- 3. Mutable search_path on memory_* and mnestra_doctor_* functions
53
+ -- (Supabase lint 0011).
54
+ --
55
+ -- 4. mnestra_recent_activity SECURITY DEFINER view (Supabase lint 0010)
56
+ -- with anon+authenticated SELECT.
57
+ --
58
+ -- Backward-compat: zero behavior change for any Mnestra installation that
59
+ -- follows the documented architecture (service-role writes via MCP server).
60
+ -- service_role keeps EXECUTE on every function and SELECT on the view.
61
+ --
62
+ -- Idempotent: every section guards on object existence and uses
63
+ -- IF EXISTS / signature-agnostic patterns. Re-running this migration is
64
+ -- safe and is in fact the recommended way to upgrade a 0.4.4/0.4.5 install
65
+ -- to pick up the search_path fix.
66
+
67
+ -- ====================================================================
68
+ -- 1. Drop permissive PUBLIC INSERT policies on mnestra_* tables, when
69
+ -- those tables exist on this install. Skipped silently on older
70
+ -- memory_items-only schema generation.
71
+ -- ====================================================================
72
+
73
+ do $$
74
+ declare
75
+ tbl text;
76
+ tables text[] := array[
77
+ 'mnestra_commands',
78
+ 'mnestra_developer_memory',
79
+ 'mnestra_project_memory',
80
+ 'mnestra_session_memory'
81
+ ];
82
+ begin
83
+ foreach tbl in array tables loop
84
+ if to_regclass(format('public.%I', tbl)) is not null then
85
+ execute format('drop policy if exists "Allow insert for all" on public.%I', tbl);
86
+ end if;
87
+ end loop;
88
+ end $$;
89
+
90
+ -- ====================================================================
91
+ -- 2 + 3. Revoke EXECUTE from public + anon + authenticated AND pin
92
+ -- search_path on every Mnestra function. Signature-agnostic — iterates
93
+ -- pg_proc to apply to whatever functions exist on this install. Covers
94
+ -- memory_*, match_memories, expand_memory_neighborhood, and
95
+ -- mnestra_doctor_*.
96
+ --
97
+ -- search_path includes `extensions` for the pgvector operator and
98
+ -- pg_catalog for built-ins; doctor functions don't use vectors but the
99
+ -- inclusion is harmless and keeps every Mnestra function uniform.
100
+ -- ====================================================================
101
+
102
+ do $$
103
+ declare
104
+ fn record;
105
+ sig text;
106
+ begin
107
+ for fn in
108
+ select n.nspname,
109
+ p.proname,
110
+ pg_get_function_identity_arguments(p.oid) as ident_args
111
+ from pg_proc p
112
+ join pg_namespace n on n.oid = p.pronamespace
113
+ where n.nspname = 'public'
114
+ and p.prokind = 'f'
115
+ and (
116
+ p.proname like 'memory_%'
117
+ or p.proname in ('match_memories', 'expand_memory_neighborhood')
118
+ or p.proname like 'mnestra_doctor_%'
119
+ )
120
+ loop
121
+ sig := format('%I.%I(%s)', fn.nspname, fn.proname, fn.ident_args);
122
+ execute format('revoke execute on function %s from public, anon, authenticated', sig);
123
+ execute format('alter function %s set search_path = public, extensions, pg_catalog', sig);
124
+ -- service_role keeps EXECUTE; the revoke above only targets public/anon/authenticated.
125
+ end loop;
126
+ end $$;
127
+
128
+ -- ====================================================================
129
+ -- 4. Recreate mnestra_recent_activity view without SECURITY DEFINER and
130
+ -- restrict SELECT to service_role. Skipped silently if any of the three
131
+ -- underlying tables are missing; otherwise the view is created even if it was absent.
132
+ -- ====================================================================
133
+
134
+ do $$
135
+ begin
136
+ if to_regclass('public.mnestra_session_memory') is not null
137
+ and to_regclass('public.mnestra_project_memory') is not null
138
+ and to_regclass('public.mnestra_developer_memory') is not null
139
+ then
140
+ drop view if exists public.mnestra_recent_activity;
141
+
142
+ execute $view$
143
+ create view public.mnestra_recent_activity as
144
+ select 'session'::text as layer, id, session_id, event_type, payload, project, developer_id, "timestamp", created_at from public.mnestra_session_memory
145
+ union all
146
+ select 'project'::text as layer, id, session_id, event_type, payload, project, developer_id, "timestamp", created_at from public.mnestra_project_memory
147
+ union all
148
+ select 'developer'::text as layer, id, session_id, event_type, payload, project, developer_id, "timestamp", created_at from public.mnestra_developer_memory
149
+ order by 8 desc
150
+ limit 100
151
+ $view$;
152
+
153
+ revoke all on public.mnestra_recent_activity from public, anon, authenticated;
154
+ grant select on public.mnestra_recent_activity to service_role;
155
+ end if;
156
+ end $$;
157
+
158
+ -- ====================================================================
159
+ -- Post-apply verification (run separately in Studio SQL editor):
160
+ --
161
+ -- -- Should return zero rows:
162
+ -- with bad_policies as (
163
+ -- select policyname from pg_policies
164
+ -- where schemaname='public' and tablename like 'mnestra_%'
165
+ -- and ('public' = any(roles) or roles = '{}')
166
+ -- and (with_check='true' or qual='true')
167
+ -- ),
168
+ -- public_exec as (
169
+ -- select p.proname from pg_proc p join pg_namespace n on n.oid=p.pronamespace
170
+ -- where n.nspname='public'
171
+ -- and (p.proname like 'mnestra_doctor_%' or p.proname like 'memory_%'
172
+ -- or p.proname in ('match_memories','expand_memory_neighborhood'))
173
+ -- and has_function_privilege('public', p.oid, 'EXECUTE')
174
+ -- ),
175
+ -- mutable_path as (
176
+ -- select p.proname from pg_proc p join pg_namespace n on n.oid=p.pronamespace
177
+ -- where n.nspname='public' and p.prokind='f'
178
+ -- and (p.proname like 'memory_%' or p.proname like 'mnestra_doctor_%')
179
+ -- and not exists (
180
+ -- select 1 from unnest(coalesce(p.proconfig,'{}'::text[])) c
181
+ -- where c like 'search_path=%'
182
+ -- )
183
+ -- )
184
+ -- select 'BAD_POLICY' as kind, policyname as detail from bad_policies
185
+ -- union all select 'PUBLIC_EXEC', proname from public_exec
186
+ -- union all select 'MUTABLE_SEARCH_PATH', proname from mutable_path;
187
+ --
188
+ -- Verified zero rows on the reference Mnestra project on 2026-05-06.
189
+ -- Smoke test: select count(*) from memory_hybrid_search('smoke', array_fill(0::real, ARRAY[1536])::vector, 1) → 1 row, no operator-resolution error.
190
+ -- ====================================================================
@@ -0,0 +1,57 @@
1
+ -- 020_migration_tracking.sql
2
+ -- Adds durable tracking of which Mnestra migrations have been applied to a project,
3
+ -- so upgrade paths can compute (bundled - applied) and apply only the diff.
4
+ -- Sprint 61 (TermDeck Convergence Keystone), Mnestra 0.4.7.
5
+ --
6
+ -- Why this exists: prior to 020, the mnestra/rumen wizards re-applied every
7
+ -- bundled migration on every invocation, relying on per-migration
8
+ -- `IF NOT EXISTS` / `CREATE OR REPLACE` idempotency to avoid duplicate work.
9
+ -- That works for a fresh install but doesn't tell the wizard which migrations
10
+ -- the live database is missing — so a user running `npm install -g @jhizzard/termdeck@latest`
11
+ -- against an existing project gets the new package files without any way to
12
+ -- detect schema drift. Class A (schema drift on package upgrade) per
13
+ -- termdeck/docs/INSTALLER-PITFALLS.md.
14
+ --
15
+ -- Shape:
16
+ -- - `filename` text PK — the bundled migration filename, e.g.
17
+ -- `015_source_agent.sql`. PK because each
18
+ -- bundled file applies at most once.
19
+ -- - `applied_at` timestamptz — wall-clock time of apply. Backfilled
20
+ -- rows (rows seeded by the post-020 backfill
21
+ -- probe for migrations applied pre-020) use
22
+ -- epoch (1970-01-01T00:00:00Z) as a sentinel.
23
+ -- - `checksum` text — SHA-256 of the bundled file content at apply
24
+ -- time. Lets future runs detect bundle drift
25
+ -- without auto-overwriting the live schema.
26
+ -- - `schema_version` text — optional free-text marker. Backfill rows use
27
+ -- the literal `'backfill'` so audit queries
28
+ -- can distinguish them.
29
+ --
30
+ -- RLS posture: ENABLE ROW LEVEL SECURITY + REVOKE ALL FROM PUBLIC. No
31
+ -- policies are intentional — anon and authenticated have NO access, full
32
+ -- stop. service_role bypasses RLS (on Supabase the role carries BYPASSRLS) and is the only
33
+ -- caller that should ever touch this table (the migration runner connects
34
+ -- via DATABASE_URL using service-role credentials).
35
+ --
36
+ -- Idempotent: re-applying this migration on a project that already has the
37
+ -- table is a no-op (CREATE TABLE IF NOT EXISTS, ALTER TABLE ... ENABLE RLS
38
+ -- is a no-op when already enabled, REVOKE/GRANT are idempotent).
39
+
40
+ CREATE TABLE IF NOT EXISTS public.mnestra_migrations (
41
+ filename text PRIMARY KEY,
42
+ applied_at timestamptz NOT NULL DEFAULT now(),
43
+ checksum text NOT NULL,
44
+ schema_version text
45
+ );
46
+
47
+ ALTER TABLE public.mnestra_migrations ENABLE ROW LEVEL SECURITY;
48
+
49
+ -- Service-role-only. anon and authenticated have NO access (no policies = denied by RLS).
50
+ -- Service role bypasses RLS by default; the table is queried only by the migration runner
51
+ -- which uses the service-role key.
52
+
53
+ REVOKE ALL ON public.mnestra_migrations FROM PUBLIC, anon, authenticated; -- matches 019: a revoke from PUBLIC alone does not remove privileges granted directly to anon/authenticated
54
+ GRANT ALL ON public.mnestra_migrations TO service_role;
55
+
56
+ COMMENT ON TABLE public.mnestra_migrations IS
57
+ 'Tracking table for applied Mnestra migrations. service_role-only; RLS-on; no policies.';
@@ -0,0 +1,175 @@
1
+ -- 021_project_tag_canonicalize_claimguard.sql
2
+ -- Sprint 62 T2 — finishes the gorgias / gorgias-ticket-monitor → claimguard
3
+ -- rename that migration 012 (Sprint 41 T2) explicitly scoped out.
4
+ --
5
+ -- Why this exists:
6
+ -- Same project (the ClaimGuard repo at ~/Documents/Unagi/gorgias-ticket-monitor)
7
+ -- was tagged three ways across history. As of 2026-05-08:
8
+ -- - 'claimguard' ~29 rows (newest tag, written by the
9
+ -- post-Sprint-41 PROJECT_MAP)
10
+ -- - 'gorgias-ticket-monitor' ~245 rows (mid tag, the on-disk dir name)
11
+ -- - 'gorgias' ~541 rows (oldest tag, pre-Sprint-41)
12
+ --
13
+ -- Migration 012's §"What this migration does NOT do" called out the merge
14
+ -- as a separate cleanup pass:
15
+ --
16
+ -- - Does NOT consolidate duplicate tags like 'gorgias' vs
17
+ -- 'gorgias-ticket-monitor', 'pvb' vs 'PVB', or 'mnestra' vs 'engram'.
18
+ -- Visible in `SELECT project, count(*) FROM memory_items GROUP BY
19
+ -- project` but a separate cleanup pass.
20
+ --
21
+ -- That separate pass is 021. Sprint 21 T2's earlier rename plan never
22
+ -- landed; Sprint 35's harness-hook fix addressed the upstream PROJECT_MAP
23
+ -- so new rows tag correctly, and Sprint 62 T2 (this migration) closes the
24
+ -- historical-corpus gap so memory_recall(project="claimguard") returns the
25
+ -- full ~815-row history rather than just the post-Sprint-41 tail.
26
+ --
27
+ -- The companion T2 invariant test at
28
+ -- termdeck/tests/project-tag-invariant.test.js currently skips the claimguard
29
+ -- invariant via `deferredToSprint35`; with 021 applied that invariant would
30
+ -- pass cleanly if un-deferred. Un-deferring is out of T2's lane (test edits
31
+ -- are owned by orchestrator close-out).
32
+ --
33
+ -- Why the *project*-column merge and not a content-keyword rebucket: rows
34
+ -- already-tagged 'gorgias' or 'gorgias-ticket-monitor' carry definitive
35
+ -- project provenance — the row is from the ClaimGuard project by virtue of
36
+ -- the writer's prior tag, regardless of content keywords. We are not
37
+ -- inferring; we are renaming an exact-match tag set that the SOURCE-BRIEF
38
+ -- and 012's prologue both confirm refer to the same on-disk codebase.
39
+ --
40
+ -- Idempotence:
41
+ -- The UPDATE is gated by `WHERE project IN ('gorgias','gorgias-ticket-monitor')`.
42
+ -- After the first apply those rows carry project='claimguard', so a re-run
43
+ -- matches zero rows — RAISE NOTICE prints 0 and the migration succeeds. The
44
+ -- bundled migration runner (packages/server/src/setup/migration-runner.js)
45
+ -- also checksums applied migrations into mnestra_migrations (table from
46
+ -- 020) and skips re-application by filename, so the in-runner path is
47
+ -- idempotent at two layers.
48
+ --
49
+ -- RLS posture:
50
+ -- memory_items has RLS enabled (per migration 019 security hardening), but
51
+ -- service_role bypasses RLS. The migration runner authenticates as
52
+ -- service_role via DATABASE_URL, so the UPDATE lands without policy
53
+ -- changes. This migration does NOT touch policies or roles.
54
+ --
55
+ -- Reversibility:
56
+ -- Down-migration is documented at the bottom (commented). Splitting the
57
+ -- merged set back into three is destructive — once project='claimguard'
58
+ -- replaces the prior values, the row provenance for which tag it ORIGINALLY
59
+ -- carried is gone (no audit column tracks pre-image). Reversal requires
60
+ -- restore from a pg_dump snapshot taken before the migration was applied.
61
+ -- Do NOT attempt heuristic reversal.
62
+ --
63
+ -- Application:
64
+ -- Applied via the bundled migration runner using node-postgres
65
+ -- client.query(). DO blocks + GET DIAGNOSTICS ROW_COUNT (no psql
66
+ -- metacommands — \gset / \echo / etc are not supported in client.query).
67
+ -- Manual fallback: `psql "$DATABASE_URL" -f 021_project_tag_canonicalize_claimguard.sql`.
68
+
69
+ BEGIN;
70
+
71
+ -- ============================================================
72
+ -- AUDIT BEFORE
73
+ -- ============================================================
74
+ DO $$
75
+ DECLARE
76
+ before_claimguard int;
77
+ before_gorgias int;
78
+ before_gorgias_ticket_monitor int;
79
+ before_total_three int;
80
+ BEGIN
81
+ SELECT count(*) INTO before_claimguard
82
+ FROM public.memory_items WHERE project = 'claimguard';
83
+ SELECT count(*) INTO before_gorgias
84
+ FROM public.memory_items WHERE project = 'gorgias';
85
+ SELECT count(*) INTO before_gorgias_ticket_monitor
86
+ FROM public.memory_items WHERE project = 'gorgias-ticket-monitor';
87
+ before_total_three := before_claimguard + before_gorgias + before_gorgias_ticket_monitor;
88
+ RAISE NOTICE '[021-canonicalize] BEFORE claimguard=% gorgias=% gorgias-ticket-monitor=% (sum=%)',
89
+ before_claimguard, before_gorgias, before_gorgias_ticket_monitor, before_total_three;
90
+ END $$;
91
+
92
+ -- ============================================================
93
+ -- CANONICALIZE — gorgias + gorgias-ticket-monitor → claimguard
94
+ --
95
+ -- Single-statement UPDATE on the project column. No content scoping required:
96
+ -- the source tags refer unambiguously to the ClaimGuard project per Sprint 41
97
+ -- T2's analysis (012's prologue) and the SOURCE-BRIEF for Sprint 62 §1.
98
+ -- ============================================================
99
+ DO $$
100
+ DECLARE
101
+ affected_count integer;
102
+ BEGIN
103
+ UPDATE public.memory_items
104
+ SET project = 'claimguard'
105
+ WHERE project IN ('gorgias', 'gorgias-ticket-monitor');
106
+ GET DIAGNOSTICS affected_count = ROW_COUNT;
107
+ RAISE NOTICE '[021-canonicalize] canonicalized % memory_items rows (gorgias + gorgias-ticket-monitor) -> claimguard',
108
+ affected_count;
109
+ END $$;
110
+
111
+ -- ============================================================
112
+ -- AUDIT AFTER + INVARIANT CHECK (both legacy tags must be empty)
113
+ -- ============================================================
114
+ DO $$
115
+ DECLARE
116
+ after_claimguard int;
117
+ after_gorgias int;
118
+ after_gorgias_ticket_monitor int;
119
+ BEGIN
120
+ SELECT count(*) INTO after_claimguard
121
+ FROM public.memory_items WHERE project = 'claimguard';
122
+ SELECT count(*) INTO after_gorgias
123
+ FROM public.memory_items WHERE project = 'gorgias';
124
+ SELECT count(*) INTO after_gorgias_ticket_monitor
125
+ FROM public.memory_items WHERE project = 'gorgias-ticket-monitor';
126
+ RAISE NOTICE '[021-canonicalize] AFTER claimguard=% gorgias=% gorgias-ticket-monitor=%',
127
+ after_claimguard, after_gorgias, after_gorgias_ticket_monitor;
128
+ IF after_gorgias <> 0 OR after_gorgias_ticket_monitor <> 0 THEN
129
+ RAISE EXCEPTION
130
+ '[021-canonicalize] post-apply invariant violated: expected zero rows in gorgias / gorgias-ticket-monitor, got gorgias=% gorgias-ticket-monitor=%',
131
+ after_gorgias, after_gorgias_ticket_monitor;
132
+ END IF;
133
+ END $$;
134
+
135
+ COMMIT;
136
+
137
+ -- ============================================================
138
+ -- POST-APPLY: verification queries (NOT part of the migration; run separately
139
+ -- to confirm the merge took, the invariant tests stay green, and the recall
140
+ -- path returns the full history). Each query is safe to run repeatedly.
141
+ -- ============================================================
142
+ --
143
+ -- 1. Tag distribution after migration — claimguard should be the only
144
+ -- bucket among the three; gorgias / gorgias-ticket-monitor should be 0:
145
+ -- SELECT project, count(*) FROM public.memory_items
146
+ -- WHERE project IN ('claimguard', 'gorgias', 'gorgias-ticket-monitor')
147
+ -- GROUP BY project ORDER BY project;
148
+ --
149
+ -- 2. Confirm no orphan rows remain under either legacy tag (these should
150
+ -- return 0):
151
+ -- SELECT count(*) FROM public.memory_items
152
+ -- WHERE project IN ('gorgias', 'gorgias-ticket-monitor');
153
+ --
154
+ -- 3. Spot-check that the merged set carries content from all three
155
+ -- historical eras (look for varied dates, varied source_types):
156
+ -- SELECT date_trunc('week', created_at) AS week, count(*)
157
+ -- FROM public.memory_items
158
+ -- WHERE project = 'claimguard'
159
+ -- GROUP BY 1 ORDER BY 1;
160
+ --
161
+ -- 4. Confirm the project-tag invariant test for claimguard would now pass
162
+ -- if un-deferred (rows whose content matches gorgias-ticket-monitor or
163
+ -- Unagi/ identifiers should be top-tagged claimguard):
164
+ -- SELECT project, count(*) FROM public.memory_items
165
+ -- WHERE content ILIKE '%gorgias-ticket-monitor%'
166
+ -- OR content ILIKE '%Unagi/%'
167
+ -- GROUP BY project ORDER BY count(*) DESC LIMIT 5;
168
+ --
169
+ -- DOWN-MIGRATION (manual, NOT auto-applied):
170
+ -- Splitting the merged set back into three is non-trivial (no source-of-
171
+ -- truth on which rows were originally which tag — provenance is lost when
172
+ -- the UPDATE replaces the project string). If a roll-back is needed,
173
+ -- restore from a pg_dump taken before this migration was applied. Do NOT
174
+ -- attempt to reverse via heuristic — the row provenance is destroyed by
175
+ -- the merge.
@@ -0,0 +1,182 @@
1
+ -- 022_source_agent_backfill.sql
2
+ -- Sprint 62 T3 (TermDeck) — backfill source_agent for pre-Sprint-50 NULL rows
3
+ -- where the writer can be inferred from row shape, NOT from content-marker
4
+ -- inspection. Mnestra 0.4.9 (release-pending; orchestrator bumps at sprint close).
5
+ --
6
+ -- Why this exists:
7
+ -- Sprint 50 introduced source_agent (migration 015). Pre-Sprint-50 rows
8
+ -- have source_agent IS NULL and are silently excluded from filtered
9
+ -- memory_recall queries (per the recall tool's docstring: "NULL-source-
10
+ -- agent rows ... are excluded when this filter is set" — see
11
+ -- src/recall.ts:165-169).
12
+ --
13
+ -- 2026-05-08 production probe: 6,381 of 6,483 active memory_items rows
14
+ -- (~98%) have source_agent IS NULL — far above the SOURCE-BRIEF estimate
15
+ -- of "3,000+". Filtered recall has been blind to most of the corpus for
16
+ -- roughly the entire post-Sprint-50 window.
17
+ --
18
+ -- Migration 015 already backfilled session_summary NULL rows -> 'claude'
19
+ -- (015 lines 48-51), so the NULL universe today is exclusively non-
20
+ -- session_summary types. This migration closes the slice where the
21
+ -- writer can be inferred from row shape (architectural / schema /
22
+ -- structural evidence), and deliberately leaves the remaining slice
23
+ -- NULL — to be reached via the additive include_null_source recall
24
+ -- flag rather than by speculative attribution.
25
+ --
26
+ -- Design principle: row-shape attribution, not content-marker attribution.
27
+ -- The original SOURCE-BRIEF proposed content-marker predicates (ILIKE
28
+ -- '%[T-CODEX]%' etc). Sampling proved this unsafe: 100% of NULL rows
29
+ -- matching codex/gemini/grok markers are Claude *describing* those
30
+ -- agents, never authored by them. Marker == "row mentions agent",
31
+ -- not "row authored by agent".
32
+ --
33
+ -- Instead, this migration attributes by the (source_type, has_path,
34
+ -- has_session) tuple — schema-level fingerprints that map 1:1 to the
35
+ -- writer architecture, and that 50+ randomly-sampled rows confirm.
36
+ --
37
+ -- Predicate plan (each with explicit evidence chain):
38
+ --
39
+ -- A. NULL + source_type IN (decision, bug_fix, architecture, preference,
40
+ -- code_context) -> 'claude'.
41
+ -- Architectural evidence: pre-Sprint-50, only Claude shipped a
42
+ -- memory_remember client. The mcp__memory__memory_remember and
43
+ -- mcp__mnestra__memory_remember surfaces both ran exclusively in
44
+ -- Claude sessions. Codex/Gemini/Grok memory_remember capabilities
45
+ -- did not exist until the Sprint 51 per-agent MCP wiring (see
46
+ -- memory: "MCP server wiring patterns for Codex, Gemini, and Grok
47
+ -- CLIs (verified 2026-05-04 ... follow-up to Sprint 51.6's "Codex
48
+ -- MCP not wired" gap)"). All NULL rows of these source_types are
49
+ -- pre-Sprint-50 and therefore architecturally Claude.
50
+ -- Schema fingerprint: 100% of these rows have source_file_path IS NULL
51
+ -- AND source_session_id IS NULL — bare memory_remember shape.
52
+ -- Sample confirmation: 28-row sample showed 100% Claude-summary writing
53
+ -- pattern (project context, dated entries, file:line evidence — the
54
+ -- recognizable Claude memory_remember signature).
55
+ -- Expected count: 560.
56
+ --
57
+ -- B. NULL + source_type='fact' + source_session_id IS NOT NULL -> 'claude'.
58
+ -- Schema evidence: source_session_id is a Claude session UUID format
59
+ -- (matches the existing claude/session_summary tagged rows; same
60
+ -- shape: has_path=false, has_session=true). The Claude SessionEnd
61
+ -- hook is the only writer that populates source_session_id with a
62
+ -- Claude UUID. Other writers either set source_file_path (rag-extractor)
63
+ -- or leave both NULL (bare memory_remember).
64
+ -- Expected count: 4,587.
65
+ --
66
+ -- D. NULL + source_type='document_chunk' -> 'orchestrator'.
67
+ -- Structural evidence: 951/951 rows have source_file_path set + JSONB
68
+ -- metadata containing chunkIndex + heading keys — unmistakable
69
+ -- rag-system batch-chunker output. The chunker is not an LLM session;
70
+ -- 'orchestrator' is the appropriate non-LLM tag per the source_agent
71
+ -- enum (claude|codex|gemini|grok|orchestrator).
72
+ -- Path buckets:
73
+ -- 513 rows ~/.gemini/antigravity/scratch/* (Gemini scratch docs the
74
+ -- rag-extractor ingested — Gemini wrote the source MD,
75
+ -- but the rag-extractor wrote the row.)
76
+ -- 429 rows ~/Documents/* (project docs ingested directly).
77
+ -- 9 rows ~/.claude/projects/*/memory/MEMORY.md (auto-memory MD
78
+ -- ingested by the rag-extractor).
79
+ -- All three buckets are extractor-written, not LLM-written. The
80
+ -- original document author is preserved in source_file_path; the
81
+ -- row writer is the extractor.
82
+ -- Expected count: 951.
83
+ --
84
+ -- Predicate deliberately NOT applied (response to T4-CODEX 20:43 ET concern):
85
+ -- C. NULL + source_type='fact' + source_session_id IS NULL +
86
+ -- source_file_path IS NULL.
87
+ -- These 283 rows are bare memory_remember calls without session
88
+ -- attribution. Sampling (10 rows) showed 100% Claude content pattern,
89
+ -- but they lack the schema fingerprint that makes A/B/D structurally
90
+ -- definitive — there is no architectural lock that PREVENTS a
91
+ -- non-Claude writer from producing this shape (e.g., a manual psql
92
+ -- insert, a non-MCP REST call, or an early rag-extractor variant
93
+ -- that omitted source_file_path).
94
+ -- Migration 015 lines 24-30 explicitly preserved provenance
95
+ -- uncertainty for non-session_summary historical rows; broad
96
+ -- attribution here would erase that bright line. Per T4-CODEX
97
+ -- AUDIT-CONCERN (Sprint 62, 20:43 ET), these rows stay NULL and
98
+ -- are reached via the additive include_null_source recall path
99
+ -- added in src/recall.ts under this same sprint.
100
+ -- Residual NULL after this migration: 283 rows = 4.4% of corpus.
101
+ -- Acceptance target: <5%. Met.
102
+ --
103
+ -- Total backfill: 6,098 rows (A + B + D). Acceptance: residual NULL < 5%
104
+ -- of corpus (4.4% expected; well under threshold).
105
+ --
106
+ -- What this migration deliberately does NOT do:
107
+ -- * Touch session_summary rows (015 already attributed those).
108
+ -- * Touch already-tagged rows (every UPDATE is gated by source_agent IS NULL).
109
+ -- * Use content-marker predicates (sampling proved unreliable; markers
110
+ -- describe agents, not authors).
111
+ -- * Backfill the inferential-only slice (Predicate C, see above).
112
+ --
113
+ -- Idempotent: every UPDATE has WHERE source_agent IS NULL, so re-running
114
+ -- is a no-op on already-tagged rows. Safe to re-apply.
115
+ --
116
+ -- Reversibility: this migration tags rows but does not modify content,
117
+ -- type, or any other column. To revert (in a future migration; note this also clears claude/orchestrator tags written at insert time before the cutoff date), run:
118
+ -- UPDATE public.memory_items
119
+ -- SET source_agent = NULL
120
+ -- WHERE source_agent IN ('claude', 'orchestrator')
121
+ -- AND created_at < '2026-05-09'
122
+ -- AND source_type != 'session_summary'; -- preserve 015's backfill
123
+ --
124
+ -- RLS posture (per global CLAUDE.md RLS hygiene gates 1-5): this is a
125
+ -- DO block, not a CREATE FUNCTION. Runs as the migration runner's role
126
+ -- (service_role, which bypasses RLS). search_path is set explicitly to
127
+ -- defend against schema-shadow attacks during execution. No new policies,
128
+ -- no new function executable surface.
129
+
130
+ set search_path = pg_catalog, public; -- pg_catalog listed first so objects in public cannot shadow built-ins (e.g. count)
131
+
132
+ do $$
133
+ declare
134
+ typed_claude_rows integer := 0; -- rows tagged by predicate A
135
+ session_fact_rows integer := 0; -- rows tagged by predicate B
136
+ doc_chunk_rows integer := 0; -- rows tagged by predicate D
137
+ still_null_rows integer; -- rows left untagged after A, B and D
138
+ corpus_rows integer; -- every row in memory_items
139
+ begin
140
+ -- Predicate A: untagged rows of the five typed source_types listed below.
141
+ -- Per the file header, only Claude shipped a memory_remember client before
142
+ -- Sprint 50, so these rows are attributed to 'claude'.
143
+ update public.memory_items as mi
144
+ set source_agent = 'claude'
145
+ where mi.source_type in ('decision', 'bug_fix', 'architecture', 'preference', 'code_context')
146
+ and mi.source_agent is null;
147
+ get diagnostics typed_claude_rows = row_count;
148
+
149
+ -- Predicate B: untagged fact rows that carry a source_session_id. The header
150
+ -- attributes that id to the Claude SessionEnd hook (same shape as the
151
+ -- already-tagged claude/session_summary rows), so these become 'claude'.
152
+ update public.memory_items as mi
153
+ set source_agent = 'claude'
154
+ where mi.source_type = 'fact'
155
+ and mi.source_session_id is not null
156
+ and mi.source_agent is null;
157
+ get diagnostics session_fact_rows = row_count;
158
+
159
+ -- Predicate D: untagged document_chunk rows -> 'orchestrator', the non-LLM tag.
160
+ -- The header identifies these as rag-extractor batch output (source_file_path
161
+ -- plus chunkIndex/heading metadata); only source_type is tested here.
162
+ update public.memory_items as mi
163
+ set source_agent = 'orchestrator'
164
+ where mi.source_type = 'document_chunk'
165
+ and mi.source_agent is null;
166
+ get diagnostics doc_chunk_rows = row_count;
167
+
168
+ -- Tallies for the summary notice: rows still untagged, then the whole corpus.
169
+ still_null_rows := (select count(*) from public.memory_items
170
+ where source_agent is null);
171
+
172
+ corpus_rows := (select count(*) from public.memory_items);
173
+
174
+ raise notice '[022] backfill complete: A(claude/typed)=% B(claude/fact+session)=% D(orchestrator/doc_chunk)=% remaining_null=% / % total (acceptance: <5%%)',
175
+ typed_claude_rows, session_fact_rows, doc_chunk_rows, still_null_rows, corpus_rows;
176
+ raise notice '[022] residual NULL = bare memory_remember fact rows (no session, no path); reach via include_null_source recall flag';
177
+ end$$;
178
+
179
+ -- Rewrite the source_agent column comment so it covers migrations 015 and 022
180
+ -- together: what each one backfilled, the intentional NULL residual, and the recall flag.
181
+ comment on column public.memory_items.source_agent is
182
+ 'Agent that produced this memory: claude|codex|gemini|grok|orchestrator|NULL. Populated at write time by per-agent SessionEnd writers from Sprint 50 onward. Pre-Sprint-50 NULL rows backfilled by migration 015 (session_summary -> claude) and migration 022 (decision/bug_fix/architecture/preference/code_context -> claude; fact w/ source_session_id -> claude; document_chunk -> orchestrator). Residual NULL = bare-call fact rows without session or path attribution; intentionally preserved per migration 015''s provenance bright line. Reach those via memory_recall include_null_source=true.';