@jhizzard/termdeck 0.10.0 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -1
- package/packages/client/public/app.js +42 -3
- package/packages/client/public/graph.html +1 -0
- package/packages/client/public/graph.js +119 -23
- package/packages/client/public/style.css +53 -0
- package/packages/server/src/flashback-diag.js +51 -0
- package/packages/server/src/graph-routes.js +100 -0
- package/packages/server/src/index.js +62 -3
- package/packages/server/src/mnestra-bridge/index.js +63 -9
- package/packages/server/src/session.js +95 -5
- package/packages/server/src/setup/mnestra-migrations/011_project_tag_backfill.sql +237 -0
- package/packages/server/src/setup/mnestra-migrations/012_project_tag_re_taxonomy.sql +397 -0
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
-- Sprint 41 T2 — chopin-nashville re-taxonomy.
|
|
2
|
+
--
|
|
3
|
+
-- Why this exists:
|
|
4
|
+
-- Sprint 39 T3 migration 011_project_tag_backfill.sql moved 192 rows out of
|
|
5
|
+
-- the chopin-nashville bucket (1,139 → 947) using a conservative 5-bucket
|
|
6
|
+
-- keyword pass. 947 rows still remain mis-tagged. Sprint 41 widens the
|
|
7
|
+
-- keyword sets per the new project taxonomy (T1 owns the canonical doc at
|
|
8
|
+
-- docs/PROJECT-TAXONOMY.md) and adds three buckets that 011 did not have:
|
|
9
|
+
-- - chopin-in-bohemia (festival, distinct from the Nashville competition)
|
|
10
|
+
-- - chopin-scheduler (the SchedulingApp / "Maestro" project — single
|
|
11
|
+
-- project under two names per orchestrator
|
|
12
|
+
-- mid-inject clarification 2026-04-28 12:51 ET)
|
|
13
|
+
-- - claimguard (Gorgias-ticket-monitor / ClaimGuard-AI work)
|
|
14
|
+
--
|
|
15
|
+
-- The remaining residue after this migration is what Sprint 41 T4 hands to
|
|
16
|
+
-- the LLM-classification runner. Conservative wins: rows with no clear
|
|
17
|
+
-- keyword signal STAY chopin-nashville for T4 to handle.
|
|
18
|
+
--
|
|
19
|
+
-- What this migration does NOT do:
|
|
20
|
+
-- - Does NOT touch mnestra_session_memory / mnestra_project_memory / etc.
|
|
21
|
+
-- (legacy rag-events tables; different write path; Sprint 42+ cleanup).
|
|
22
|
+
-- - Does NOT consolidate duplicate tags like 'gorgias' vs
|
|
23
|
+
-- 'gorgias-ticket-monitor', 'pvb' vs 'PVB', or 'mnestra' vs 'engram'.
|
|
24
|
+
-- Visible in `SELECT project, count(*) FROM memory_items GROUP BY
|
|
25
|
+
-- project` but a separate cleanup pass.
|
|
26
|
+
-- - Does NOT re-tag rows whose only signal is the legitimate
|
|
27
|
+
-- chopin-nashville vocabulary (competition / performance / jury /
|
|
28
|
+
-- sponsor / applicant / repertoire / Acceptd / NICPC / laureate). Those
|
|
29
|
+
-- are the rows the chopin-nashville tag SHOULD claim.
|
|
30
|
+
-- - Does NOT touch source_session_id → transcript_path → cwd resolution.
|
|
31
|
+
-- The briefing scoped that as a possible additional signal, not
|
|
32
|
+
-- required. Keyword bucketing + T4's LLM pass hits the < 100 target.
|
|
33
|
+
--
|
|
34
|
+
-- Heuristic — content keyword bucketing:
|
|
35
|
+
-- The migration runs UPDATEs sequentially. Earlier buckets claim ambiguous
|
|
36
|
+
-- multi-project rows first; later buckets only see rows that no earlier
|
|
37
|
+
-- bucket has already re-tagged. Order is broadest-first (largest expected
|
|
38
|
+
-- bucket size first):
|
|
39
|
+
--
|
|
40
|
+
-- 1. termdeck — termdeck, mnestra, "4+1 sprint", xterm,
|
|
41
|
+
-- node-pty, flashback, memory_items,
|
|
42
|
+
-- memory_relationships
|
|
43
|
+
-- 2. rumen — rumen, rumen-tick, "insight synthesis"
|
|
44
|
+
-- 3. podium — podium
|
|
45
|
+
-- 4. chopin-in-bohemia — bohemia, "chopin in bohemia", "2026 festival"
|
|
46
|
+
-- 5. chopin-scheduler — scheduling, schedulingapp, \mMaestro\M
|
|
47
|
+
-- (Maestro is the working name; chopin-scheduler
|
|
48
|
+
-- is the canonical tag — alias confirmed 2026-04-28
|
|
49
|
+
-- by Joshua; case-sensitive word-boundary token
|
|
50
|
+
-- avoids matching unrelated "[maestro]" log
|
|
51
|
+
-- prefixes)
|
|
52
|
+
-- 6. pvb — PVB, petvetbid, "pet vet bid"
|
|
53
|
+
-- 7. claimguard — claimguard, gorgias-ticket-monitor,
|
|
54
|
+
-- "gorgias ticket monitor"
|
|
55
|
+
-- 8. dor — \mDOR\M, /DOR/, ~/Documents/DOR, dor.config,
|
|
56
|
+
-- "Rust LLM gateway", openclaw
|
|
57
|
+
-- (reused verbatim from 011's tightened
|
|
58
|
+
-- pattern; word-boundary uppercase rules out
|
|
59
|
+
-- "dormant", "vendored", "indoor", etc.)
|
|
60
|
+
--
|
|
61
|
+
-- Idempotence:
|
|
62
|
+
-- Every UPDATE is gated by `WHERE project = 'chopin-nashville'`. After the
|
|
63
|
+
-- first run, those rows have a different project tag, so re-running this
|
|
64
|
+
-- migration is a no-op (zero rows updated per bucket). RAISE NOTICE on a
|
|
65
|
+
-- re-run will print zeros, which is the expected idempotent signal.
|
|
66
|
+
--
|
|
67
|
+
-- Application:
|
|
68
|
+
-- THIS MIGRATION IS NOT EXECUTED BY THE LANE THAT WROTE IT. Orchestrator
|
|
69
|
+
-- reviews the RAISE NOTICE counts after applying. Apply via the bundled
|
|
70
|
+
-- migration runner at packages/server/src/setup/migration-runner.js (which
|
|
71
|
+
-- uses node-postgres client.query — psql metacommands like \gset are NOT
|
|
72
|
+
-- available, so the count probes use GET DIAGNOSTICS ROW_COUNT inside DO
|
|
73
|
+
-- blocks). Manual fallback:
|
|
74
|
+
-- `psql "$DATABASE_URL" -f 012_project_tag_re_taxonomy.sql`.
|
|
75
|
+
|
|
76
|
+
BEGIN;

-- ============================================================
-- AUDIT BEFORE
-- ============================================================
-- Emits one NOTICE with the row count of every tag this migration reads
-- from or writes to, taken before any bucket UPDATE has run. All nine
-- counts come from a single pass over memory_items: each FILTER clause
-- counts only the rows carrying that tag.
DO $$
DECLARE
  n_chopin     int;
  n_termdeck   int;
  n_rumen      int;
  n_podium     int;
  n_bohemia    int;
  n_scheduler  int;
  n_pvb        int;
  n_claimguard int;
  n_dor        int;
BEGIN
  SELECT
    count(*) FILTER (WHERE project = 'chopin-nashville'),
    count(*) FILTER (WHERE project = 'termdeck'),
    count(*) FILTER (WHERE project = 'rumen'),
    count(*) FILTER (WHERE project = 'podium'),
    count(*) FILTER (WHERE project = 'chopin-in-bohemia'),
    count(*) FILTER (WHERE project = 'chopin-scheduler'),
    count(*) FILTER (WHERE project = 'pvb'),
    count(*) FILTER (WHERE project = 'claimguard'),
    count(*) FILTER (WHERE project = 'dor')
  INTO
    n_chopin, n_termdeck, n_rumen, n_podium, n_bohemia,
    n_scheduler, n_pvb, n_claimguard, n_dor
  FROM memory_items
  -- Pre-filter to the nine audited tags; rows with any other tag would
  -- contribute to none of the FILTERed counts anyway.
  WHERE project IN (
    'chopin-nashville', 'termdeck', 'rumen', 'podium', 'chopin-in-bohemia',
    'chopin-scheduler', 'pvb', 'claimguard', 'dor'
  );

  RAISE NOTICE '[012-retaxonomy] BEFORE chopin-nashville=% termdeck=% rumen=% podium=% chopin-in-bohemia=% chopin-scheduler=% pvb=% claimguard=% dor=%',
    n_chopin, n_termdeck, n_rumen, n_podium, n_bohemia, n_scheduler, n_pvb, n_claimguard, n_dor;
END $$;
|
|
105
|
+
|
|
106
|
+
-- ============================================================
|
|
107
|
+
-- BUCKET 1 — termdeck (broadest first; claims ambiguous multi-project rows)
|
|
108
|
+
--
|
|
109
|
+
-- Widened from 011's 3-keyword set [termdeck | mnestra | "4+1 sprint"] to
|
|
110
|
+
-- include TermDeck-internal vocabulary that almost never appears outside the
|
|
111
|
+
-- TermDeck stack (xterm, node-pty), the Flashback subsystem name, and the
|
|
112
|
+
-- memory_* table identifiers (which are spoken about in TermDeck/Mnestra
|
|
113
|
+
-- context overwhelmingly — graph-routes, mnestra-bridge, the migrations
|
|
114
|
+
-- themselves).
|
|
115
|
+
-- ============================================================
|
|
116
|
+
-- Re-tags chopin-nashville rows whose content carries TermDeck/Mnestra
-- vocabulary, then reports how many rows moved.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'termdeck'
   WHERE project = 'chopin-nashville'
     AND (
          content ILIKE '%termdeck%'
       OR content ILIKE '%mnestra%'
       OR content ILIKE '%4+1 sprint%'
       OR content ILIKE '%xterm%'
       OR content ILIKE '%node-pty%'
       OR content ILIKE '%flashback%'
       -- In LIKE/ILIKE an unescaped "_" is a single-character wildcard, so
       -- '%memory_items%' would also match "memory items" / "memory-items".
       -- The intent is the literal table identifiers, so the underscore is
       -- escaped with the default LIKE escape character (backslash). Like
       -- the regex literals elsewhere in this file, this relies on
       -- standard_conforming_strings = on (the PostgreSQL default).
       OR content ILIKE '%memory\_items%'
       OR content ILIKE '%memory\_relationships%'
     );

  -- ROW_COUNT reflects the UPDATE immediately above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 1 (termdeck): % rows re-tagged', retagged_count;
END $$;
|
|
135
|
+
|
|
136
|
+
-- ============================================================
|
|
137
|
+
-- BUCKET 2 — rumen
|
|
138
|
+
--
|
|
139
|
+
-- 011 used [rumen] alone. 012 widens to include rumen-tick (the Rumen
|
|
140
|
+
-- cron-tick subsystem) and "insight synthesis" (Rumen's product vocabulary).
|
|
141
|
+
-- ============================================================
|
|
142
|
+
-- Re-tags chopin-nashville rows that mention Rumen (including rumen-tick)
-- or its "insight synthesis" vocabulary, then reports how many rows moved.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'rumen'
   WHERE project = 'chopin-nashville'
     AND (
          -- A bare '%rumen%' substring also matches "instrument",
          -- "instrumental" and "instrumentation" (inst-RUMEN-t), which is
          -- ordinary piano-competition vocabulary. Require that "rumen" is
          -- NOT preceded by a letter. This still matches "Rumen",
          -- "rumen-tick", "rumen_jobs", "/rumen" and "_rumen", so a separate
          -- rumen-tick predicate is unnecessary. ~* is case-insensitive.
          content ~* '(^|[^[:alpha:]])rumen'
       OR content ILIKE '%insight synthesis%'
     );
  -- A plain '%rumen%' substring count over chopin-nashville can therefore
  -- remain non-zero after this bucket runs: those are "instrument" rows that
  -- were deliberately left in place.

  -- ROW_COUNT reflects the UPDATE above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 2 (rumen): % rows re-tagged', retagged_count;
END $$;
|
|
156
|
+
|
|
157
|
+
-- ============================================================
|
|
158
|
+
-- BUCKET 3 — podium
|
|
159
|
+
--
|
|
160
|
+
-- Same single-keyword pattern as 011. Podium-specific vocabulary doesn't
|
|
161
|
+
-- have synonyms that justify widening. (The Chopin in Bohemia festival
|
|
162
|
+
-- mentions Podium often, but because this bucket runs before bucket 4,
|
|
163
|
+
-- any row that mentions both podium and bohemia is claimed here;
|
|
164
|
+
-- bohemia-only rows fall to bucket 4.)
|
|
165
|
+
-- ============================================================
|
|
166
|
+
-- Re-tags chopin-nashville rows whose content mentions "podium"
-- (case-insensitive substring), then reports how many rows moved.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'podium'
   WHERE content ILIKE '%podium%'
     AND project = 'chopin-nashville';

  -- ROW_COUNT reflects the UPDATE above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 3 (podium): % rows re-tagged', retagged_count;
END $$;
|
|
176
|
+
|
|
177
|
+
-- ============================================================
|
|
178
|
+
-- BUCKET 4 — chopin-in-bohemia (NEW in 012)
|
|
179
|
+
--
|
|
180
|
+
-- The 2026 festival is a distinct project from the Chopin Nashville
|
|
181
|
+
-- competition. Keywords: bohemia (substring; festival-specific), "chopin in
|
|
182
|
+
-- bohemia" (full phrase, near-zero false positives), "2026 festival" (date+
|
|
183
|
+
-- project disambiguator).
|
|
184
|
+
--
|
|
185
|
+
-- Note: rows that mention "Chopin Nashville" AND "Bohemia" together (rare —
|
|
186
|
+
-- maybe cross-project planning notes) will already have been claimed by
|
|
187
|
+
-- earlier buckets if they also mention TermDeck/Rumen/Podium tooling.
|
|
188
|
+
-- Otherwise they land here, which is the right call: the "current festival
|
|
189
|
+
-- being planned" is Bohemia 2026.
|
|
190
|
+
-- ============================================================
|
|
191
|
+
-- Re-tags chopin-nashville rows that carry any of the Bohemia festival
-- keywords, then reports how many rows moved.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'chopin-in-bohemia'
   WHERE project = 'chopin-nashville'
     -- ILIKE ANY is true when content matches at least one pattern
     -- (case-insensitive), i.e. the same as OR-ing the three ILIKEs.
     AND content ILIKE ANY (ARRAY[
           '%bohemia%',
           '%chopin in bohemia%',
           '%2026 festival%'
         ]);

  -- ROW_COUNT reflects the UPDATE above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 4 (chopin-in-bohemia): % rows re-tagged', retagged_count;
END $$;
|
|
205
|
+
|
|
206
|
+
-- ============================================================
|
|
207
|
+
-- BUCKET 5 — chopin-scheduler (NEW in 012; absorbs Maestro alias)
|
|
208
|
+
--
|
|
209
|
+
-- Per orchestrator clarification 2026-04-28 12:51 ET: "Maestro" is the
|
|
210
|
+
-- working/branding name for the chopin-scheduler project. Same project,
|
|
211
|
+
-- two names. The on-disk path is SchedulingApp/, so the keywords cover both
|
|
212
|
+
-- the path-style identifier (scheduling, schedulingapp) and the branding
|
|
213
|
+
-- alias (\mMaestro\M — POSIX word-boundary, case-sensitive Capitalized
|
|
214
|
+
-- token).
|
|
215
|
+
--
|
|
216
|
+
-- The case-sensitive Maestro pattern matters: lowercase "maestro" can
|
|
217
|
+
-- appear in unrelated content (log prefixes like "[maestro]" if any tool
|
|
218
|
+
-- ever named itself that, generic music vocabulary). Capitalized Maestro
|
|
219
|
+
-- with word boundaries is much closer to "the project name" intent.
|
|
220
|
+
-- ============================================================
|
|
221
|
+
-- Re-tags chopin-nashville rows that mention the scheduler by its path-style
-- names or by the capitalized "Maestro" alias, then reports how many moved.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'chopin-scheduler'
   WHERE project = 'chopin-nashville'
     AND (
          -- Case-sensitive regex: \m / \M anchor at word start / end, so
          -- only the standalone capitalized token "Maestro" matches.
          content ~ '\mMaestro\M'
          -- Case-insensitive substring matches on the path-style names.
       OR content ILIKE ANY (ARRAY[
            '%scheduling%',
            '%schedulingapp%'
          ])
     );

  -- ROW_COUNT reflects the UPDATE above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 5 (chopin-scheduler): % rows re-tagged', retagged_count;
END $$;
|
|
235
|
+
|
|
236
|
+
-- ============================================================
|
|
237
|
+
-- BUCKET 6 — pvb (case-insensitive PVB / petvetbid markers)
|
|
238
|
+
--
|
|
239
|
+
-- Same pattern as 011 bucket 4. PVB is small in the chopin-nashville bucket
|
|
240
|
+
-- (Sprint 39 dry-run found 7 rows; live apply landed 3 because bucket 1
|
|
241
|
+
-- claimed mnestra-AND-PVB rows first). 012's earlier expansion of bucket 1
|
|
242
|
+
-- means this stays small or zero.
|
|
243
|
+
-- ============================================================
|
|
244
|
+
-- Re-tags chopin-nashville rows that mention PVB / petvetbid / "pet vet bid"
-- (all case-insensitive substrings), then reports how many rows moved.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'pvb'
   WHERE project = 'chopin-nashville'
     -- True when content matches at least one of the patterns.
     AND content ILIKE ANY (ARRAY[
           '%PVB%',
           '%petvetbid%',
           '%pet vet bid%'
         ]);

  -- ROW_COUNT reflects the UPDATE above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 6 (pvb): % rows re-tagged', retagged_count;
END $$;
|
|
258
|
+
|
|
259
|
+
-- ============================================================
|
|
260
|
+
-- BUCKET 7 — claimguard (NEW in 012)
|
|
261
|
+
--
|
|
262
|
+
-- ClaimGuard-AI is the active Unagi project (Joshua's roadmap shows it as
|
|
263
|
+
-- the next 1-2 sprints after Sprint 41 ships). On-disk path is
|
|
264
|
+
-- ~/Documents/Unagi/gorgias-ticket-monitor/. Keywords:
|
|
265
|
+
-- - claimguard (substring; product name, near-zero false positives)
|
|
266
|
+
-- - gorgias-ticket-monitor (the on-disk dir name; near-zero FP)
|
|
267
|
+
-- - "gorgias ticket monitor" (the spoken-form variant)
|
|
268
|
+
--
|
|
269
|
+
-- The bare "gorgias" keyword is intentionally NOT used here because the
|
|
270
|
+
-- pre-existing `gorgias` tag (468 rows) and `gorgias-ticket-monitor` tag
|
|
271
|
+
-- (207 rows) are separate categories — bare "gorgias" content could be
|
|
272
|
+
-- about Gorgias-the-helpdesk-product unrelated to ClaimGuard. The
|
|
273
|
+
-- compound-token discipline keeps the bucket precise.
|
|
274
|
+
-- ============================================================
|
|
275
|
+
-- Re-tags chopin-nashville rows that mention ClaimGuard or the
-- gorgias-ticket-monitor compound name (hyphenated or spaced), then reports
-- how many rows moved. Bare "gorgias" is deliberately not a keyword here.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'claimguard'
   WHERE project = 'chopin-nashville'
     -- True when content matches at least one of the patterns
     -- (case-insensitive substring).
     AND content ILIKE ANY (ARRAY[
           '%claimguard%',
           '%gorgias-ticket-monitor%',
           '%gorgias ticket monitor%'
         ]);

  -- ROW_COUNT reflects the UPDATE above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 7 (claimguard): % rows re-tagged', retagged_count;
END $$;
|
|
289
|
+
|
|
290
|
+
-- ============================================================
|
|
291
|
+
-- BUCKET 8 — dor (REUSED VERBATIM from 011's tightened pattern)
|
|
292
|
+
--
|
|
293
|
+
-- 011's audit found that the original briefing's `%dor%` ILIKE pattern
|
|
294
|
+
-- produced ~33% false positives (matched "dormant", "vendored", "indoor",
|
|
295
|
+
-- etc.). 011 tightened to:
|
|
296
|
+
-- - POSIX word boundary `\mDOR\M` — case-sensitive uppercase only
|
|
297
|
+
-- - path/identifier markers: /DOR/, ~/Documents/DOR, dor.config,
|
|
298
|
+
-- "Rust LLM gateway" (DOR's tagline)
|
|
299
|
+
-- - openclaw substring (OpenClaw is the slack-channel automation product
|
|
300
|
+
-- that lives next to DOR in Joshua's stack)
|
|
301
|
+
--
|
|
302
|
+
-- 012 reuses this verbatim. After 011 caught 3 dor rows live, residue is
|
|
303
|
+
-- expected to be near-zero — but the bucket stays in case any new rows
|
|
304
|
+
-- accumulated post-Sprint-39 carry the markers.
|
|
305
|
+
-- ============================================================
|
|
306
|
+
-- Re-tags chopin-nashville rows that carry one of the DOR markers, then
-- reports how many rows moved. The patterns are unchanged; only the layout
-- of the predicate differs.
DO $$
DECLARE
  retagged_count int;
BEGIN
  UPDATE memory_items
     SET project = 'dor'
   WHERE project = 'chopin-nashville'
     AND (
          -- Case-sensitive, word-bounded uppercase token: matches "DOR" but
          -- not "dormant", "vendored" or "indoor".
          content ~ '\mDOR\M'
          -- Case-insensitive substring markers (paths, config file name,
          -- tagline, OpenClaw).
       OR content ILIKE ANY (ARRAY[
            '%/DOR/%',
            '%~/Documents/DOR%',
            '%dor.config%',
            '%Rust LLM gateway%',
            '%openclaw%'
          ])
     );

  -- ROW_COUNT reflects the UPDATE above; zero on a re-run.
  GET DIAGNOSTICS retagged_count = ROW_COUNT;
  RAISE NOTICE '[012-retaxonomy] bucket 8 (dor): % rows re-tagged', retagged_count;
END $$;
|
|
323
|
+
|
|
324
|
+
-- ============================================================
|
|
325
|
+
-- AUDIT AFTER
|
|
326
|
+
-- ============================================================
|
|
327
|
+
-- Emits the same per-tag counts as the BEFORE audit, taken after all eight
-- bucket UPDATEs, plus a reminder of the Sprint 41 acceptance target. All
-- nine counts come from a single pass over memory_items.
DO $$
DECLARE
  n_chopin     int;
  n_termdeck   int;
  n_rumen      int;
  n_podium     int;
  n_bohemia    int;
  n_scheduler  int;
  n_pvb        int;
  n_claimguard int;
  n_dor        int;
BEGIN
  SELECT
    count(*) FILTER (WHERE project = 'chopin-nashville'),
    count(*) FILTER (WHERE project = 'termdeck'),
    count(*) FILTER (WHERE project = 'rumen'),
    count(*) FILTER (WHERE project = 'podium'),
    count(*) FILTER (WHERE project = 'chopin-in-bohemia'),
    count(*) FILTER (WHERE project = 'chopin-scheduler'),
    count(*) FILTER (WHERE project = 'pvb'),
    count(*) FILTER (WHERE project = 'claimguard'),
    count(*) FILTER (WHERE project = 'dor')
  INTO
    n_chopin, n_termdeck, n_rumen, n_podium, n_bohemia,
    n_scheduler, n_pvb, n_claimguard, n_dor
  FROM memory_items
  -- Pre-filter to the nine audited tags; rows with any other tag would
  -- contribute to none of the FILTERed counts anyway.
  WHERE project IN (
    'chopin-nashville', 'termdeck', 'rumen', 'podium', 'chopin-in-bohemia',
    'chopin-scheduler', 'pvb', 'claimguard', 'dor'
  );

  RAISE NOTICE '[012-retaxonomy] AFTER chopin-nashville=% termdeck=% rumen=% podium=% chopin-in-bohemia=% chopin-scheduler=% pvb=% claimguard=% dor=%',
    n_chopin, n_termdeck, n_rumen, n_podium, n_bohemia, n_scheduler, n_pvb, n_claimguard, n_dor;
  RAISE NOTICE '[012-retaxonomy] Sprint 41 acceptance target: chopin-nashville drops 947 -> < 100 after T2+T4. T2 (this migration) handles deterministic keyword cases; T4 LLM-classifies the residue. If chopin-nashville count after this migration is still > 200, T4 has more rows to chew through; if < 100 already, T4 may have very little to do.';
END $$;

-- Closes the transaction opened by BEGIN at the top of the migration.
COMMIT;
|
|
354
|
+
|
|
355
|
+
-- ============================================================
|
|
356
|
+
-- POST-APPLY: optional verification queries (NOT part of the migration).
|
|
357
|
+
-- Run separately to confirm the new taxonomy holds and to spot-check
|
|
358
|
+
-- false-positive rates per bucket.
|
|
359
|
+
-- ============================================================
|
|
360
|
+
--
|
|
361
|
+
-- 1. Tag distribution after migration:
|
|
362
|
+
-- SELECT project, count(*) FROM memory_items
|
|
363
|
+
-- GROUP BY project ORDER BY count(*) DESC LIMIT 20;
|
|
364
|
+
--
|
|
365
|
+
-- 2. Confirm no chopin-nashville rows match obvious termdeck/rumen/podium
|
|
366
|
+
-- keywords (these should all return 0 if the migration succeeded):
|
|
367
|
+
-- SELECT count(*) FROM memory_items
|
|
368
|
+
-- WHERE project='chopin-nashville'
|
|
369
|
+
--      AND (content ILIKE '%termdeck%' OR content ~* '\mrumen\M'
|
|
370
|
+
-- OR content ILIKE '%podium%' OR content ILIKE '%bohemia%'
|
|
371
|
+
-- OR content ILIKE '%scheduling%' OR content ILIKE '%claimguard%');
|
|
372
|
+
-- -- Expected: 0
|
|
373
|
+
--
|
|
374
|
+
-- 3. Spot-check false-positive rate per bucket (replace 'termdeck' with
|
|
375
|
+
-- each new tag in turn):
|
|
376
|
+
-- SELECT id, left(content, 200) AS preview
|
|
377
|
+
-- FROM memory_items
|
|
378
|
+
-- WHERE project='termdeck' AND id IN (
|
|
379
|
+
-- SELECT id FROM memory_items
|
|
380
|
+
-- WHERE project='termdeck'
|
|
381
|
+
-- ORDER BY updated_at DESC LIMIT 10
|
|
382
|
+
-- );
|
|
383
|
+
--
|
|
384
|
+
-- 4. Confirm the legitimate-chopin-nashville signal is preserved (rows
|
|
385
|
+
--    matching competition/laureate/applicant/Acceptd/NICPC/jury/
|
|
386
|
+
-- repertoire keywords should still be tagged chopin-nashville,
|
|
387
|
+
-- EXCEPT for those that ALSO matched a code-project keyword and got
|
|
388
|
+
-- legitimately re-tagged):
|
|
389
|
+
-- SELECT count(*) FROM memory_items
|
|
390
|
+
-- WHERE project='chopin-nashville'
|
|
391
|
+
-- AND (content ILIKE '%competition%' OR content ILIKE '%laureate%'
|
|
392
|
+
-- OR content ILIKE '%applicant%' OR content ILIKE '%Acceptd%'
|
|
393
|
+
-- OR content ILIKE '%NICPC%' OR content ILIKE '%repertoire%'
|
|
394
|
+
-- OR content ILIKE '%jury%');
|
|
395
|
+
-- -- Expected: most of the residue (~71+ rows from Sprint 39 baseline,
|
|
396
|
+
-- -- possibly higher as more legitimate competition content has
|
|
397
|
+
-- -- accumulated since 2026-04-27).
|