@jhizzard/termdeck 0.10.0 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,397 @@
1
+ -- Sprint 41 T2 — chopin-nashville re-taxonomy.
2
+ --
3
+ -- Why this exists:
4
+ -- Sprint 39 T3 migration 011_project_tag_backfill.sql moved 192 rows out of
5
+ -- the chopin-nashville bucket (1,139 → 947) using a conservative 5-bucket
6
+ -- keyword pass. 947 rows still remain mis-tagged. Sprint 41 widens the
7
+ -- keyword sets per the new project taxonomy (T1 owns the canonical doc at
8
+ -- docs/PROJECT-TAXONOMY.md) and adds three buckets that 011 did not have:
9
+ -- - chopin-in-bohemia (festival, distinct from the Nashville competition)
10
+ -- - chopin-scheduler (the SchedulingApp / "Maestro" project — single
11
+ -- project under two names per orchestrator
12
+ -- mid-inject clarification 2026-04-28 12:51 ET)
13
+ -- - claimguard (Gorgias-ticket-monitor / ClaimGuard-AI work)
14
+ --
15
+ -- The remaining residue after this migration is what Sprint 41 T4 hands to
16
+ -- the LLM-classification runner. Conservative wins: rows with no clear
17
+ -- keyword signal STAY chopin-nashville for T4 to handle.
18
+ --
19
+ -- What this migration does NOT do:
20
+ -- - Does NOT touch mnestra_session_memory / mnestra_project_memory / etc.
21
+ -- (legacy rag-events tables; different write path; Sprint 42+ cleanup).
22
+ -- - Does NOT consolidate duplicate tags like 'gorgias' vs
23
+ -- 'gorgias-ticket-monitor', 'pvb' vs 'PVB', or 'mnestra' vs 'engram'.
24
+ -- Visible in `SELECT project, count(*) FROM memory_items GROUP BY
25
+ -- project` but a separate cleanup pass.
26
+ -- - Does NOT re-tag rows whose only signal is the legitimate
27
+ -- chopin-nashville vocabulary (competition / performance / jury /
28
+ -- sponsor / applicant / repertoire / Acceptd / NICPC / laureate). Those
29
+ -- are the rows the chopin-nashville tag SHOULD claim.
30
+ -- - Does NOT touch source_session_id → transcript_path → cwd resolution.
31
+ -- The briefing scoped that as a possible additional signal, not
32
+ -- required. Keyword bucketing + T4's LLM pass hits the < 100 target.
33
+ --
34
+ -- Heuristic — content keyword bucketing:
35
+ -- The migration runs UPDATEs sequentially. Earlier buckets claim ambiguous
36
+ -- multi-project rows first; later buckets only see rows that no earlier
37
+ -- bucket has already re-tagged. Order is broadest-first (largest expected
38
+ -- bucket size first):
39
+ --
40
+ -- 1. termdeck — termdeck, mnestra, "4+1 sprint", xterm,
41
+ -- node-pty, flashback, memory_items,
42
+ -- memory_relationships
43
+ -- 2. rumen — rumen, rumen-tick, "insight synthesis"
44
+ -- 3. podium — podium
45
+ -- 4. chopin-in-bohemia — bohemia, "chopin in bohemia", "2026 festival"
46
+ -- 5. chopin-scheduler — scheduling, schedulingapp, \mMaestro\M
47
+ -- (Maestro is the working name; chopin-scheduler
48
+ -- is the canonical tag — alias confirmed 2026-04-28
49
+ -- by Joshua; case-sensitive word-boundary token
50
+ -- avoids matching unrelated "[maestro]" log
51
+ -- prefixes)
52
+ -- 6. pvb — PVB, petvetbid, "pet vet bid"
53
+ -- 7. claimguard — claimguard, gorgias-ticket-monitor,
54
+ -- "gorgias ticket monitor"
55
+ -- 8. dor — \mDOR\M, /DOR/, ~/Documents/DOR, dor.config,
56
+ -- "Rust LLM gateway", openclaw
57
+ -- (reused verbatim from 011's tightened
58
+ -- pattern; word-boundary uppercase rules out
59
+ -- "dormant", "vendored", "indoor", etc.)
60
+ --
61
+ -- Idempotence:
62
+ -- Every UPDATE is gated by `WHERE project = 'chopin-nashville'`. After the
63
+ -- first run, those rows have a different project tag, so re-running this
64
+ -- migration is a no-op (zero rows updated per bucket) unless new matching rows were tagged chopin-nashville in between. RAISE NOTICE on a
65
+ -- re-run will print zeros, which is the expected idempotent signal.
66
+ --
67
+ -- Application:
68
+ -- THIS MIGRATION IS NOT EXECUTED BY THE LANE THAT WROTE IT. Orchestrator
69
+ -- reviews the RAISE NOTICE counts after applying. Apply via the bundled
70
+ -- migration runner at packages/server/src/setup/migration-runner.js (which
71
+ -- uses node-postgres client.query — psql metacommands like \gset are NOT
72
+ -- available, so the count probes use GET DIAGNOSTICS ROW_COUNT inside DO
73
+ -- blocks). Manual fallback:
74
+ -- `psql "$DATABASE_URL" -f 012_project_tag_re_taxonomy.sql`.
75
+
76
+ BEGIN;
77
+
78
+ -- ============================================================
79
+ -- AUDIT BEFORE — per-tag row counts, logged ahead of any re-tagging
80
+ -- ============================================================
81
+ DO $$
82
+ DECLARE
83
+ n_chopin int;      -- rows still tagged chopin-nashville
84
+ n_termdeck int;
85
+ n_rumen int;
86
+ n_podium int;
87
+ n_bohemia int;
88
+ n_scheduler int;
89
+ n_pvb int;
90
+ n_claimguard int;
91
+ n_dor int;
92
+ BEGIN
93
+ n_chopin := (SELECT count(*) FROM memory_items WHERE project = 'chopin-nashville');
94
+ n_termdeck := (SELECT count(*) FROM memory_items WHERE project = 'termdeck');
95
+ n_rumen := (SELECT count(*) FROM memory_items WHERE project = 'rumen');
96
+ n_podium := (SELECT count(*) FROM memory_items WHERE project = 'podium');
97
+ n_bohemia := (SELECT count(*) FROM memory_items WHERE project = 'chopin-in-bohemia');
98
+ n_scheduler := (SELECT count(*) FROM memory_items WHERE project = 'chopin-scheduler');
99
+ n_pvb := (SELECT count(*) FROM memory_items WHERE project = 'pvb');
100
+ n_claimguard := (SELECT count(*) FROM memory_items WHERE project = 'claimguard');
101
+ n_dor := (SELECT count(*) FROM memory_items WHERE project = 'dor');
102
+ RAISE NOTICE '[012-retaxonomy] BEFORE chopin-nashville=% termdeck=% rumen=% podium=% chopin-in-bohemia=% chopin-scheduler=% pvb=% claimguard=% dor=%',
103
+ n_chopin, n_termdeck, n_rumen, n_podium, n_bohemia, n_scheduler, n_pvb, n_claimguard, n_dor;
104
+ END $$;
105
+
106
+ -- ============================================================
107
+ -- BUCKET 1 — termdeck (broadest first; claims ambiguous multi-project rows)
108
+ --
109
+ -- Widened from 011's 3-keyword set [termdeck | mnestra | "4+1 sprint"] to
110
+ -- include TermDeck-internal vocabulary that almost never appears outside the
111
+ -- TermDeck stack (xterm, node-pty), the Flashback subsystem name, and the
112
+ -- memory_* table identifiers (which are spoken about in TermDeck/Mnestra
113
+ -- context overwhelmingly — graph-routes, mnestra-bridge, the migrations
114
+ -- themselves).
115
+ -- ============================================================
116
+ DO $$
117
+ DECLARE
118
+ retagged int;  -- rows moved by this bucket
119
+ BEGIN
120
+ UPDATE memory_items SET project = 'termdeck'
121
+ WHERE project = 'chopin-nashville'
122
+ AND content ILIKE ANY (ARRAY[  -- one case-insensitive substring probe per keyword
123
+ '%termdeck%',
124
+ '%mnestra%',
125
+ '%4+1 sprint%',
126
+ '%xterm%',
127
+ '%node-pty%',
128
+ '%flashback%',
129
+ '%memory_items%',  -- NOTE: "_" is a LIKE single-character wildcard, so "memory items" also matches
130
+ '%memory_relationships%'
131
+ ]);
132
+ GET DIAGNOSTICS retagged = ROW_COUNT;
133
+ RAISE NOTICE '[012-retaxonomy] bucket 1 (termdeck): % rows re-tagged', retagged;
134
+ END $$;
135
+
136
+ -- ============================================================
137
+ -- BUCKET 2 — rumen
138
+ -- 011 used [rumen] alone. 012 adds "insight synthesis" (Rumen's product
139
+ -- vocabulary) and anchors "rumen" so it cannot follow a letter: the bare
140
+ -- %rumen% substring also hits "instrument(al)". rumen-tick is still covered.
141
+ -- ============================================================
142
+ DO $$
143
+ DECLARE
144
+ rows_updated int;
145
+ BEGIN
146
+ UPDATE memory_items SET project = 'rumen'
147
+ WHERE project = 'chopin-nashville'
148
+ AND (
149
+ -- Rows naming Rumen only inside camelCase (e.g. "startRumen") no longer match; they stay for T4.
150
+ content ~* '(^|[^[:alpha:]])rumen'  -- a post-apply '%rumen%' probe can still count "instrument" rows
151
+ OR content ILIKE '%insight synthesis%'
152
+ );
153
+ GET DIAGNOSTICS rows_updated = ROW_COUNT;
154
+ RAISE NOTICE '[012-retaxonomy] bucket 2 (rumen): % rows re-tagged', rows_updated;
155
+ END $$;
156
+
157
+ -- ============================================================
158
+ -- BUCKET 3 — podium
159
+ --
160
+ -- Same single-keyword pattern as 011. Podium-specific vocabulary doesn't
161
+ -- have synonyms that justify widening. (The Chopin in Bohemia festival
162
+ -- mentions Podium often; because this bucket runs before bucket 4, any
163
+ -- remaining row that mentions podium — with or without bohemia — is claimed
164
+ -- here; rows that mention bohemia but not podium fall to bucket 4.)
165
+ -- ============================================================
166
+ DO $$
167
+ DECLARE
168
+ retagged int;  -- rows moved by this bucket
169
+ BEGIN
170
+ UPDATE memory_items SET project = 'podium'
171
+ WHERE content ILIKE '%podium%'  -- single case-insensitive substring probe
172
+ AND project = 'chopin-nashville';  -- only rows no earlier bucket re-tagged
173
+ GET DIAGNOSTICS retagged = ROW_COUNT;
174
+ RAISE NOTICE '[012-retaxonomy] bucket 3 (podium): % rows re-tagged', retagged;
175
+ END $$;
176
+
177
+ -- ============================================================
178
+ -- BUCKET 4 — chopin-in-bohemia (NEW in 012)
179
+ --
180
+ -- The 2026 festival is a distinct project from the Chopin Nashville
181
+ -- competition. Keywords: bohemia (substring; festival-specific), "chopin in
182
+ -- bohemia" (full phrase, near-zero false positives), "2026 festival" (date+
183
+ -- project disambiguator).
184
+ --
185
+ -- Note: rows that mention "Chopin Nashville" AND "Bohemia" together (rare —
186
+ -- maybe cross-project planning notes) will already have been claimed by
187
+ -- earlier buckets if they also mention TermDeck/Rumen/Podium tooling.
188
+ -- Otherwise they land here, which is the right call: the "current festival
189
+ -- being planned" is Bohemia 2026.
190
+ -- ============================================================
191
+ DO $$
192
+ DECLARE
193
+ retagged int;  -- rows moved by this bucket
194
+ BEGIN
195
+ UPDATE memory_items SET project = 'chopin-in-bohemia'
196
+ WHERE project = 'chopin-nashville'
197
+ AND content ILIKE ANY (ARRAY[  -- case-insensitive substring probes
198
+ '%bohemia%',  -- already covers the full phrase on the next line
199
+ '%chopin in bohemia%',
200
+ '%2026 festival%'
201
+ ]);
202
+ GET DIAGNOSTICS retagged = ROW_COUNT;
203
+ RAISE NOTICE '[012-retaxonomy] bucket 4 (chopin-in-bohemia): % rows re-tagged', retagged;
204
+ END $$;
205
+
206
+ -- ============================================================
207
+ -- BUCKET 5 — chopin-scheduler (NEW in 012; absorbs Maestro alias)
208
+ --
209
+ -- Per orchestrator clarification 2026-04-28 12:51 ET: "Maestro" is the
210
+ -- working/branding name for the chopin-scheduler project. Same project,
211
+ -- two names. The on-disk path is SchedulingApp/, so the keywords cover both
212
+ -- the path-style identifier (scheduling, schedulingapp) and the branding
213
+ -- alias (\mMaestro\M — POSIX word-boundary, case-sensitive Capitalized
214
+ -- token).
215
+ --
216
+ -- The case-sensitive Maestro pattern matters: lowercase "maestro" can
217
+ -- appear in unrelated content (log prefixes like "[maestro]" if any tool
218
+ -- ever named itself that, generic music vocabulary). Capitalized Maestro
219
+ -- with word boundaries is much closer to "the project name" intent.
220
+ -- ============================================================
221
+ DO $$
222
+ DECLARE
223
+ retagged int;  -- rows moved by this bucket
224
+ BEGIN
225
+ UPDATE memory_items SET project = 'chopin-scheduler'
226
+ WHERE project = 'chopin-nashville'
227
+ AND (
228
+ content ~ '\mMaestro\M'  -- case-sensitive, whole-word branding alias
229
+ OR content ILIKE ANY (ARRAY['%scheduling%', '%schedulingapp%'])
230
+ -- NOTE(review): '%schedulingapp%' is already covered by '%scheduling%'; kept as an explicit marker.
231
+ );
232
+ GET DIAGNOSTICS retagged = ROW_COUNT;
233
+ RAISE NOTICE '[012-retaxonomy] bucket 5 (chopin-scheduler): % rows re-tagged', retagged;
234
+ END $$;
235
+
236
+ -- ============================================================
237
+ -- BUCKET 6 — pvb (case-insensitive PVB / petvetbid markers)
238
+ --
239
+ -- Same pattern as 011 bucket 4. PVB is small in the chopin-nashville bucket
240
+ -- (Sprint 39 dry-run found 7 rows; live apply landed 3 because bucket 1
241
+ -- claimed mnestra-AND-PVB rows first). Because 012 widens bucket 1, which
242
+ -- still runs first, this bucket is expected to stay small or zero.
243
+ -- ============================================================
244
+ DO $$
245
+ DECLARE
246
+ retagged int;  -- rows moved by this bucket
247
+ BEGIN
248
+ UPDATE memory_items SET project = 'pvb'
249
+ WHERE project = 'chopin-nashville'
250
+ AND content ILIKE ANY (ARRAY[  -- case-insensitive substring probes
251
+ '%PVB%',
252
+ '%petvetbid%',
253
+ '%pet vet bid%'
254
+ ]);
255
+ GET DIAGNOSTICS retagged = ROW_COUNT;
256
+ RAISE NOTICE '[012-retaxonomy] bucket 6 (pvb): % rows re-tagged', retagged;
257
+ END $$;
258
+
259
+ -- ============================================================
260
+ -- BUCKET 7 — claimguard (NEW in 012)
261
+ --
262
+ -- ClaimGuard-AI is the active Unagi project (Joshua's roadmap shows it as
263
+ -- the next 1-2 sprints after Sprint 41 ships). On-disk path is
264
+ -- ~/Documents/Unagi/gorgias-ticket-monitor/. Keywords:
265
+ -- - claimguard (substring; product name, near-zero false positives)
266
+ -- - gorgias-ticket-monitor (the on-disk dir name; near-zero FP)
267
+ -- - "gorgias ticket monitor" (the spoken-form variant)
268
+ --
269
+ -- The bare "gorgias" keyword is intentionally NOT used here because the
270
+ -- pre-existing `gorgias` tag (468 rows) and `gorgias-ticket-monitor` tag
271
+ -- (207 rows) are separate categories — bare "gorgias" content could be
272
+ -- about Gorgias-the-helpdesk-product unrelated to ClaimGuard. The
273
+ -- compound-token discipline keeps the bucket precise.
274
+ -- ============================================================
275
+ DO $$
276
+ DECLARE
277
+ retagged int;  -- rows moved by this bucket
278
+ BEGIN
279
+ UPDATE memory_items SET project = 'claimguard'
280
+ WHERE project = 'chopin-nashville'
281
+ AND content ILIKE ANY (ARRAY[  -- compound tokens only; no bare "gorgias" probe
282
+ '%claimguard%',
283
+ '%gorgias-ticket-monitor%',
284
+ '%gorgias ticket monitor%'
285
+ ]);
286
+ GET DIAGNOSTICS retagged = ROW_COUNT;
287
+ RAISE NOTICE '[012-retaxonomy] bucket 7 (claimguard): % rows re-tagged', retagged;
288
+ END $$;
289
+
290
+ -- ============================================================
291
+ -- BUCKET 8 — dor (REUSED VERBATIM from 011's tightened pattern)
292
+ --
293
+ -- 011's audit found that the original briefing's `%dor%` ILIKE pattern
294
+ -- produced ~33% false positives (matched "dormant", "vendored", "indoor",
295
+ -- etc.). 011 tightened to:
296
+ -- - POSIX word boundary `\mDOR\M` — case-sensitive uppercase only
297
+ -- - path/identifier markers: /DOR/, ~/Documents/DOR, dor.config,
298
+ -- "Rust LLM gateway" (DOR's tagline)
299
+ -- - openclaw substring (OpenClaw is the slack-channel automation product
300
+ -- that lives next to DOR in Joshua's stack)
301
+ --
302
+ -- 012 reuses this verbatim. After 011 caught 3 dor rows live, residue is
303
+ -- expected to be near-zero — but the bucket stays in case any new rows
304
+ -- accumulated post-Sprint-39 carry the markers.
305
+ -- ============================================================
306
+ DO $$
307
+ DECLARE
308
+ retagged int;  -- rows moved by this bucket
309
+ BEGIN
310
+ UPDATE memory_items SET project = 'dor'
311
+ WHERE project = 'chopin-nashville'
312
+ AND (
313
+ content ~ '\mDOR\M'  -- case-sensitive, whole-word uppercase token
314
+ OR content ILIKE ANY (ARRAY[  -- case-insensitive path / identifier markers
315
+ '%/DOR/%', '%~/Documents/DOR%',
316
+ '%dor.config%',  -- NOTE(review): also matches "vendor.config" — confirm acceptable
317
+ '%Rust LLM gateway%', '%openclaw%'
318
+ ])
319
+ );
320
+ GET DIAGNOSTICS retagged = ROW_COUNT;
321
+ RAISE NOTICE '[012-retaxonomy] bucket 8 (dor): % rows re-tagged', retagged;
322
+ END $$;
323
+
324
+ -- ============================================================
325
+ -- AUDIT AFTER — per-tag row counts once all eight buckets have run
326
+ -- ============================================================
327
+ DO $$
328
+ DECLARE
329
+ n_chopin int;      -- residue still tagged chopin-nashville
330
+ n_termdeck int;
331
+ n_rumen int;
332
+ n_podium int;
333
+ n_bohemia int;
334
+ n_scheduler int;
335
+ n_pvb int;
336
+ n_claimguard int;
337
+ n_dor int;
338
+ BEGIN
339
+ n_chopin := (SELECT count(*) FROM memory_items WHERE project = 'chopin-nashville');
340
+ n_termdeck := (SELECT count(*) FROM memory_items WHERE project = 'termdeck');
341
+ n_rumen := (SELECT count(*) FROM memory_items WHERE project = 'rumen');
342
+ n_podium := (SELECT count(*) FROM memory_items WHERE project = 'podium');
343
+ n_bohemia := (SELECT count(*) FROM memory_items WHERE project = 'chopin-in-bohemia');
344
+ n_scheduler := (SELECT count(*) FROM memory_items WHERE project = 'chopin-scheduler');
345
+ n_pvb := (SELECT count(*) FROM memory_items WHERE project = 'pvb');
346
+ n_claimguard := (SELECT count(*) FROM memory_items WHERE project = 'claimguard');
347
+ n_dor := (SELECT count(*) FROM memory_items WHERE project = 'dor');
348
+ RAISE NOTICE '[012-retaxonomy] AFTER chopin-nashville=% termdeck=% rumen=% podium=% chopin-in-bohemia=% chopin-scheduler=% pvb=% claimguard=% dor=%',
349
+ n_chopin, n_termdeck, n_rumen, n_podium, n_bohemia, n_scheduler, n_pvb, n_claimguard, n_dor;
350
+ RAISE NOTICE '[012-retaxonomy] Sprint 41 acceptance target: chopin-nashville drops 947 -> < 100 after T2+T4. T2 (this migration) handles deterministic keyword cases; T4 LLM-classifies the residue. If chopin-nashville count after this migration is still > 200, T4 has more rows to chew through; if < 100 already, T4 may have very little to do.';
351
+ END $$;
352
+
353
+ COMMIT;
354
+
355
+ -- ============================================================
356
+ -- POST-APPLY: optional verification queries (NOT part of the migration).
357
+ -- Run separately to confirm the new taxonomy holds and to spot-check
358
+ -- false-positive rates per bucket.
359
+ -- ============================================================
360
+ --
361
+ -- 1. Tag distribution after migration:
362
+ -- SELECT project, count(*) FROM memory_items
363
+ -- GROUP BY project ORDER BY count(*) DESC LIMIT 20;
364
+ --
365
+ -- 2. Confirm no chopin-nashville rows match obvious termdeck/rumen/podium
366
+ -- keywords (these should all return 0 if the migration succeeded):
367
+ -- SELECT count(*) FROM memory_items
368
+ -- WHERE project='chopin-nashville'
369
+ -- AND (content ILIKE '%termdeck%' OR content ILIKE '%rumen%'
370
+ -- OR content ILIKE '%podium%' OR content ILIKE '%bohemia%'
371
+ -- OR content ILIKE '%scheduling%' OR content ILIKE '%claimguard%');
372
+ -- -- Expected: 0
373
+ --
374
+ -- 3. Spot-check false-positive rate per bucket (replace 'termdeck' with
375
+ -- each new tag in turn):
376
+ -- SELECT id, left(content, 200) AS preview
377
+ -- FROM memory_items
378
+ -- WHERE project='termdeck' AND id IN (
379
+ -- SELECT id FROM memory_items
380
+ -- WHERE project='termdeck'
381
+ -- ORDER BY updated_at DESC LIMIT 10
382
+ -- );
383
+ --
384
+ -- 4. Confirm the legitimate-chopin-nashville signal is preserved (rows
385
+ -- matching competition/laureate/applicant/Acceptd/NICPC/jury/
386
+ -- repertoire keywords should still be tagged chopin-nashville,
387
+ -- EXCEPT for those that ALSO matched a code-project keyword and got
388
+ -- legitimately re-tagged):
389
+ -- SELECT count(*) FROM memory_items
390
+ -- WHERE project='chopin-nashville'
391
+ -- AND (content ILIKE '%competition%' OR content ILIKE '%laureate%'
392
+ -- OR content ILIKE '%applicant%' OR content ILIKE '%Acceptd%'
393
+ -- OR content ILIKE '%NICPC%' OR content ILIKE '%repertoire%'
394
+ -- OR content ILIKE '%jury%');
395
+ -- -- Expected: most of the residue (~71+ rows from Sprint 39 baseline,
396
+ -- -- possibly higher as more legitimate competition content has
397
+ -- -- accumulated since 2026-04-27).