switchroom 0.13.53 → 0.13.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -271,4 +271,65 @@ describe('validateClientMessage', () => {
271
271
  expect(validateClientMessage({ type: 'heartbeat' })).toBe(false)
272
272
  })
273
273
  })
274
+
275
+ describe('request_ms365_approval (RFC #1873 §8 PR 4)', () => {
276
+ const valid = {
277
+ type: 'request_ms365_approval',
278
+ correlationId: 'abc123',
279
+ agentName: 'clerk',
280
+ preview: { agentName: 'clerk', toolName: 'mcp__ms-365__upload-file-content' },
281
+ ttlMs: 300000,
282
+ }
283
+
284
+ it('accepts a valid request_ms365_approval', () => {
285
+ expect(validateClientMessage(valid)).toBe(true)
286
+ })
287
+
288
+ it('accepts when ttlMs is omitted (handler uses default)', () => {
289
+ const { ttlMs: _, ...without } = valid
290
+ expect(validateClientMessage(without)).toBe(true)
291
+ })
292
+
293
+ it('rejects missing correlationId', () => {
294
+ const { correlationId: _, ...m } = valid
295
+ expect(validateClientMessage(m)).toBe(false)
296
+ })
297
+
298
+ it('rejects empty correlationId', () => {
299
+ expect(validateClientMessage({ ...valid, correlationId: '' })).toBe(false)
300
+ })
301
+
302
+ it('rejects oversized correlationId (>64 chars)', () => {
303
+ expect(validateClientMessage({ ...valid, correlationId: 'x'.repeat(65) })).toBe(false)
304
+ })
305
+
306
+ it('rejects missing agentName', () => {
307
+ const { agentName: _, ...m } = valid
308
+ expect(validateClientMessage(m)).toBe(false)
309
+ })
310
+
311
+ it('rejects malformed agentName (caps, spaces)', () => {
312
+ expect(validateClientMessage({ ...valid, agentName: 'NOT-LOWER' })).toBe(false)
313
+ expect(validateClientMessage({ ...valid, agentName: 'has space' })).toBe(false)
314
+ })
315
+
316
+ it('rejects null / non-object preview', () => {
317
+ expect(validateClientMessage({ ...valid, preview: null })).toBe(false)
318
+ expect(validateClientMessage({ ...valid, preview: 'string' })).toBe(false)
319
+ expect(validateClientMessage({ ...valid, preview: 42 })).toBe(false)
320
+ })
321
+
322
+ it('rejects negative ttlMs', () => {
323
+ expect(validateClientMessage({ ...valid, ttlMs: -1 })).toBe(false)
324
+ })
325
+
326
+ it('rejects non-finite ttlMs', () => {
327
+ expect(validateClientMessage({ ...valid, ttlMs: Infinity })).toBe(false)
328
+ expect(validateClientMessage({ ...valid, ttlMs: NaN })).toBe(false)
329
+ })
330
+
331
+ it('rejects non-number ttlMs', () => {
332
+ expect(validateClientMessage({ ...valid, ttlMs: '300000' })).toBe(false)
333
+ })
334
+ })
274
335
  })
@@ -0,0 +1,115 @@
1
+ import { describe, expect, it } from 'vitest'
2
+ import { resolveOutboundTopic } from '../../src/telegram/topic-router.js'
3
+
4
+ /**
5
+ * PR5 — supergroup-mode slash-command smart-split (CPO #4).
6
+ *
7
+ * The gateway wires `runSwitchroomCommand` → `switchroomReply` →
8
+ * `slashCommandReplyOpts(ctx, classification)` → `resolveOutboundTopic`.
9
+ * The helper is a thin classifier on top of the existing router; this
10
+ * test pins the END contract that drives all 4 heavy-output commands
11
+ * (/logs, /audit, /upgradestatus, /memory) and any future mutation
12
+ * additions:
13
+ *
14
+ * - query → returns originThreadId verbatim, for supergroup and fleet/DM alike (undefined when there is no origin thread)
15
+ * - mutation → admin alias (or undefined for fleet/DM)
16
+ * - heavy → admin alias (or undefined for fleet/DM)
17
+ *
18
+ * The gateway wrapper additionally collapses `target === originThreadId`
19
+ * back to `{}` so a query in the originating topic doesn't write a
20
+ * redundant `message_thread_id` opt. That's a wire-shape micro-opt
21
+ * tested separately at the call site.
22
+ */
23
+
24
+ describe('PR5 slash-command smart split — router contract', () => {
25
+ const supergroup = {
26
+ default_topic_id: 1,
27
+ topic_aliases: { planning: 17, admin: 31, alerts: 42 },
28
+ }
29
+ const fleet = {} // no chat_id / default_topic_id → fleet/DM
30
+
31
+ describe('query class', () => {
32
+ it('supergroup: follows originThreadId', () => {
33
+ expect(
34
+ resolveOutboundTopic(supergroup, {
35
+ kind: 'command-query',
36
+ originThreadId: 17,
37
+ }),
38
+ ).toBe(17)
39
+ })
40
+
41
+ it('fleet: returns originThreadId unchanged (caller passes-through)', () => {
42
+ expect(
43
+ resolveOutboundTopic(fleet, {
44
+ kind: 'command-query',
45
+ originThreadId: 17,
46
+ }),
47
+ ).toBe(17)
48
+ })
49
+
50
+ it('supergroup, no origin thread (chat root): default_topic_id fallback', () => {
51
+ // command-query returns originThreadId verbatim, including
52
+ // undefined; the wrapper collapses undefined to "no override"
53
+ // and grammY's ctx.reply picks the originating topic anyway.
54
+ expect(
55
+ resolveOutboundTopic(supergroup, {
56
+ kind: 'command-query',
57
+ originThreadId: undefined,
58
+ }),
59
+ ).toBeUndefined()
60
+ })
61
+ })
62
+
63
+ describe('mutation class', () => {
64
+ it('supergroup: routes to admin alias', () => {
65
+ expect(resolveOutboundTopic(supergroup, { kind: 'command-mutation' })).toBe(31)
66
+ })
67
+
68
+ it('supergroup with no admin alias: default_topic_id fallback', () => {
69
+ const cfg = { default_topic_id: 1, topic_aliases: { planning: 17 } }
70
+ expect(resolveOutboundTopic(cfg, { kind: 'command-mutation' })).toBe(1)
71
+ })
72
+
73
+ it('fleet: returns undefined (caller falls through to ctx.reply)', () => {
74
+ expect(resolveOutboundTopic(fleet, { kind: 'command-mutation' })).toBeUndefined()
75
+ })
76
+ })
77
+
78
+ describe('heavy class (the 4 commands actually wired in PR5)', () => {
79
+ it('supergroup: /logs /audit /upgradestatus /memory all route to admin', () => {
80
+ // All four commands fold through the same `slashCommandReplyOpts(ctx, "heavy")`
81
+ // wrapper, which fires the same router event. One assertion covers
82
+ // all of them.
83
+ expect(resolveOutboundTopic(supergroup, { kind: 'command-heavy' })).toBe(31)
84
+ })
85
+
86
+ it('supergroup with no admin alias: default_topic_id fallback', () => {
87
+ const cfg = { default_topic_id: 1, topic_aliases: { planning: 17 } }
88
+ expect(resolveOutboundTopic(cfg, { kind: 'command-heavy' })).toBe(1)
89
+ })
90
+
91
+ it('fleet: returns undefined (caller falls through to ctx.reply)', () => {
92
+ expect(resolveOutboundTopic(fleet, { kind: 'command-heavy' })).toBeUndefined()
93
+ })
94
+ })
95
+
96
+ describe('separation contract: query vs mutation/heavy take different paths', () => {
97
+ // Pins the structural intent: a query and a mutation issued from
98
+ // the SAME originating topic in the SAME supergroup must resolve
99
+ // to DIFFERENT topics. If anyone collapses the three classes back
100
+ // to one event kind, this test fails loudly.
101
+ it('query.originThread !== mutation.adminAlias', () => {
102
+ const q = resolveOutboundTopic(supergroup, {
103
+ kind: 'command-query',
104
+ originThreadId: 17,
105
+ })
106
+ const m = resolveOutboundTopic(supergroup, { kind: 'command-mutation' })
107
+ const h = resolveOutboundTopic(supergroup, { kind: 'command-heavy' })
108
+ expect(q).toBe(17)
109
+ expect(m).toBe(31)
110
+ expect(h).toBe(31)
111
+ expect(q).not.toBe(m)
112
+ expect(m).toBe(h) // mutation and heavy both → admin
113
+ })
114
+ })
115
+ })
@@ -54,6 +54,41 @@ def extract_chat_id_from_prompt(prompt: str) -> Optional[str]:
54
54
  return chat_id or None
55
55
 
56
56
 
57
+ # Switchroom PR6a — extract topic context (chat_id + message_thread_id)
58
+ # from the `<channel ...>` envelope. message_thread_id is present only
59
+ # when the inbound came from a forum topic in a supergroup; for DMs and
60
+ # fleet-shared groups it's absent. Topic alias resolution is the
61
+ # caller's responsibility (env-injected JSON map of alias → thread_id, inverted at lookup time).
62
+ _THREAD_ID_RE = re.compile(
63
+ r"<channel\b[^>]*\bmessage_thread_id=[\"']([^\"']+)[\"']",
64
+ re.IGNORECASE,
65
+ )
66
+
67
+
68
+ def extract_topic_from_prompt(
69
+ prompt: str,
70
+ ) -> tuple[Optional[str], Optional[str]]:
71
+ """Pull (chat_id, message_thread_id) out of the channel envelope.
72
+
73
+ Returns ``(None, None)`` when the prompt isn't channel-wrapped.
74
+ Returns ``(chat_id, None)`` for DMs / non-forum chats where
75
+ `message_thread_id` is absent.
76
+
77
+ Both values are strings (mirroring the wire format — Telegram
78
+ thread_ids are numeric but we keep them as strings for cache-key
79
+ stability and config-map lookups).
80
+ """
81
+ chat_id = extract_chat_id_from_prompt(prompt)
82
+ if chat_id is None:
83
+ return None, None
84
+ head = prompt[:1024] if isinstance(prompt, str) else ""
85
+ tmatch = _THREAD_ID_RE.search(head)
86
+ thread_id = tmatch.group(1).strip() if tmatch else None
87
+ if thread_id == "":
88
+ thread_id = None
89
+ return chat_id, thread_id
90
+
91
+
57
92
  def gateway_socket_path() -> Optional[str]:
58
93
  """Resolve the gateway socket path for the current agent.
59
94
 
@@ -58,7 +58,7 @@ from lib.content import (
58
58
  )
59
59
  from lib.daemon import get_api_url
60
60
  from lib.directives import fetch_active_directives, format_active_directives_block
61
- from lib.gateway_ipc import extract_chat_id_from_prompt, update_placeholder
61
+ from lib.gateway_ipc import extract_chat_id_from_prompt, extract_topic_from_prompt, update_placeholder
62
62
  from lib.state import read_state, write_state
63
63
 
64
64
  LAST_RECALL_STATE = "last_recall.json"
@@ -99,6 +99,70 @@ DEMOTE_TAG_VARIANTS = (
99
99
  "no-recall",
100
100
  )
101
101
 
102
+ # Switchroom PR6 — supergroup-mode topic filter mode.
103
+ #
104
+ # Controls how memories from OTHER topics are surfaced to the model
105
+ # during recall. Default is "soft-preamble": all topic-tagged memories
106
+ # are returned (the model decides relevance via the preamble that names
107
+ # the active topic). "hard-filter" drops any memory whose stored
108
+ # `metadata.thread_id` doesn't match the active prompt's thread_id —
109
+ # the escape hatch if instrumentation shows binding failures (model
110
+ # applying the right memory to the wrong topic).
111
+ #
112
+ # The mode is process-wide via env var. Memories with no thread_id
113
+ # tag (legacy retains pre-PR6, or fleet-shared/DM agents) are NEVER
114
+ # dropped — they pass through both modes regardless of active topic.
115
+ TOPIC_FILTER_MODE_ENV = "HINDSIGHT_TOPIC_FILTER_MODE"
116
+ TOPIC_FILTER_MODES = ("soft-preamble", "hard-filter")
117
+
118
+
119
+ def _topic_filter_mode() -> str:
120
+ raw = os.environ.get(TOPIC_FILTER_MODE_ENV, "").strip().lower()
121
+ if raw in TOPIC_FILTER_MODES:
122
+ return raw
123
+ return "soft-preamble"
124
+
125
+
126
+ def _filter_by_active_topic(results: list, active_thread_id: str | None) -> tuple[list, int]:
127
+ """When hard-filter mode is on AND we know the active thread, drop
128
+ any memory whose stored metadata.thread_id is set to a different
129
+ value. Untagged memories pass through unconditionally.
130
+
131
+ Returns (filtered_results, dropped_count).
132
+ """
133
+ if active_thread_id is None:
134
+ return results, 0
135
+ kept: list = []
136
+ dropped = 0
137
+ for m in results:
138
+ meta = m.get("metadata") if isinstance(m, dict) else None
139
+ if not isinstance(meta, dict):
140
+ kept.append(m)
141
+ continue
142
+ source_thread = meta.get("thread_id")
143
+ if source_thread is None or str(source_thread) == str(active_thread_id):
144
+ kept.append(m)
145
+ else:
146
+ dropped += 1
147
+ return kept, dropped
148
+
149
+
150
+ def _summarise_source_topics(results: list) -> dict:
151
+ """Build a {thread_id: count} summary of recalled memories'
152
+ source topics. Used for instrumented binding-failure analysis
153
+ in the recall log.
154
+ """
155
+ summary: dict = {}
156
+ for m in results:
157
+ meta = m.get("metadata") if isinstance(m, dict) else None
158
+ if not isinstance(meta, dict):
159
+ summary["__untagged__"] = summary.get("__untagged__", 0) + 1
160
+ continue
161
+ tid = meta.get("thread_id")
162
+ key = str(tid) if tid is not None else "__no_thread__"
163
+ summary[key] = summary.get(key, 0) + 1
164
+ return summary
165
+
102
166
  # Switchroom #432 phase 4.3 — recall telemetry log.
103
167
  #
104
168
  # Every recall (cache hit or miss) appends a JSONL record to
@@ -123,15 +187,29 @@ def _cache_ttl_secs() -> int:
123
187
  return 0
124
188
 
125
189
 
126
- def _cache_key(session_id: str, prompt: str, bank_id: str, extra_banks: list) -> str:
190
+ def _cache_key(
191
+ session_id: str,
192
+ prompt: str,
193
+ bank_id: str,
194
+ extra_banks: list,
195
+ active_thread_id: str | None = None,
196
+ ) -> str:
127
197
  """Stable hash for cache keying. Session_id is included so a new
128
198
  session always misses, regardless of the TTL setting. Extra banks
129
- are sorted so list-order doesn't change the key."""
199
+ are sorted so list-order doesn't change the key.
200
+
201
+ PR6a: `active_thread_id` is included so cross-topic prompts in
202
+ supergroup mode (same session, same model, same prompt verbatim
203
+ but different topic) don't collide on the cache. Empty/None
204
+ collapses to the empty string — backward-compatible for
205
+ fleet-shared / DM agents where no thread_id is present.
206
+ """
130
207
  parts = [
131
208
  session_id or "",
132
209
  prompt or "",
133
210
  bank_id or "",
134
211
  ",".join(sorted(extra_banks or [])),
212
+ active_thread_id or "",
135
213
  ]
136
214
  payload = "\x1f".join(parts)
137
215
  return hashlib.sha256(payload.encode("utf-8")).hexdigest()
@@ -458,6 +536,25 @@ def main():
458
536
  if placeholder_chat_id:
459
537
  update_placeholder(placeholder_chat_id, "📚 recalling memories")
460
538
 
539
+ # PR6a — supergroup-mode topic context for the current turn.
540
+ # active_thread_id is the message_thread_id from the inbound
541
+ # envelope, used to (a) key the cache so cross-topic prompts
542
+ # don't collide, (b) optionally hard-filter memories by source
543
+ # topic, and (c) log source-vs-active distribution for
544
+ # binding-failure instrumentation.
545
+ active_chat_id, active_thread_id = extract_topic_from_prompt(prompt)
546
+ active_topic_alias = None
547
+ if active_thread_id is not None:
548
+ aliases_json = os.environ.get("HINDSIGHT_TOPIC_ALIASES_JSON", "")
549
+ if aliases_json:
550
+ try:
551
+ aliases = json.loads(aliases_json)
552
+ if isinstance(aliases, dict):
553
+ inverse = {str(v): k for k, v in aliases.items()}
554
+ active_topic_alias = inverse.get(str(active_thread_id))
555
+ except (json.JSONDecodeError, ValueError, TypeError):
556
+ pass
557
+
461
558
  # Resolve API URL (handles all three connection modes)
462
559
  def _dbg(*a):
463
560
  debug_log(config, *a)
@@ -483,7 +580,7 @@ def main():
483
580
  # Whole-session-scoped, opt-in via HINDSIGHT_RECALL_CACHE_TTL_SECS.
484
581
  cache_ttl = _cache_ttl_secs()
485
582
  cache_key = (
486
- _cache_key(session_id, prompt, bank_id, additional_banks)
583
+ _cache_key(session_id, prompt, bank_id, additional_banks, active_thread_id)
487
584
  if cache_ttl > 0
488
585
  else ""
489
586
  )
@@ -507,6 +604,13 @@ def main():
507
604
  "demoted_count": 0,
508
605
  "capped": False,
509
606
  "cache_hit": True,
607
+ # PR6 — record the active topic on cache hits too so the
608
+ # log is uniformly queryable (cache_key now includes
609
+ # active_thread_id, so a hit means the prior recall was
610
+ # for the same topic — no source_topics inferable here).
611
+ "active_thread_id": active_thread_id,
612
+ "active_topic_alias": active_topic_alias,
613
+ "topic_filter_mode": _topic_filter_mode(),
510
614
  })
511
615
  return
512
616
  debug_log(config, f"Recall cache MISS (key={cache_key[:12]}…)")
@@ -612,6 +716,28 @@ def main():
612
716
  if demoted_count > 0:
613
717
  debug_log(config, f"Filtered {demoted_count} demote-from-recall memories")
614
718
 
719
+ # PR6 — capture source-topic distribution BEFORE optional
720
+ # hard-filter so we can log the would-have-leaked count for
721
+ # binding-failure analysis. Computed unconditionally so the
722
+ # log row carries this for soft-preamble mode too (the
723
+ # whole point is to instrument binding rate over time).
724
+ source_topic_summary = _summarise_source_topics(results)
725
+
726
+ # PR6b — optional hard topic filter. Default soft-preamble (no-op);
727
+ # operator flips HINDSIGHT_TOPIC_FILTER_MODE=hard-filter when
728
+ # binding failures are observed. See _filter_by_active_topic and
729
+ # the TOPIC_FILTER_MODE_ENV comment block above for design notes.
730
+ topic_filter_mode = _topic_filter_mode()
731
+ topic_dropped = 0
732
+ if topic_filter_mode == "hard-filter":
733
+ results, topic_dropped = _filter_by_active_topic(results, active_thread_id)
734
+ if topic_dropped > 0:
735
+ debug_log(
736
+ config,
737
+ f"Topic hard-filter dropped {topic_dropped} cross-topic "
738
+ f"memories (active_thread_id={active_thread_id})",
739
+ )
740
+
615
741
  # Switchroom #475 — lexical-overlap relevance gate. Drops memories
616
742
  # whose Jaccard overlap with the query is below
617
743
  # `recallMinOverlap` (default 0.0 = disabled). Runs after the
@@ -660,9 +786,29 @@ def main():
660
786
  memories_formatted = format_memories(results)
661
787
  preamble = config.get("recallPromptPreamble", "")
662
788
  current_time = format_current_time()
789
+
790
+ # PR6 — supergroup-mode topic preamble (neutral tone per
791
+ # 2026-05-27 product decision). Only added when we know the
792
+ # active topic AND any of the recalled memories carries a
793
+ # thread_id tag — i.e. we have something for the model to
794
+ # be "topic-aware" about. Fleet-shared / DM agents never
795
+ # see this line.
796
+ topic_line = ""
797
+ if active_thread_id is not None and any(
798
+ isinstance(m.get("metadata"), dict)
799
+ and m["metadata"].get("thread_id") is not None
800
+ for m in results
801
+ ):
802
+ topic_label = active_topic_alias or f"thread {active_thread_id}"
803
+ topic_line = (
804
+ f"Current topic: {topic_label}. Recalled memories are "
805
+ f"tagged with their source topic.\n"
806
+ )
807
+
663
808
  memories_block = (
664
809
  f"<hindsight_memories>\n"
665
810
  f"{preamble}\n"
811
+ f"{topic_line}"
666
812
  f"Current time - {current_time}\n\n"
667
813
  f"{memories_formatted}\n"
668
814
  f"</hindsight_memories>"
@@ -732,6 +878,20 @@ def main():
732
878
  if isinstance(m, dict) and m.get("id")
733
879
  ],
734
880
  "cache_hit": False,
881
+ # PR6 — instrumentation for binding-failure analysis.
882
+ # `active_thread_id`: the current prompt's topic (null on
883
+ # DM / fleet-shared). `source_topics`: distribution of
884
+ # source thread_ids in the recall set (before optional
885
+ # hard-filter). `topic_filter_mode`: "soft-preamble" or
886
+ # "hard-filter". `topic_dropped`: count dropped by hard
887
+ # filter. From these fields we can derive the cross-topic
888
+ # recall rate over time and decide whether to flip to
889
+ # hard-filter mode based on real data.
890
+ "active_thread_id": active_thread_id,
891
+ "active_topic_alias": active_topic_alias,
892
+ "source_topics": source_topic_summary,
893
+ "topic_filter_mode": topic_filter_mode,
894
+ "topic_dropped": topic_dropped,
735
895
  })
736
896
 
737
897
  # Output JSON for Claude Code hook system
@@ -225,6 +225,58 @@ def run_retain(hook_input: dict, force: bool = False) -> dict:
225
225
  for k, v in config.get("retainMetadata", {}).items():
226
226
  metadata[k] = _resolve_template(str(v))
227
227
 
228
+ # Switchroom PR6a — topic tagging for supergroup-mode agents.
229
+ # Scan the messages we're retaining for the latest `<channel
230
+ # chat_id=... message_thread_id=...>` envelope and stamp the
231
+ # tuple into metadata. Downstream (recall.py) uses this to log
232
+ # active-vs-source topic for binding-failure analysis and to
233
+ # support hard-filter mode when an operator opts in.
234
+ #
235
+ # Harmless for fleet-shared / DM topology where every inbound from
236
+ # this agent carries the same chat_id (or no chat envelope at all
237
+ # for interactive / cron-only sessions) — the metadata is added
238
+ # but doesn't change behaviour.
239
+ try:
240
+ from lib.gateway_ipc import extract_topic_from_prompt
241
+ topic_chat_id = None
242
+ topic_thread_id = None
243
+ # Walk in reverse — most recent user message is the authoritative
244
+ # "active topic" at retain time.
245
+ for m in reversed(messages_to_retain):
246
+ if not isinstance(m, dict) or m.get("role") != "user":
247
+ continue
248
+ content = m.get("content")
249
+ text = content if isinstance(content, str) else (
250
+ # Claude Code list-content shape: [{type:"text", text:"..."}, ...]
251
+ next((p.get("text", "") for p in content if isinstance(p, dict) and p.get("type") == "text"), "")
252
+ if isinstance(content, list) else ""
253
+ )
254
+ c_id, t_id = extract_topic_from_prompt(text)
255
+ if c_id is not None:
256
+ topic_chat_id, topic_thread_id = c_id, t_id
257
+ break
258
+ if topic_chat_id is not None:
259
+ metadata["chat_id"] = topic_chat_id
260
+ if topic_thread_id is not None:
261
+ metadata["thread_id"] = topic_thread_id
262
+ # Resolve alias from operator-injected env map.
263
+ aliases_json = os.environ.get("HINDSIGHT_TOPIC_ALIASES_JSON", "")
264
+ if aliases_json:
265
+ try:
266
+ aliases = json.loads(aliases_json)
267
+ # aliases is {alias_name: thread_id_int_or_str}; build
268
+ # the inverse lookup once.
269
+ if isinstance(aliases, dict):
270
+ inverse = {str(v): k for k, v in aliases.items()}
271
+ alias = inverse.get(str(topic_thread_id))
272
+ if alias:
273
+ metadata["topic_alias"] = alias
274
+ except (json.JSONDecodeError, ValueError, TypeError):
275
+ pass # malformed env is non-fatal
276
+ except Exception as e:
277
+ # Topic tagging is best-effort — never fail a retain over it.
278
+ debug_log(config, f"Topic tagging skipped: {e}")
279
+
228
280
  debug_log(
229
281
  config, f"Retaining to bank '{bank_id}', doc '{document_id}', {message_count} messages, {len(transcript)} chars"
230
282
  )
@@ -25,11 +25,53 @@ if SCRIPTS_DIR not in sys.path:
25
25
 
26
26
  from lib.gateway_ipc import ( # noqa: E402
27
27
  extract_chat_id_from_prompt,
28
+ extract_topic_from_prompt,
28
29
  gateway_socket_path,
29
30
  update_placeholder,
30
31
  )
31
32
 
32
33
 
34
+ class ExtractTopicTests(unittest.TestCase):
35
+ """PR6a — (chat_id, message_thread_id) extraction for supergroup mode."""
36
+
37
+ def test_dm_returns_chat_id_thread_none(self):
38
+ # DM and fleet-shared envelopes carry chat_id only.
39
+ prompt = '<channel source="switchroom-telegram" chat_id="12345">hi</channel>'
40
+ self.assertEqual(extract_topic_from_prompt(prompt), ("12345", None))
41
+
42
+ def test_supergroup_topic_returns_both(self):
43
+ prompt = (
44
+ '<channel source="switchroom-telegram" '
45
+ 'chat_id="-1001234" message_thread_id="17">hi</channel>'
46
+ )
47
+ self.assertEqual(extract_topic_from_prompt(prompt), ("-1001234", "17"))
48
+
49
+ def test_attribute_order_independent(self):
50
+ prompt = (
51
+ '<channel message_thread_id="42" chat_id="999" '
52
+ 'source="x">hi</channel>'
53
+ )
54
+ self.assertEqual(extract_topic_from_prompt(prompt), ("999", "42"))
55
+
56
+ def test_single_quoted_thread_id(self):
57
+ prompt = "<channel chat_id='1' message_thread_id='7'>hi</channel>"
58
+ self.assertEqual(extract_topic_from_prompt(prompt), ("1", "7"))
59
+
60
+ def test_no_channel_envelope_returns_none_pair(self):
61
+ self.assertEqual(extract_topic_from_prompt("plain prompt"), (None, None))
62
+
63
+ def test_empty_thread_id_collapses_to_none(self):
64
+ # Defensive against malformed envelopes that include the attribute
65
+ # but with no value.
66
+ prompt = '<channel chat_id="1" message_thread_id="">hi</channel>'
67
+ self.assertEqual(extract_topic_from_prompt(prompt), ("1", None))
68
+
69
+ def test_only_inspects_first_kb(self):
70
+ # Pad BEFORE the envelope; both chat_id AND thread_id should be lost.
71
+ prompt = ("x" * 2000) + '<channel chat_id="1" message_thread_id="7">hi</channel>'
72
+ self.assertEqual(extract_topic_from_prompt(prompt), (None, None))
73
+
74
+
33
75
  class ExtractChatIdTests(unittest.TestCase):
34
76
  def test_double_quoted_attribute(self):
35
77
  prompt = '<channel source="switchroom-telegram" chat_id="12345" thread_id="-">\nhi\n</channel>'