npm - switchroom - Versions diffs - 0.14.30 → 0.14.32 - Mend

switchroom 0.14.30 → 0.14.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/dist/agent-scheduler/index.js +80 -80
package/dist/auth-broker/index.js +80 -80
package/dist/cli/drive-write-pretool.mjs +10 -10
package/dist/cli/notion-write-pretool.mjs +82 -82
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +453 -366
package/dist/host-control/main.js +235 -157
package/dist/vault/approvals/kernel-server.js +82 -82
package/dist/vault/broker/server.js +83 -83
package/package.json +1 -1
package/telegram-plugin/dist/bridge/bridge.js +112 -112
package/telegram-plugin/dist/gateway/gateway.js +357 -213
package/telegram-plugin/dist/server.js +160 -160
package/telegram-plugin/gateway/gateway.ts +83 -9
package/telegram-plugin/hooks/hooks.json +9 -0
package/telegram-plugin/hooks/sentinel-reply-guard-pretool.mjs +114 -0
package/telegram-plugin/hooks/silent-end-scan.mjs +61 -5
package/telegram-plugin/registry/turns-schema.test.ts +34 -0
package/telegram-plugin/registry/turns-schema.ts +18 -0
package/telegram-plugin/secret-detect/generic-entropy.ts +87 -0
package/telegram-plugin/secret-detect/index.ts +42 -23
package/telegram-plugin/secret-detect/patterns.ts +64 -2
package/telegram-plugin/secret-detect/redact.ts +10 -1
package/telegram-plugin/tests/secret-detect-generic-entropy.test.ts +94 -0
package/telegram-plugin/tests/secret-detect-providers.test.ts +74 -0
package/telegram-plugin/tests/secret-detect-secretlint.test.ts +8 -4
package/telegram-plugin/tests/sentinel-reply-guard-pretool.test.ts +109 -0
package/telegram-plugin/tests/silent-end-interrupt-stop-scan.test.ts +118 -0
package/telegram-plugin/tests/turn-flush-safety.test.ts +41 -0
package/telegram-plugin/turn-flush-safety.ts +41 -0

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -427,6 +427,7 @@ import {
   recordTurnEnd,
   findLatestTurnIfInterrupted,
   findRecentTurnsForChat,
+  getTurnByKey,
 } from '../registry/turns-schema.js'
 import {
   buildResumeInterruptedInbound,
@@ -1117,6 +1118,41 @@ try {
   turnsDb = null
 }
+/**
+ * Resolve the chat/thread a background sub-agent was dispatched from, so
+ * its live worker card + handback route back to the originating
+ * conversation (group / forum topic) instead of the operator DM.
+ *
+ * Walks jsonl_agent_id → `subagents.parent_turn_key` →
+ * `turns.chat_id`/`thread_id`. Returns null on any miss so the caller
+ * keeps its existing `allowFrom[0]` DM fallback — best-effort, never
+ * throws out of the worker-card hot path. This restores the chat context
+ * the pinned-card fleet used to carry before it was removed in #1122
+ * (progressDriver is permanently null, so the old fleet lookup always
+ * yielded the DM for a Task dispatched from a group/topic).
+ */
+function resolveSubagentOriginChat(
+  agentId: string,
+): { chatId: string; threadId?: number } | null {
+  if (turnsDb == null) return null
+  try {
+    const sub = getSubagentByJsonlId(turnsDb, agentId)
+    if (sub?.parent_turn_key == null) return null
+    const turn = getTurnByKey(turnsDb, sub.parent_turn_key)
+    if (turn == null || turn.chat_id.length === 0) return null
+    const threadNum =
+      turn.thread_id != null && turn.thread_id.length > 0
+        ? Number(turn.thread_id)
+        : NaN
+    return {
+      chatId: turn.chat_id,
+      threadId: Number.isFinite(threadNum) ? threadNum : undefined,
+    }
+  } catch {
+    return null
+  }
+}
 // ─── Periodic history reaper (#1073) ──────────────────────────────────────
 // The init-time prune in history.ts only touched the `messages` table.
 // `subagents` and `turns` in registry.db grew unbounded — every Agent()
@@ -10515,6 +10551,33 @@ async function handleInbound(
     return
   }
+  // Pre-send composer clear (the marko wedge). The inbound is about to be
+  // delivered as an MCP `notifications/claude/channel` notification, which
+  // the unmodified CLI appends into its composer and auto-submits ONLY when
+  // the composer is empty + idle. The #1556 gate above guarantees idle, but
+  // NOT empty: stale typed-ahead / ghost text stranded in the composer
+  // (observed live on agent `marko`: "Yes, go ahead on both") makes the
+  // appended inbound fail to submit and silently swallows every subsequent
+  // queued inbound until a hard restart. Wipe the composer first so the
+  // notification lands at a clean line. Soft-fail by contract — a clear
+  // failure (no tmux session under legacy_pty, socket missing, timeout)
+  // must NEVER block delivery; log and proceed.
+  if (selfAgent) {
+    try {
+      const { clearAgentComposer } = await import('../../src/agents/tmux.js')
+      const cleared = clearAgentComposer({ agentName: selfAgent })
+      if ('error' in cleared) {
+        process.stderr.write(
+          `telegram gateway: pre-send composer-clear soft-failed agent=${selfAgent}: ${cleared.error} — delivering anyway\n`,
+        )
+      }
+    } catch (err) {
+      process.stderr.write(
+        `telegram gateway: pre-send composer-clear threw agent=${selfAgent}: ${(err as Error).message} — delivering anyway\n`,
+      )
+    }
+  }
   const delivered = ipcServer.sendToAgent(selfAgent, inboundMsg)
   if (delivered) markClaudeBusyForInbound(inboundMsg)
   if (!delivered) {
@@ -18344,11 +18407,15 @@ void (async () => {
                   handbackEnvValue: process.env.SWITCHROOM_SUBAGENT_HANDBACK,
                   outcome,
                   isBackground,
-                  fleetChatId,
-                  // Owner-chat fallback: if the progress-driver fleet
-                  // entry was already cleaned up, route to the owner
-                  // chat. Every switchroom fleet agent is DM-shaped, so
-                  // allowFrom[0] is the conversation that dispatched.
+                  // Route the handback (the worker's result → a synthesized
+                  // turn) back to the conversation the Task was dispatched
+                  // from, so the result lands where the user asked — not the
+                  // agent's DM. Falls back to fleetChatId/ownerChatId.
+                  fleetChatId: resolveSubagentOriginChat(agentId)?.chatId || fleetChatId,
+                  // Owner-chat fallback: if the parent-turn chat can't be
+                  // resolved, route to the owner chat. Every switchroom fleet
+                  // agent is DM-shaped, so allowFrom[0] is the conversation
+                  // that dispatched.
                   ownerChatId: loadAccess().allowFrom[0] ?? '',
                   taskDescription: description,
                   resultText,
@@ -18478,12 +18545,16 @@ void (async () => {
                 // message owns the progress beat. Push a running cue and
                 // return BEFORE the legacy bucket relay so the same activity
                 // isn't double-surfaced (in-message edit + injected
-                // "still working" inbound turn). Chat = owner DM, since the
-                // pinned-card fleet is gone and every agent is DM-shaped.
+                // "still working" inbound turn). Route to the conversation
+                // the Task was dispatched from (group / forum topic) via the
+                // parent turn; fall back to the owner DM when that can't be
+                // resolved (the pinned-card fleet that used to carry the chat
+                // is gone — see resolveSubagentOriginChat).
                 if (workerFeedEnabled) {
+                  const origin = resolveSubagentOriginChat(agentId)
                   void workerActivityFeed.update(
                     agentId,
-                    fleetChatId || (loadAccess().allowFrom[0] ?? ''),
+                    origin?.chatId || fleetChatId || (loadAccess().allowFrom[0] ?? ''),
                     {
                       description: dispatch.feedDescription,
                       lastTool,
@@ -18492,6 +18563,7 @@ void (async () => {
                       elapsedMs,
                       state: 'running',
                     },
+                    origin?.threadId,
                   )
                   return
                 }
@@ -18499,7 +18571,9 @@ void (async () => {
                 const decision = decideSubagentProgress({
                   disableEnvValue: process.env.SWITCHROOM_DISABLE_SUBAGENT_PROGRESS,
                   isBackground,
-                  fleetChatId,
+                  // Prefer the conversation the Task was dispatched from over
+                  // the owner DM (see resolveSubagentOriginChat).
+                  fleetChatId: resolveSubagentOriginChat(agentId)?.chatId || fleetChatId,
                   ownerChatId: loadAccess().allowFrom[0] ?? '',
                   subagentJsonlId: agentId,
                   taskDescription: description,

package/telegram-plugin/hooks/hooks.json CHANGED Viewed

@@ -10,6 +10,15 @@
           }
         ]
       },
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/hooks/sentinel-reply-guard-pretool.mjs\"",
+            "timeout": 5
+          }
+        ]
+      },
       {
         "matcher": "^(Agent|Task)$",
         "hooks": [

package/telegram-plugin/hooks/sentinel-reply-guard-pretool.mjs ADDED Viewed

@@ -0,0 +1,114 @@
+#!/usr/bin/env node
+/**
+ * PreToolUse hook — drops a `reply` / `stream_reply` call whose entire
+ * payload is only the silent sentinel(s) NO_REPLY / HEARTBEAT_OK.
+ *
+ * Defense-in-depth for #2053. The silent-end Stop hook and the gateway
+ * flush gate already recognise prose+trailing-NO_REPLY as "intentionally
+ * silent", but if a nag-loop (or any other path) ever pushes a
+ * sentinel-only payload through the reply tool, it must NEVER reach the
+ * Telegram chat. This guard is the last line: it intercepts the tool
+ * call itself, before the gateway sees it.
+ *
+ * Match discipline — EXACT, not substring:
+ *   - The trimmed payload must be ONLY one or more silent markers
+ *     (each on its own line, optional trailing punctuation per marker).
+ *   - A real reply that happens to mention "NO_REPLY" inside genuine
+ *     prose (e.g. "reply with exactly NO_REPLY if nothing to add") is
+ *     NOT dropped — it has non-marker content, so it is delivered.
+ *
+ * Claude Code PreToolUse protocol (v1):
+ *   Input:  JSON on stdin — { session_id, tool_name, tool_input, ... }
+ *   Output: exit 0 + empty stdout → allow.
+ *           exit 0 + JSON stdout { decision: "block", reason } → block.
+ *
+ * Fail-open on any parse/IO error — a malfunctioning guard must not wedge
+ * the reply path.
+ */
+import { readFileSync } from 'node:fs'
+import { argv } from 'node:process'
+import { fileURLToPath } from 'node:url'
+const REPLY_TOOLS = new Set([
+  'mcp__switchroom-telegram__reply',
+  'mcp__switchroom-telegram__stream_reply',
+])
+// Mirrors turn-flush-safety.ts:isSilentFlushMarker and
+// silent-end-scan.mjs:SILENT_MARKER_RE — a single bare marker with
+// optional trailing punctuation.
+const SILENT_MARKER_RE = /^(NO_REPLY|HEARTBEAT_OK)[\s.!?]*$/i
+function readStdin() {
+  try {
+    return readFileSync(0, 'utf8')
+  } catch {
+    return ''
+  }
+}
+/**
+ * True when `text` is composed ENTIRELY of silent markers — every
+ * non-empty line is a bare NO_REPLY / HEARTBEAT_OK — with at least one
+ * such line. Exact-match per line, never a substring of prose.
+ *
+ * @param {string} text
+ * @returns {boolean}
+ */
+export function isSentinelOnly(text) {
+  if (typeof text !== 'string') return false
+  const lines = text
+    .split('\n')
+    .map((l) => l.trim())
+    .filter((l) => l.length > 0)
+  if (lines.length === 0) return false
+  return lines.every((l) => SILENT_MARKER_RE.test(l))
+}
+function main() {
+  const raw = readStdin().trim()
+  if (!raw) process.exit(0)
+  let event
+  try {
+    event = JSON.parse(raw)
+  } catch {
+    process.exit(0)
+  }
+  const toolName = event?.tool_name
+  if (!REPLY_TOOLS.has(toolName)) process.exit(0)
+  const text = event?.tool_input?.text
+  if (typeof text !== 'string') process.exit(0)
+  if (isSentinelOnly(text)) {
+    process.stderr.write(
+      '[sentinel-reply-guard] dropped sentinel-only reply payload (#2053) — ' +
+        'NO_REPLY/HEARTBEAT_OK must never reach chat\n',
+    )
+    process.stdout.write(
+      JSON.stringify({
+        decision: 'block',
+        reason:
+          'This reply payload is only the silent sentinel (NO_REPLY / ' +
+          'HEARTBEAT_OK). That sentinel signals "send nothing" — it must not ' +
+          'be delivered to the user as a message. The turn is already ' +
+          'treated as intentionally silent; do not call the reply tool with ' +
+          'it. End your turn.',
+      }),
+    )
+    process.exit(0)
+  }
+  process.exit(0)
+}
+// Only run the stdin-reading entrypoint when invoked directly as the hook
+// script. When imported (e.g. by the unit test exercising `isSentinelOnly`)
+// an unconditional `main()` call would otherwise block in `readFileSync(0)`
+// on the importer's stdin and hang the process.
+if (argv[1] && fileURLToPath(import.meta.url) === argv[1]) {
+  main()
+}

package/telegram-plugin/hooks/silent-end-scan.mjs CHANGED Viewed

@@ -43,6 +43,38 @@ const FINAL_ANSWER_MIN_CHARS = 200
 // variants like "NO_REPLY." / "no_reply").
 const SILENT_MARKER_RE = /^(NO_REPLY|HEARTBEAT_OK)[\s.!?]*$/i
+/**
+ * True when `text`'s final non-empty line is a bare silent marker
+ * (NO_REPLY / HEARTBEAT_OK + optional trailing punctuation), regardless
+ * of what precedes it. Closes #2053: a turn that emits prose then a
+ * trailing bare `NO_REPLY` line is the model explicitly signalling
+ * "intentionally silent". The anchored `SILENT_MARKER_RE` only matches
+ * when the ENTIRE trimmed output is the bare marker, so prose+NO_REPLY
+ * slipped through → the hook blocked → nag loop → sentinel leak.
+ *
+ * Approximately mirrors `turn-flush-safety.ts:endsWithSilentMarker` (TS
+ * gateway side). NOT byte-identical: this .mjs uses `SILENT_MARKER_RE`
+ * directly (no length cap, unlimited trailing punctuation), whereas the
+ * TS side delegates to `isSilentFlushMarker` (length-capped, single
+ * trailing punct). This side is intentionally the more permissive of the
+ * two; the divergence is benign in direction — both suppress the common
+ * `prose\nNO_REPLY` shape, and the extra leniency here only ever
+ * suppresses MORE (never leaks, never wrongly silences a user-awaited
+ * reply, which is gated separately).
+ *
+ * @param {string} text
+ * @returns {boolean}
+ */
+export function endsWithSilentMarker(text) {
+  if (typeof text !== 'string') return false
+  const lines = text
+    .split('\n')
+    .map((l) => l.trim())
+    .filter((l) => l.length > 0)
+  if (lines.length === 0) return false
+  return SILENT_MARKER_RE.test(lines[lines.length - 1])
+}
 /**
  * Predicate ported from `telegram-plugin/final-answer-detect.ts:78-83`.
  * Kept in this .mjs so the hook is fully self-contained (no TS import).
@@ -69,13 +101,15 @@ export function isFinalAnswerReply({ text, disableNotification, done }) {
  * @returns {{ chatId: string | null, threadId: number | null, source: string | null }}
  */
 function parseChannelEnvelope(content) {
-  if (typeof content !== 'string') return { chatId: null, threadId: null }
+  if (typeof content !== 'string') return { chatId: null, threadId: null, source: null }
   const chatMatch = content.match(/chat_id="([^"]+)"/)
   const threadMatch = content.match(/message_thread_id="([^"]+)"/)
+  const sourceMatch = content.match(/<channel[^>]*\bsource="([^"]+)"/)
   const threadRaw = threadMatch ? Number(threadMatch[1]) : NaN
   return {
     chatId: chatMatch ? chatMatch[1] : null,
     threadId: Number.isFinite(threadRaw) && threadRaw !== 0 ? threadRaw : null,
+    source: sourceMatch ? sourceMatch[1] : null,
   }
 }
@@ -128,7 +162,7 @@ export function scanTurnForFinalReply(jsonl) {
   // 1. Walk backward to most-recent queue-operation/enqueue.
   let startIdx = -1
-  let envelope = { chatId: null, threadId: null }
+  let envelope = { chatId: null, threadId: null, source: null }
   for (let i = lines.length - 1; i >= 0; i--) {
     const line = lines[i]
     if (!line || line[0] !== '{') continue
@@ -159,15 +193,27 @@ export function scanTurnForFinalReply(jsonl) {
     const content = obj?.message?.content
     if (!Array.isArray(content)) continue
     for (const c of content) {
+      // Plain assistant text carve-out (#2053): a turn that ends with a
+      // trailing bare NO_REPLY / HEARTBEAT_OK line — emitted as plain
+      // transcript text, NOT through the reply tool — is the model
+      // explicitly signalling "intentionally silent". The anchored
+      // SILENT_MARKER_RE below only fires when the ENTIRE reply-tool
+      // text is the bare marker, so a plain-text prose+NO_REPLY turn
+      // matched nothing here → block → nag → sentinel leak. Treat a
+      // trailing-marker text block as a valid silent end.
+      if (c?.type === 'text' && endsWithSilentMarker(String(c.text ?? ''))) {
+        return { decided: 'allow', reason: 'silent-marker-text' }
+      }
       if (c?.type !== 'tool_use') continue
       if (!REPLY_TOOLS.has(c.name)) continue
       const input = c.input ?? {}
       const text = String(input.text ?? '')
       // Silent-marker carve-out: the operator explicitly signaled
       // "intentionally silent" (cron HEARTBEAT_OK, model-driven
-      // NO_REPLY). Don't block — same posture as the gateway's
-      // silent-marker suppression at gateway.ts:6692.
-      if (SILENT_MARKER_RE.test(text.trim())) {
+      // NO_REPLY). Accept both the whole-text bare marker and the
+      // prose+trailing-marker shape (#2053). Same posture as the
+      // gateway's silent-marker suppression at gateway.ts:6692.
+      if (SILENT_MARKER_RE.test(text.trim()) || endsWithSilentMarker(text)) {
         return { decided: 'allow', reason: 'silent-marker' }
       }
       if (isFinalAnswerReply({
@@ -180,6 +226,16 @@ export function scanTurnForFinalReply(jsonl) {
     }
   }
+  // Cron-fired turns (#2053): a scheduled turn that produced no
+  // qualifying reply is NOT a delivery failure the user is waiting on —
+  // nagging it only pushes the model to escape the loop by shoving a
+  // NO_REPLY sentinel through the reply tool, which leaks to chat. A
+  // cron turn that genuinely needs to speak will have called reply
+  // (caught above); otherwise let it end silently.
+  if (envelope.source === 'cron') {
+    return { decided: 'allow', reason: 'cron-source' }
+  }
   const block = { decided: 'block', reason: 'no-final-reply' }
   if (envelope.chatId) {
     block.chatId = envelope.chatId

package/telegram-plugin/registry/turns-schema.test.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import {
   recordTurnStart,
   recordTurnEnd,
   findRecentTurnsForChat,
+  getTurnByKey,
 } from './turns-schema.js'
 // ---------------------------------------------------------------------------
@@ -99,3 +100,36 @@ describe('findRecentTurnsForChat', () => {
     db.close()
   })
 })
+// ---------------------------------------------------------------------------
+// getTurnByKey — recover the dispatch chat/thread for a sub-agent's parent
+// turn (subagents.parent_turn_key -> turns.turn_key). Without this the
+// worker card / handback fall back to the operator DM (#worker-card-routing).
+// ---------------------------------------------------------------------------
+describe('getTurnByKey', () => {
+  it('returns null when the turn key does not exist', () => {
+    const db = openTurnsDbInMemory()
+    expect(getTurnByKey(db, 'nope')).toBeNull()
+    db.close()
+  })
+  it('recovers chat_id + thread_id for a group/topic turn', () => {
+    const db = openTurnsDbInMemory()
+    recordTurnStart(db, { turnKey: 'g:11', chatId: '-1001234567890', threadId: '42' })
+    const turn = getTurnByKey(db, 'g:11')
+    expect(turn?.turn_key).toBe('g:11')
+    expect(turn?.chat_id).toBe('-1001234567890')
+    expect(turn?.thread_id).toBe('42')
+    db.close()
+  })
+  it('recovers chat_id with null thread_id for a plain group/DM turn', () => {
+    const db = openTurnsDbInMemory()
+    recordTurnStart(db, { turnKey: 'dm:7', chatId: '12345' })
+    const turn = getTurnByKey(db, 'dm:7')
+    expect(turn?.chat_id).toBe('12345')
+    expect(turn?.thread_id).toBeNull()
+    db.close()
+  })
+})

package/telegram-plugin/registry/turns-schema.ts CHANGED Viewed

@@ -348,6 +348,24 @@ export function findOrphanedTurns(db: SqliteDatabase, chatId: string): Turn[] {
   return rows.map(mapRow)
 }
+/**
+ * Fetch a single turn by its primary key, or null if absent.
+ *
+ * Used to recover the chat/thread a background sub-agent was dispatched
+ * from: `subagents.parent_turn_key` is an FK-by-convention to
+ * `turns.turn_key`, so this resolves the originating conversation
+ * (chat_id + thread_id) for a worker card / handback. Without it the
+ * worker feed falls back to the operator DM (the pinned-card fleet that
+ * used to carry the chat was removed in #1122), so a Task dispatched from
+ * a group/topic posted its progress to the agent's DM instead.
+ */
+export function getTurnByKey(db: SqliteDatabase, turnKey: string): Turn | null {
+  const row = db
+    .prepare(`SELECT * FROM turns WHERE turn_key = ?`)
+    .get(turnKey) as RawTurnRow | undefined
+  return row ? mapRow(row) : null
+}
 export interface OrphanClassifyOpts {
   /**
    * `turnKey` from the on-disk `turn-active.json` marker — the single

package/telegram-plugin/secret-detect/generic-entropy.ts ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * Generic bare-high-entropy detector — the long-tail fallback.
+ *
+ * The provider/anchored patterns only catch tokens with a known prefix
+ * (sk-, ghp_, shpat_, …) or a KEY=value context. A STANDALONE high-entropy
+ * token pasted in prose — a raw Sanctum/base62 token with no prefix —
+ * matches none of them and used to slip through (the 2026-06-01 Sanctum
+ * incident). This scanner closes that gap.
+ *
+ * Emitted at **`ambiguous`** confidence, and `redact()` deliberately
+ * EXCLUDES this rule (see redact.ts): a generic guess must never silently
+ * mask — it would corrupt agent replies and stored messages (dense
+ * identifiers look high-entropy too). Its sole job is to drive the inbound
+ * gate's "👀 looks like a high-entropy string — stash to vault or ignore?"
+ * ASK prompt, where the operator confirms.
+ *
+ * Precision (the hard part — distinguishing a random token from a long
+ * technical identifier), via three cheap, composable filters:
+ *  1. CHARSET `[A-Za-z0-9]` only — NO `_` `-` `/` `+` `=` `.` `:`. This
+ *     breaks snake_case / kebab-case / npm paths / slugs / version strings
+ *     into sub-28 runs, so identifiers like `get_user_profile_by_org`,
+ *     `flex-row-gap-4`, `@babel/plugin-transform-modules-commonjs` never
+ *     form a candidate. (Cost: base64url tokens with `-`/`_` aren't caught
+ *     here — they usually appear in Bearer/JWT/KV contexts other rules
+ *     handle.)
+ *  2. ≥18 DISTINCT chars — excludes single-case hex hashes/SHAs (≤16
+ *     distinct) and digit runs (≤10 distinct) by construction; since 18
+ *     is unreachable with digits alone, a passing token contains letters.
+ *  3. Contains ≥1 DIGIT — kills CamelCase-without-digits identifiers
+ *     (`AbstractSingletonProxyFactoryBeanGenerator`, `TheQuickBrownFox…`),
+ *     which are the residual no-separator FP shape. Real base62 tokens
+ *     almost always contain a digit (>99% at 28+ chars).
+ */
+import type { RawHit } from './kv-scanner.js'
+const CANDIDATE_RE = /[A-Za-z0-9]{28,}/g
+// Unreachable with digits alone (10) → excludes single-case hex (≤16) and
+// digit runs; real base62 tokens have 24–62 distinct.
+export const GENERIC_MIN_DISTINCT = 18
+// A real message has at most a handful of credentials; bound the work on
+// pathological/junk input (the O(n²) overlap-dedup downstream is the cost).
+const MAX_GENERIC_HITS = 20
+/** True once `tok` has at least `n` distinct chars (early-exit). ASCII-only
+ *  by construction — CANDIDATE_RE admits no code point ≥ 128. */
+function hasDistinctChars(tok: string, n: number): boolean {
+  const seen = new Uint8Array(128)
+  let distinct = 0
+  for (let i = 0; i < tok.length; i++) {
+    const c = tok.charCodeAt(i)
+    if (seen[c] === 0) {
+      seen[c] = 1
+      if (++distinct >= n) return true
+    }
+  }
+  return false
+}
+function hasDigit(tok: string): boolean {
+  for (let i = 0; i < tok.length; i++) {
+    const c = tok.charCodeAt(i)
+    if (c >= 48 && c <= 57) return true
+  }
+  return false
+}
+export function scanGenericSecrets(text: string): RawHit[] {
+  const hits: RawHit[] = []
+  CANDIDATE_RE.lastIndex = 0
+  let m: RegExpExecArray | null
+  while ((m = CANDIDATE_RE.exec(text)) !== null) {
+    if (hits.length >= MAX_GENERIC_HITS) break
+    const tok = m[0]
+    if (!hasDigit(tok)) continue
+    if (!hasDistinctChars(tok, GENERIC_MIN_DISTINCT)) continue
+    hits.push({
+      rule_id: 'generic_high_entropy',
+      start: m.index,
+      end: m.index + tok.length,
+      matched_text: tok,
+      confidence: 'ambiguous',
+    })
+  }
+  return hits
+}

package/telegram-plugin/secret-detect/index.ts CHANGED Viewed

@@ -25,6 +25,7 @@
  */
 import { ALL_PATTERNS } from './patterns.js'
 import { scanKeyValue, type RawHit } from './kv-scanner.js'
+import { scanGenericSecrets } from './generic-entropy.js'
 import { shannonEntropy } from './entropy.js'
 import { chunk } from './chunker.js'
 import { isSuppressed } from './suppressor.js'
@@ -118,6 +119,14 @@ export function detectSecrets(text: string): Detection[] {
     for (const h of kvHits) {
       raw.push({ ...h, start: h.start + win.offset, end: h.end + win.offset })
     }
+    // Generic bare-high-entropy fallback (ambiguous). Catches standalone
+    // tokens no prefix/KV rule matched. Below, dedupeRaw prefers a high hit
+    // over a generic one on the same range and dropOverlaps drops a generic
+    // hit nested in a larger one, so a recognized token isn't double-flagged.
+    const genHits = scanGenericSecrets(win.text)
+    for (const h of genHits) {
+      raw.push({ ...h, start: h.start + win.offset, end: h.end + win.offset })
+    }
   }
   // Dedupe by range + rule. If two rules hit the same range, prefer the
@@ -171,24 +180,28 @@ function dedupeRaw(raw: RawHit[]): RawHit[] {
 }
 /**
- * Drop hits fully contained inside another hit. Keeps the outer (typically
- * broader / higher-signal) hit — e.g. a JWT match wholly inside an
- * Authorization Bearer match keeps the Bearer.
+ * Drop an AMBIGUOUS hit that is fully contained inside another (larger)
+ * hit — e.g. a `generic_high_entropy` sub-span sitting inside a recognized
+ * high token, or inside an Authorization Bearer match. Narrow by design:
+ * it never drops a high-confidence hit and never touches high-vs-high
+ * overlaps, so it can't suppress a real detection — it only removes the
+ * redundant low-precision sub-spans the generic fallback can emit.
  */
 function dropOverlaps(hits: RawHit[]): RawHit[] {
-  const sorted = [...hits].sort((a, b) => (a.end - a.start) - (b.end - b.start))
-  const out: RawHit[] = []
-  for (const h of sorted) {
-    const contained = out.some(
-      (existing) =>
-        existing !== h &&
-        existing.start <= h.start &&
-        existing.end >= h.end &&
-        !(existing.start === h.start && existing.end === h.end),
-    )
-    if (!contained) out.push(h)
-  }
-  // Re-sort by start offset for deterministic downstream handling.
+  const out = hits.filter(
+    (h) =>
+      !(
+        h.confidence === 'ambiguous' &&
+        hits.some(
+          (o) =>
+            o !== h &&
+            o.start <= h.start &&
+            o.end >= h.end &&
+            !(o.start === h.start && o.end === h.end),
+        )
+      ),
+  )
+  // Sort by start offset for deterministic downstream handling.
   out.sort((a, b) => a.start - b.start || a.end - b.end)
   return out
 }
@@ -217,16 +230,22 @@ export async function detectSecretsAsync(text: string): Promise<Detection[]> {
     import('./secretlint-source.js').then((m) => m.detectViaSecretlint(text)),
   ])
-  // Merge with range-based dedupe. Vendored first wins on exact ties.
+  // Merge with range-based dedupe. On an exact-range tie, prefer the
+  // higher-confidence detection (else vendored-first). This matters since
+  // the vendored generic high-entropy fallback emits `ambiguous` — without
+  // the confidence tie-break it would shadow a Secretlint `high` provider
+  // hit on the same span and silently downgrade it (mirrors the sync
+  // dedupeRaw's high-over-ambiguous rule).
   const seen = new Map<string, Detection>()
-  for (const d of vendored) {
+  const consider = (d: Detection): void => {
     const key = `${d.start}:${d.end}`
-    if (!seen.has(key)) seen.set(key, d)
-  }
-  for (const d of viaSecretlint) {
-    const key = `${d.start}:${d.end}`
-    if (!seen.has(key)) seen.set(key, d)
+    const existing = seen.get(key)
+    if (!existing || (existing.confidence === 'ambiguous' && d.confidence === 'high')) {
+      seen.set(key, d)
+    }
   }
+  for (const d of vendored) consider(d)
+  for (const d of viaSecretlint) consider(d)
   // Re-derive slugs against the merged set (Secretlint and vendored each
   // had independent `existing` sets; we coalesce here).