npm - @vellumai/assistant - Versions diffs - 0.3.16 → 0.3.18 - Mend

@vellumai/assistant 0.3.16 → 0.3.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

package/ARCHITECTURE.md +70 -13
package/README.md +6 -0
package/docs/architecture/http-token-refresh.md +23 -1
package/package.json +1 -1
package/src/__tests__/access-request-decision.test.ts +4 -7
package/src/__tests__/channel-guardian.test.ts +3 -1
package/src/__tests__/checker.test.ts +79 -48
package/src/__tests__/config-watcher.test.ts +11 -13
package/src/__tests__/conversation-pairing.test.ts +103 -3
package/src/__tests__/guardian-action-conversation-turn.test.ts +1 -1
package/src/__tests__/guardian-action-followup-executor.test.ts +1 -1
package/src/__tests__/guardian-action-late-reply.test.ts +131 -0
package/src/__tests__/guardian-action-store.test.ts +182 -0
package/src/__tests__/guardian-dispatch.test.ts +120 -0
package/src/__tests__/ipc-snapshot.test.ts +21 -0
package/src/__tests__/non-member-access-request.test.ts +1 -2
package/src/__tests__/notification-broadcaster.test.ts +115 -4
package/src/__tests__/notification-decision-strategy.test.ts +2 -1
package/src/__tests__/notification-deep-link.test.ts +44 -1
package/src/__tests__/notification-guardian-path.test.ts +157 -0
package/src/__tests__/notification-thread-candidate-validation.test.ts +215 -0
package/src/__tests__/slack-channel-config.test.ts +3 -3
package/src/__tests__/trust-store.test.ts +21 -21
package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +5 -7
package/src/__tests__/trusted-contact-multichannel.test.ts +2 -6
package/src/__tests__/trusted-contact-verification.test.ts +9 -9
package/src/__tests__/update-bulletin-state.test.ts +1 -1
package/src/__tests__/update-bulletin.test.ts +66 -3
package/src/__tests__/update-template-contract.test.ts +6 -11
package/src/__tests__/voice-session-bridge.test.ts +109 -9
package/src/calls/call-controller.ts +129 -8
package/src/calls/guardian-action-sweep.ts +1 -1
package/src/calls/guardian-dispatch.ts +8 -0
package/src/calls/voice-session-bridge.ts +4 -2
package/src/cli/core-commands.ts +41 -1
package/src/config/templates/UPDATES.md +5 -6
package/src/config/update-bulletin-format.ts +2 -0
package/src/config/update-bulletin-state.ts +1 -1
package/src/config/update-bulletin-template-path.ts +6 -0
package/src/config/update-bulletin.ts +21 -6
package/src/daemon/config-watcher.ts +3 -2
package/src/daemon/daemon-control.ts +64 -10
package/src/daemon/handlers/config-slack-channel.ts +1 -1
package/src/daemon/handlers/identity.ts +45 -25
package/src/daemon/handlers/sessions.ts +1 -1
package/src/daemon/ipc-contract/sessions.ts +1 -1
package/src/daemon/ipc-contract/workspace.ts +12 -1
package/src/daemon/ipc-contract-inventory.json +1 -0
package/src/daemon/lifecycle.ts +8 -0
package/src/daemon/server.ts +25 -3
package/src/daemon/session-process.ts +438 -184
package/src/daemon/tls-certs.ts +17 -12
package/src/daemon/tool-side-effects.ts +1 -1
package/src/memory/channel-delivery-store.ts +18 -20
package/src/memory/channel-guardian-store.ts +39 -42
package/src/memory/conversation-crud.ts +2 -2
package/src/memory/conversation-queries.ts +2 -2
package/src/memory/conversation-store.ts +24 -25
package/src/memory/db-init.ts +9 -1
package/src/memory/fts-reconciler.ts +41 -26
package/src/memory/guardian-action-store.ts +57 -7
package/src/memory/guardian-verification.ts +1 -0
package/src/memory/jobs-worker.ts +2 -2
package/src/memory/migrations/032-guardian-delivery-conversation-index.ts +15 -0
package/src/memory/migrations/032-notification-delivery-thread-decision.ts +20 -0
package/src/memory/migrations/index.ts +4 -2
package/src/memory/schema-migration.ts +1 -0
package/src/memory/schema.ts +6 -1
package/src/memory/search/semantic.ts +3 -3
package/src/notifications/README.md +158 -17
package/src/notifications/broadcaster.ts +68 -50
package/src/notifications/conversation-pairing.ts +96 -18
package/src/notifications/decision-engine.ts +6 -3
package/src/notifications/deliveries-store.ts +12 -0
package/src/notifications/emit-signal.ts +1 -0
package/src/notifications/thread-candidates.ts +60 -25
package/src/notifications/types.ts +2 -1
package/src/permissions/checker.ts +1 -16
package/src/permissions/defaults.ts +14 -4
package/src/runtime/guardian-action-followup-executor.ts +1 -1
package/src/runtime/http-server.ts +11 -11
package/src/runtime/routes/access-request-decision.ts +1 -1
package/src/runtime/routes/debug-routes.ts +4 -4
package/src/runtime/routes/guardian-approval-interception.ts +4 -4
package/src/runtime/routes/inbound-message-handler.ts +6 -6
package/src/runtime/routes/integration-routes.ts +2 -2
package/src/tools/permission-checker.ts +1 -2
package/src/tools/secret-detection-handler.ts +1 -1
package/src/tools/system/voice-config.ts +1 -1
package/src/version.ts +29 -2

package/ARCHITECTURE.md CHANGED Viewed

@@ -218,7 +218,7 @@ The app token is validated by format only — it must start with `xapp-`.
 **Connection status:**
-The `GET` endpoint reports `connected: true` only when both `hasBotToken` and `hasAppToken` are true. If only one token is stored, a `warning` field describes which token is missing.
+Both `GET` and `POST` endpoints report `connected: true` only when both `hasBotToken` and `hasAppToken` are true. The `POST` endpoint additionally returns a `warning` field when only one token is stored, describing which token is missing.
 **Key source files:**
@@ -276,6 +276,33 @@ External users who are not the guardian can gain access to the assistant through
 | `src/memory/channel-guardian-store.ts` | Approval request and verification challenge persistence |
 | `src/config/vellum-skills/trusted-contacts/SKILL.md` | Skill teaching the assistant to manage contacts via HTTP API |
+### Update Bulletin System
+Release-driven update notification system that surfaces release notes to the assistant via the system prompt.
+**Data flow:**
+1. **Bundled template** (`src/config/templates/UPDATES.md`) — source of release notes, maintained per-release in the repo.
+2. **Startup sync** (`syncUpdateBulletinOnStartup()` in `src/config/update-bulletin.ts`) — materializes the bundled template into the workspace `UPDATES.md` on daemon boot. Uses atomic write (temp + rename) for crash safety.
+3. **System prompt injection** — `buildSystemPrompt()` reads workspace `UPDATES.md` and injects it as a `## Recent Updates` section with judgment-based handling instructions.
+4. **Completion by deletion** — the assistant deletes `UPDATES.md` when it has actioned all updates. Next startup detects the deletion and marks those releases as completed in checkpoint state.
+5. **Cross-release merge** — if pending updates from a prior release exist when a new release lands, both release blocks coexist in the same file.
+**Checkpoint keys** (in `memory_checkpoints` table):
+- `updates:active_releases` — JSON array of version strings currently active.
+- `updates:completed_releases` — JSON array of version strings already completed.
+**Key source files:**
+| File | Purpose |
+|------|---------|
+| `src/config/templates/UPDATES.md` | Bundled release-note template |
+| `src/config/update-bulletin.ts` | Startup sync logic (materialize, delete-complete, merge) |
+| `src/config/update-bulletin-format.ts` | Release block formatter/parser helpers |
+| `src/config/update-bulletin-state.ts` | Checkpoint state helpers for active/completed releases |
+| `src/config/system-prompt.ts` | Prompt injection of updates section |
+| `src/daemon/config-watcher.ts` | File watcher — evicts sessions on UPDATES.md changes |
+| `src/permissions/defaults.ts` | Auto-allow rules for file_read/write/edit + rm UPDATES.md |
 ---
@@ -1543,9 +1570,10 @@ Keep-alive heartbeats (every 30 s by default):
 The notification module (`assistant/src/notifications/`) uses a signal-based architecture where producers emit free-form events and an LLM-backed decision engine determines whether, where, and how to notify the user. See `assistant/src/notifications/README.md` for the full developer guide.
 ```
-Producer → NotificationSignal → Decision Engine (LLM) → Deterministic Checks → Broadcaster → Conversation Pairing → Adapters → Delivery
-                                       ↑                                                            ↓
-                               Preference Summary                                    notification_thread_created IPC
+Producer → NotificationSignal → Candidate Generation → Decision Engine (LLM) → Deterministic Checks → Broadcaster → Conversation Pairing → Adapters → Delivery
+                                                              ↑                                                            ↓
+                                                      Preference Summary                                    notification_thread_created IPC
+                                                      Thread Candidates                                     (creation-only — not emitted on reuse)
 ```
 ### Channel Policy Registry
@@ -1560,13 +1588,18 @@ Producer → NotificationSignal → Decision Engine (LLM) → Deterministic Chec
 Helper functions: `getDeliverableChannels()`, `getChannelPolicy()`, `isNotificationDeliverable()`, `getConversationStrategy()`.
-### Conversation Pairing
+### Conversation Pairing and Thread Routing
+Every notification delivery materializes a conversation + seed message **before** the adapter sends it (`conversation-pairing.ts`). The pairing function now accepts a `threadAction` from the decision engine:
-Every notification delivery materializes a conversation + seed message **before** the adapter sends it (`conversation-pairing.ts`). This ensures:
+- **`reuse_existing`**: Looks up the target conversation. If valid (exists with `source: 'notification'`), the seed message is appended to the existing thread. If invalid, falls back to creating a new conversation with `threadDecisionFallbackUsed: true`.
+- **`start_new` (default)**: Creates a fresh conversation per delivery.
+This ensures:
 1. Every delivery has an auditable conversation trail in the conversations table
 2. The macOS/iOS client can deep-link directly into the notification thread
-3. Delivery audit rows in `notification_deliveries` carry `conversation_id`, `message_id`, and `conversation_strategy` columns
+3. Delivery audit rows in `notification_deliveries` carry `conversation_id`, `message_id`, `conversation_strategy`, `thread_action`, `thread_target_conversation_id`, and `thread_decision_fallback_used` columns
 The pairing function (`pairDeliveryWithConversation`) is resilient — errors are caught and logged without breaking the delivery pipeline.
@@ -1577,19 +1610,42 @@ The notification pipeline uses a single conversation materialization path across
 1. **Canonical pipeline** (`emitNotificationSignal` → decision engine → broadcaster → conversation pairing → adapters): The broadcaster pairs each delivery with a conversation, then dispatches a `notification_intent` IPC event via the Vellum adapter. The IPC payload includes `deepLinkMetadata` (e.g. `{ conversationId }`) so the macOS/iOS client can deep-link to the relevant context when the user taps the notification.
 2. **Guardian bookkeeping** (`dispatchGuardianQuestion`): Guardian dispatch creates `guardian_action_request` / `guardian_action_delivery` audit rows derived from pipeline delivery results and the per-dispatch `onThreadCreated` callback — there is no separate thread-creation path.
-### Thread Surfacing via `notification_thread_created` IPC
+### Thread Surfacing via `notification_thread_created` IPC (Creation-Only)
+The `notification_thread_created` IPC event is emitted **only when a brand-new conversation is created** by the broadcaster. Reusing an existing thread does not trigger this event — the macOS/iOS client already knows about the conversation from the original creation. This is enforced in `broadcaster.ts` by gating on `pairing.createdNewConversation === true`.
-When a vellum notification thread is paired with a conversation (strategy `start_new_conversation`), the broadcaster emits a `notification_thread_created` IPC event **immediately** (before waiting for slower channel deliveries like Telegram). This pushes the thread to the macOS/iOS client so it can display the notification thread in the sidebar and deep-link to it.
+When a new vellum notification thread is created (strategy `start_new_conversation`), the broadcaster emits the IPC event **immediately** (before waiting for slower channel deliveries like Telegram). This pushes the thread to the macOS/iOS client so it can display the notification thread in the sidebar and deep-link to it.
 ### IPC Thread-Created Events
 Two IPC push events surface new threads in the macOS/iOS client sidebar:
-- **`notification_thread_created`** — Emitted by `broadcaster.ts` when a notification delivery creates a vellum conversation (strategy `start_new_conversation`). Payload: `{ conversationId, title, sourceEventName }`.
+- **`notification_thread_created`** — Emitted by `broadcaster.ts` when a notification delivery **creates** a new vellum conversation (strategy `start_new_conversation`, `createdNewConversation: true`). **Not** emitted when a thread is reused. Payload: `{ conversationId, title, sourceEventName }`.
 - **`task_run_thread_created`** — Emitted by `work-item-runner.ts` when a task run creates a conversation. Payload: `{ conversationId, workItemId, title }`.
 All events follow the same pattern: the daemon creates a server-side conversation, persists an initial message, and broadcasts the IPC event so the macOS `ThreadManager` can create a visible thread in the sidebar.
+### Thread Routing Decision Flow
+The decision engine produces per-channel thread actions using a candidate-driven approach:
+1. **Candidate generation** (`thread-candidates.ts`): Queries recent notification-sourced conversations (24-hour window, up to 5 per channel) and enriches them with guardian context (pending request counts).
+2. **LLM decision**: The candidate set is serialized into the system prompt. The LLM chooses `start_new` or `reuse_existing` (with a candidate `conversationId`) per channel.
+3. **Strict validation** (`validateThreadActions`): Reuse targets must exist in the candidate set. Invalid targets are downgraded to `start_new`.
+4. **Pairing execution**: `pairDeliveryWithConversation` executes the thread action — appending to an existing conversation on reuse, creating a new one otherwise.
+5. **IPC gating**: `notification_thread_created` fires only on actual creation, not on reuse.
+6. **Audit trail**: Thread actions are persisted in both `notification_decisions.validation_results` and `notification_deliveries` columns (`thread_action`, `thread_target_conversation_id`, `thread_decision_fallback_used`).
+### Guardian Multi-Request Disambiguation in Reused Threads
+When the decision engine routes multiple guardian questions to the same conversation (via `reuse_existing`), those questions share a single thread. The guardian disambiguates which question they are answering using **request-code prefixes**:
+- **Single pending delivery**: Matched automatically (single-match fast path).
+- **Multiple pending deliveries**: The guardian must prefix their reply with the 6-char hex request code (e.g. `A1B2C3 yes, allow it`). Case-insensitive matching.
+- **No match**: A disambiguation message is sent listing all active request codes.
+This invariant is enforced identically on mac/vellum (`session-process.ts`), Telegram, and SMS (`inbound-message-handler.ts`). All disambiguation messages are generated through the guardian action message composer (LLM with deterministic fallback).
 ### Reminder Routing Metadata
 Reminders carry optional `routingIntent` (`single_channel` | `multi_channel` | `all_channels`) and free-form `routingHints` metadata. When a reminder fires, this metadata flows through the notification signal into a post-decision enforcement step (`enforceRoutingIntent()` in `decision-engine.ts`) that overrides the LLM's channel selection to match the requested coverage. This enables single-reminder fanout: one reminder can produce multi-channel delivery without duplicate reminders. See `assistant/docs/architecture/scheduling.md` for the full trigger-time data flow.
@@ -1612,8 +1668,9 @@ Connected channels are resolved at signal emission time: vellum is always includ
 | `assistant/src/notifications/emit-signal.ts` | Single entry point for all producers; orchestrates the full pipeline |
 | `assistant/src/notifications/decision-engine.ts` | LLM-based routing decisions with deterministic fallback |
 | `assistant/src/notifications/deterministic-checks.ts` | Hard invariant checks (dedupe, source-active suppression, channel availability) |
-| `assistant/src/notifications/broadcaster.ts` | Dispatches decisions to channel adapters; emits `notification_thread_created` IPC |
-| `assistant/src/notifications/conversation-pairing.ts` | Materializes conversation + message per delivery based on channel strategy |
+| `assistant/src/notifications/broadcaster.ts` | Dispatches decisions to channel adapters; emits `notification_thread_created` IPC (creation-only) |
+| `assistant/src/notifications/conversation-pairing.ts` | Materializes conversation + message per delivery; executes thread reuse decisions |
+| `assistant/src/notifications/thread-candidates.ts` | Builds per-channel candidate set of recent conversations for the decision engine |
 | `assistant/src/notifications/adapters/macos.ts` | Vellum adapter — broadcasts `notification_intent` via IPC with deep-link metadata |
 | `assistant/src/notifications/adapters/telegram.ts` | Telegram adapter — POSTs to gateway `/deliver/telegram` |
 | `assistant/src/notifications/adapters/sms.ts` | SMS adapter — POSTs to gateway `/deliver/sms` via Twilio Messages API |
@@ -1624,7 +1681,7 @@ Connected channels are resolved at signal emission time: vellum is always includ
 | `assistant/src/config/bundled-skills/messaging/tools/send-notification.ts` | Explicit producer tool for user-requested notifications; emits signals into the same routing pipeline |
 | `assistant/src/calls/guardian-dispatch.ts` | Guardian question dispatch that reuses canonical notification pairing and records guardian delivery bookkeeping from pipeline results |
-**Audit trail (SQLite):** `notification_events` → `notification_decisions` → `notification_deliveries` (with `conversation_id`, `message_id`, `conversation_strategy`)
+**Audit trail (SQLite):** `notification_events` → `notification_decisions` (with `threadActions` in validation results) → `notification_deliveries` (with `conversation_id`, `message_id`, `conversation_strategy`, `thread_action`, `thread_target_conversation_id`, `thread_decision_fallback_used`)
 **Configuration:** `notifications.decisionModelIntent` in `config.json`.

package/README.md CHANGED Viewed

@@ -50,6 +50,12 @@ cp .env.example .env
 | `RUNTIME_GATEWAY_ORIGIN_SECRET` | No | — | Dedicated secret for the `X-Gateway-Origin` proof header on `/channels/inbound`. When not set, falls back to the bearer token. Both gateway and runtime must share the same value. |
 | `VELLUM_DAEMON_SOCKET` | No | `~/.vellum/vellum.sock` | Override the daemon socket path |
+## Update Bulletin
+When a release includes relevant updates, the daemon materializes release notes from the bundled `src/config/templates/UPDATES.md` into `~/.vellum/workspace/UPDATES.md` on startup. The assistant uses judgment to surface updates to the user when relevant, and deletes the file when done.
+**For release maintainers:** Update `assistant/src/config/templates/UPDATES.md` with release notes before each relevant release. Leave the template empty (or comment-only) for releases with no user/assistant-facing changes.
 ## Usage
 ### Start the daemon

package/docs/architecture/http-token-refresh.md CHANGED Viewed

@@ -4,7 +4,7 @@ Design for how the daemon notifies clients of bearer token rotation and how clie
 ## Current State
-The daemon's HTTP bearer token is generated at startup and persisted to `~/.vellum/http-token` (mode 0600). Clients read this file at connection time:
+The daemon's HTTP bearer token is resolved at startup and persisted to `~/.vellum/http-token` (mode 0600). The startup token resolution order is: (1) the `RUNTIME_PROXY_BEARER_TOKEN` env var if set, (2) the existing token read from `~/.vellum/http-token` if the file is readable and non-empty, (3) a newly generated random token as a last resort. Clients read this file at connection time:
 - **macOS (local)**: Reads `~/.vellum/http-token` from disk via `resolveHttpTokenPath()` / `readHttpToken()`. Has direct filesystem access to the token file.
 - **iOS (remote)**: Receives the bearer token during the QR-code pairing flow. The token is stored in the iOS Keychain and used for all subsequent HTTP/SSE requests.
@@ -177,6 +177,28 @@ private rotateToken(revoke: boolean): string {
 }
 ```
+**Failure semantics — routine vs. revocation**:
+The two rotation modes have deliberately different failure ordering to match their security requirements:
+| | Routine (`revoke: false`) | Revocation (`revoke: true`) |
+|---|---|---|
+| **Order** | Persist to disk first, then update in-memory state | Update in-memory state first, then persist to disk |
+| **Disk write failure** | Rotation aborts cleanly — in-memory auth state is untouched, clients keep working with the old token | Old token is already invalidated in memory; the API endpoint returns an error to the caller |
+| **Rationale** | Availability: don't lock out clients if persistence fails | Security: a potentially compromised token must never remain valid, even briefly |
+**Revocation disk-write failure in detail**: If `writeTokenToDisk` throws after the in-memory switch during revocation, the system enters a degraded state:
+1. **In-memory state**: `currentToken` holds the new (unpersisted) token. The old token is rejected. All old-token SSE connections have been terminated.
+2. **Disk state**: `~/.vellum/http-token` still contains the old (now-invalid) token.
+3. **API response**: The `POST /v1/auth/rotate-token` endpoint returns an error indicating the persistence failure. The response body includes the new token so the caller can manually persist or distribute it if needed.
+4. **Client impact by platform**:
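+   - **macOS**: Re-reading the token file yields the stale old token, which is rejected (401). Recovery requires a successful retry of the rotation API call, or replacing the stale token file (or setting `RUNTIME_PROXY_BEARER_TOKEN`) before restarting the daemon — a plain restart reloads the stale token from disk rather than generating a fresh one.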
+   - **iOS**: Already disconnected (old-token SSE terminated). Cannot recover until the rotation is retried successfully or the daemon is restarted with a valid token in place (a plain restart alone only reloads the stale token from disk), at which point re-pairing is required.
+   - **Chrome extension**: Same as iOS — the pasted token is stale and rejected.
+5. **Daemon restart recovery**: A daemon restart does **not** automatically heal this state. At startup, the daemon first checks for the `RUNTIME_PROXY_BEARER_TOKEN` env var, then tries to read the existing token from `~/.vellum/http-token`, and only generates a new random token if both are unavailable (see `assistant/src/daemon/lifecycle.ts`, lines 110-124). In the degraded state described here — where the disk still holds the old (now-invalid) token — a restart would reload that stale token, making it the active bearer token again. To actually recover, the operator must either: (a) manually delete or overwrite `~/.vellum/http-token` before restarting the daemon, (b) set `RUNTIME_PROXY_BEARER_TOKEN` to a known-good value, or (c) successfully retry the `POST /v1/auth/rotate-token` endpoint while the daemon is still running with the new in-memory token.
+6. **Why this is acceptable**: Revocation is a security-critical operation triggered when the old token is suspected to be compromised. The invariant — "a compromised token must not remain valid" — takes precedence over client convenience. The degraded state requires manual intervention, but disk write failures (e.g. permission errors, a full disk) are rare in practice, and the API response includes the new token so the caller can retry or manually persist it.
 **SSE event emission** (routine rotation only): The `token_rotated` event is published to `assistantEventHub` as a `ServerMessage`, reaching all connected SSE subscribers across all conversations. This event is never emitted during revocation rotations.
 ### 5. iOS Client Implementation

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.3.16",
+  "version": "0.3.18",
   "type": "module",
   "bin": {
     "vellum": "./src/index.ts"

package/src/__tests__/access-request-decision.test.ts CHANGED Viewed

@@ -55,20 +55,18 @@ mock.module('../runtime/gateway-client.js', () => ({
 import {
   createApprovalRequest,
-  createBinding,
   getApprovalRequestById,
-  findPendingAccessRequestForRequester,
 } from '../memory/channel-guardian-store.js';
+import { getDb, initializeDb, resetDb } from '../memory/db.js';
 import {
   findActiveSession,
 } from '../runtime/channel-guardian-service.js';
-import { initializeDb, resetDb } from '../memory/db.js';
 import {
-  handleAccessRequestDecision,
   deliverVerificationCodeToGuardian,
+  handleAccessRequestDecision,
   notifyRequesterOfApproval,
-  notifyRequesterOfDenial,
   notifyRequesterOfDeliveryFailure,
+  notifyRequesterOfDenial,
 } from '../runtime/routes/access-request-decision.js';
 initializeDb();
@@ -85,7 +83,6 @@ afterAll(() => {
 const GUARDIAN_APPROVAL_TTL_MS = 5 * 60 * 1000;
 function resetState(): void {
-  const { getDb } = require('../memory/db.js');
   const db = getDb();
   db.run('DELETE FROM channel_guardian_approval_requests');
   db.run('DELETE FROM channel_guardian_bindings');
@@ -215,7 +212,7 @@ describe('access request decision handler', () => {
       'guardian-user-789',
     );
     expect(result1.type).toBe('approved');
-    const sessionId1 = result1.verificationSessionId;
+    const _sessionId1 = result1.verificationSessionId;
     // Approve again — should be idempotent (already resolved with same decision)
     const result2 = handleAccessRequestDecision(

package/src/__tests__/channel-guardian.test.ts CHANGED Viewed

@@ -2743,7 +2743,9 @@ describe('outbound SMS verification', () => {
       // Guardian outbound sessions (no verificationPurpose override) create
       // guardian bindings on success
       expect(result.verificationType).toBe('guardian');
-      expect(result.bindingId).toBeDefined();
+      if (result.verificationType === 'guardian') {
+        expect(result.bindingId).toBeDefined();
+      }
     }
   });

package/src/__tests__/checker.test.ts CHANGED Viewed

@@ -272,8 +272,8 @@ describe('Permission Checker', () => {
         expect(await classifyRisk('bash', { command: 'some_custom_tool' })).toBe(RiskLevel.Medium);
       });
-      test('rm (without -r) is medium risk', async () => {
-        expect(await classifyRisk('bash', { command: 'rm file.txt' })).toBe(RiskLevel.Medium);
+      test('rm (without -r) is high risk', async () => {
+        expect(await classifyRisk('bash', { command: 'rm file.txt' })).toBe(RiskLevel.High);
       });
       test('chmod is medium risk', async () => {
@@ -374,7 +374,7 @@ describe('Permission Checker', () => {
       expect(high.matchedRule?.id).toBe('default:allow-bash-global');
       // Medium risk
-      const med = await check('bash', { command: 'rm file.txt' }, '/tmp');
+      const med = await check('bash', { command: 'curl https://example.com' }, '/tmp');
       expect(med.decision).toBe('allow');
       expect(med.matchedRule?.id).toBe('default:allow-bash-global');
@@ -391,7 +391,7 @@ describe('Permission Checker', () => {
         const high = await check('bash', { command: 'sudo rm -rf /' }, '/tmp');
         expect(high.decision).toBe('prompt');
-        const med = await check('bash', { command: 'rm file.txt' }, '/tmp');
+        const med = await check('bash', { command: 'curl https://example.com' }, '/tmp');
         expect(med.decision).toBe('prompt');
         // Low risk still auto-allows via the normal risk-based fallback
@@ -409,17 +409,31 @@ describe('Permission Checker', () => {
       expect(result.decision).toBe('prompt');
     });
-    test('host_bash medium risk with no matching rule → prompt', async () => {
+    test('host_bash rm is always high risk → prompt', async () => {
       const result = await check('host_bash', { command: 'rm file.txt' }, '/tmp');
       expect(result.decision).toBe('prompt');
+      expect(result.reason).toContain('High risk');
+    });
+    test('plain rm (without -rf) is high risk and prompts despite default allow rule', async () => {
+      // Validates that ALL rm commands are escalated to High risk, not just rm -rf.
+      // The default allow rule for host_bash auto-approves Low/Medium risk but
+      // High risk always prompts.
+      const result = await check('host_bash', { command: 'rm single-file.txt' }, '/tmp');
+      expect(result.decision).toBe('prompt');
+      expect(result.reason).toContain('High risk');
+      // Also verify rm -rf still prompts
+      const rfResult = await check('host_bash', { command: 'rm -rf /tmp/dir' }, '/tmp');
+      expect(rfResult.decision).toBe('prompt');
+      expect(rfResult.reason).toContain('High risk');
     });
-    test('medium risk with matching trust rule → allow', async () => {
+    test('rm is high risk even with matching trust rule → prompt', async () => {
       addRule('bash', 'rm *', '/tmp');
       const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
-      expect(result.decision).toBe('allow');
-      expect(result.reason).toContain('Matched trust rule');
-      expect(result.matchedRule).toBeDefined();
+      expect(result.decision).toBe('prompt');
+      expect(result.reason).toContain('High risk');
     });
     test('file_read → auto-allow', async () => {
@@ -489,11 +503,11 @@ describe('Permission Checker', () => {
       expect(result.matchedRule?.id).toBe('default:ask-host_file_edit-global');
     });
-    test('host_bash prompts by default via host ask rule', async () => {
+    test('host_bash auto-allows low risk via default allow rule', async () => {
       const result = await check('host_bash', { command: 'ls' }, '/tmp');
-      expect(result.decision).toBe('prompt');
-      expect(result.reason).toContain('ask rule');
-      expect(result.matchedRule?.id).toBe('default:ask-host_bash-global');
+      expect(result.decision).toBe('allow');
+      expect(result.reason).toContain('Matched trust rule');
+      expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
     });
     test('scaffold_managed_skill prompts by default via managed skill ask rule', async () => {
@@ -597,7 +611,7 @@ describe('Permission Checker', () => {
     });
     // Deny rule tests
-    test('deny rule blocks medium-risk command', async () => {
+    test('deny rule blocks high-risk command', async () => {
       addRule('bash', 'rm *', '/tmp', 'deny');
       const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
       expect(result.decision).toBe('deny');
@@ -764,16 +778,16 @@ describe('Permission Checker', () => {
     // Priority-based rule resolution
     test('higher-priority allow rule overrides lower-priority deny rule', async () => {
-      addRule('bash', 'rm *', '/tmp', 'deny', 0);
-      addRule('bash', 'rm *', '/tmp', 'allow', 100);
-      const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
+      addRule('bash', 'chmod *', '/tmp', 'deny', 0);
+      addRule('bash', 'chmod *', '/tmp', 'allow', 100);
+      const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
       expect(result.decision).toBe('allow');
     });
     test('higher-priority deny rule overrides lower-priority allow rule', async () => {
-      addRule('bash', 'rm *', '/tmp', 'allow', 0);
-      addRule('bash', 'rm *', '/tmp', 'deny', 100);
-      const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
+      addRule('bash', 'chmod *', '/tmp', 'allow', 0);
+      addRule('bash', 'chmod *', '/tmp', 'deny', 100);
+      const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
       expect(result.decision).toBe('deny');
     });
@@ -1465,13 +1479,14 @@ describe('Permission Checker', () => {
       expect(result.matchedRule?.id).toBe('default:allow-bash-global');
     });
-    test('host_bash with no user rule returns prompt in strict mode', async () => {
+    test('host_bash auto-allows low risk in strict mode (default allow rule is a matching rule)', async () => {
       testConfig.permissions.mode = 'strict';
       const result = await check('host_bash', { command: 'ls' }, '/tmp');
-      expect(result.decision).toBe('prompt');
+      expect(result.decision).toBe('allow');
+      expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
     });
-    test('medium-risk host_bash with no matching rule returns prompt in strict mode', async () => {
+    test('high-risk host_bash (rm) with no matching rule returns prompt in strict mode', async () => {
       testConfig.permissions.mode = 'strict';
       const result = await check('host_bash', { command: 'rm file.txt' }, '/tmp');
       expect(result.decision).toBe('prompt');
@@ -1568,8 +1583,8 @@ describe('Permission Checker', () => {
     });
     test('medium-risk tool with allow rule is NOT affected by allowHighRisk', async () => {
-      addRule('bash', 'rm *', '/tmp', 'allow', 100);
-      const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
+      addRule('bash', 'chmod *', '/tmp', 'allow', 100);
+      const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
       expect(result.decision).toBe('allow');
       expect(result.reason).toContain('Matched trust rule');
       // No mention of high-risk in the reason
@@ -1639,8 +1654,8 @@ describe('Permission Checker', () => {
     test('strict mode: medium-risk with matching allow rule auto-allows', async () => {
       testConfig.permissions.mode = 'strict';
-      addRule('bash', 'rm *', '/tmp', 'allow');
-      const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
+      addRule('bash', 'chmod *', '/tmp', 'allow');
+      const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
       expect(result.decision).toBe('allow');
       expect(result.reason).toContain('Matched trust rule');
     });
@@ -2416,10 +2431,11 @@ describe('Permission Checker', () => {
         expect(result.matchedRule?.id).toBe('default:allow-bash-global');
       });
-      test('low-risk host_bash with no user rule prompts in strict mode', async () => {
+      test('low-risk host_bash auto-allows in strict mode (default allow rule is a matching rule)', async () => {
         testConfig.permissions.mode = 'strict';
         const result = await check('host_bash', { command: 'echo hello' }, '/tmp');
-        expect(result.decision).toBe('prompt');
+        expect(result.decision).toBe('allow');
+        expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
       });
       test('low-risk file_read with no rule prompts in strict mode', async () => {
@@ -2481,10 +2497,10 @@ describe('Permission Checker', () => {
     //    target-scoped. ───────────────────────────────────────────────
     describe('Invariant 4: host execution approvals are explicit and target-scoped', () => {
-      test('host_bash prompts by default (no implicit allow)', async () => {
+      test('host_bash auto-allows low risk via default allow rule', async () => {
         const result = await check('host_bash', { command: 'ls' }, '/tmp');
-        expect(result.decision).toBe('prompt');
-        expect(result.matchedRule?.id).toBe('default:ask-host_bash-global');
+        expect(result.decision).toBe('allow');
+        expect(result.matchedRule?.id).toBe('default:allow-host_bash-global');
       });
       test('host_file_read prompts by default (no implicit allow)', async () => {
@@ -2531,11 +2547,11 @@ describe('Permission Checker', () => {
         expect(matchResult.matchedRule?.id).toBe('inv4-target-scoped');
         // Different target — the target-scoped rule should NOT match;
-        // falls back to the default host_bash ask rule (prompt)
+        // falls back to the default host_bash allow rule (auto-allows medium risk)
         const noMatchResult = await check('host_bash', { command: 'run script.js' }, '/tmp', {
           executionTarget: '/usr/local/bin/bun',
         });
-        expect(noMatchResult.decision).toBe('prompt');
+        expect(noMatchResult.decision).toBe('allow');
         expect(noMatchResult.matchedRule?.id).not.toBe('inv4-target-scoped');
       });
     });
@@ -2605,7 +2621,7 @@ describe('Permission Checker', () => {
       test('wildcard allow rule matches any command in legacy mode', async () => {
         testConfig.permissions.mode = 'legacy';
         addRule('bash', '*', 'everywhere');
-        const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
+        const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
         expect(result.decision).toBe('allow');
         expect(result.matchedRule).toBeDefined();
       });
@@ -2613,7 +2629,7 @@ describe('Permission Checker', () => {
       test('wildcard allow rule matches any command in strict mode', async () => {
         testConfig.permissions.mode = 'strict';
         addRule('bash', '*', 'everywhere');
-        const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
+        const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
         expect(result.decision).toBe('allow');
         expect(result.matchedRule).toBeDefined();
       });
@@ -2724,12 +2740,27 @@ describe('Permission Checker', () => {
     );
     test('getDefaultRuleTemplates has no extra rules when extraDirs is empty', () => {
-      // Default testConfig has no skills property → getConfig returns default
-      // with extraDirs: []
       const templates = getDefaultRuleTemplates();
       const extraRules = templates.filter((t) => t.id.includes('extra-'));
       expect(extraRules.length).toBe(0);
     });
+    test('getDefaultRuleTemplates tolerates partial config mocks', () => {
+      const originalSkills = testConfig.skills;
+      const originalSandbox = testConfig.sandbox;
+      try {
+        testConfig.skills = {} as any;
+        testConfig.sandbox = {} as any;
+        const templates = getDefaultRuleTemplates();
+        expect(Array.isArray(templates)).toBe(true);
+        expect(templates.some((t) => t.id.includes('extra-'))).toBe(false);
+        expect(templates.some((t) => t.id === 'default:allow-bash-global')).toBe(true);
+      } finally {
+        testConfig.skills = originalSkills;
+        testConfig.sandbox = originalSandbox;
+      }
+    });
   });
   // ── backslash normalization gated to Windows (PR 3558 follow-up) ──
@@ -2952,8 +2983,8 @@ describe('bash network_mode=proxied force prompt', () => {
   });
   test('non-proxied bash with trust rule follows normal flow', async () => {
-    addRule('bash', 'rm *', '/tmp');
-    const result = await check('bash', { command: 'rm file.txt' }, '/tmp');
+    addRule('bash', 'chmod *', '/tmp');
+    const result = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
     expect(result.decision).toBe('allow');
     expect(result.reason).not.toContain('Proxied network mode');
   });
@@ -3245,10 +3276,10 @@ describe('workspace mode — auto-allow workspace-scoped operations', () => {
     expect(result.reason).toContain('ask rule');
   });
-  test('host_bash → prompt (default ask rule matches)', async () => {
+  test('host_bash → allow (default allow rule matches)', async () => {
     const result = await check('host_bash', { command: 'ls' }, workspaceDir);
-    expect(result.decision).toBe('prompt');
-    expect(result.reason).toContain('ask rule');
+    expect(result.decision).toBe('allow');
+    expect(result.reason).toContain('Matched trust rule');
   });
   // ── explicit rules still take precedence in workspace mode ──
@@ -3428,20 +3459,20 @@ describe('integration regressions (PR 11)', () => {
   });
   test('raw legacy rule still works alongside new action key system', async () => {
-    // Use medium-risk commands (rm) so they aren't auto-allowed by low-risk classification.
+    // Use medium-risk commands (chmod) so they aren't auto-allowed by low-risk classification.
     // Disable sandbox so the catch-all "**" rule doesn't interfere.
     testConfig.sandbox.enabled = false;
     try { rmSync(join(checkerTestDir, 'protected', 'trust.json')); } catch { /* may not exist */ }
     clearCache();
     try {
-      addRule('bash', 'rm file.txt', 'everywhere');
+      addRule('bash', 'chmod 644 file.txt', 'everywhere');
       // Exact match still works
-      const r1 = await check('bash', { command: 'rm file.txt' }, '/tmp');
+      const r1 = await check('bash', { command: 'chmod 644 file.txt' }, '/tmp');
       expect(r1.decision).toBe('allow');
-      // Different rm argument should not match this exact raw rule
-      const r2 = await check('bash', { command: 'rm other.txt' }, '/tmp');
+      // Different chmod argument should not match this exact raw rule
+      const r2 = await check('bash', { command: 'chmod 755 other.txt' }, '/tmp');
       expect(r2.decision).not.toBe('allow');
     } finally {
       testConfig.sandbox.enabled = true;