npm - @vellumai/assistant - Versions diffs - 0.3.2 → 0.3.4 - Mend

@vellumai/assistant 0.3.2 → 0.3.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (109)

package/README.md +82 -21
package/package.json +1 -1
package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +16 -0
package/src/__tests__/app-git-history.test.ts +22 -27
package/src/__tests__/app-git-service.test.ts +44 -78
package/src/__tests__/call-orchestrator.test.ts +321 -0
package/src/__tests__/channel-approval-routes.test.ts +1267 -93
package/src/__tests__/channel-approval.test.ts +2 -0
package/src/__tests__/channel-approvals.test.ts +51 -2
package/src/__tests__/channel-delivery-store.test.ts +130 -1
package/src/__tests__/channel-guardian.test.ts +371 -1
package/src/__tests__/config-schema.test.ts +1 -1
package/src/__tests__/credential-security-invariants.test.ts +1 -0
package/src/__tests__/daemon-lifecycle.test.ts +635 -0
package/src/__tests__/daemon-server-session-init.test.ts +5 -0
package/src/__tests__/gateway-only-enforcement.test.ts +106 -21
package/src/__tests__/handlers-telegram-config.test.ts +82 -0
package/src/__tests__/handlers-twilio-config.test.ts +738 -5
package/src/__tests__/ingress-url-consistency.test.ts +64 -0
package/src/__tests__/ipc-snapshot.test.ts +10 -0
package/src/__tests__/run-orchestrator.test.ts +1 -1
package/src/__tests__/secret-scanner.test.ts +223 -0
package/src/__tests__/session-process-bridge.test.ts +2 -0
package/src/__tests__/shell-parser-property.test.ts +357 -2
package/src/__tests__/system-prompt.test.ts +25 -1
package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
package/src/__tests__/tool-permission-simulate-handler.test.ts +2 -2
package/src/__tests__/user-reference.test.ts +68 -0
package/src/calls/call-orchestrator.ts +63 -11
package/src/calls/twilio-config.ts +10 -1
package/src/calls/twilio-rest.ts +70 -0
package/src/cli/map.ts +6 -0
package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
package/src/commands/cc-command-registry.ts +14 -1
package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
package/src/config/bundled-skills/email-setup/SKILL.md +56 -0
package/src/config/bundled-skills/messaging/SKILL.md +4 -0
package/src/config/bundled-skills/subagent/SKILL.md +4 -0
package/src/config/bundled-skills/subagent/TOOLS.json +4 -0
package/src/config/defaults.ts +1 -1
package/src/config/schema.ts +6 -3
package/src/config/skills.ts +5 -32
package/src/config/system-prompt.ts +16 -0
package/src/config/user-reference.ts +29 -0
package/src/config/vellum-skills/catalog.json +52 -0
package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
package/src/config/vellum-skills/twilio-setup/SKILL.md +49 -4
package/src/daemon/auth-manager.ts +103 -0
package/src/daemon/computer-use-session.ts +8 -1
package/src/daemon/config-watcher.ts +253 -0
package/src/daemon/handlers/config.ts +193 -17
package/src/daemon/handlers/sessions.ts +5 -3
package/src/daemon/handlers/skills.ts +60 -17
package/src/daemon/ipc-contract-inventory.json +4 -0
package/src/daemon/ipc-contract.ts +16 -0
package/src/daemon/ipc-handler.ts +87 -0
package/src/daemon/lifecycle.ts +16 -4
package/src/daemon/ride-shotgun-handler.ts +11 -1
package/src/daemon/server.ts +105 -502
package/src/daemon/session-agent-loop.ts +9 -14
package/src/daemon/session-process.ts +20 -3
package/src/daemon/session-runtime-assembly.ts +60 -44
package/src/daemon/session-slash.ts +50 -2
package/src/daemon/session-surfaces.ts +17 -1
package/src/daemon/session.ts +8 -1
package/src/inbound/public-ingress-urls.ts +20 -3
package/src/index.ts +1 -23
package/src/memory/app-git-service.ts +24 -0
package/src/memory/app-store.ts +0 -21
package/src/memory/channel-delivery-store.ts +74 -3
package/src/memory/channel-guardian-store.ts +54 -26
package/src/memory/conversation-key-store.ts +20 -0
package/src/memory/conversation-store.ts +14 -2
package/src/memory/db-connection.ts +28 -0
package/src/memory/db-init.ts +1019 -0
package/src/memory/db.ts +2 -1995
package/src/memory/embedding-backend.ts +79 -11
package/src/memory/indexer.ts +2 -0
package/src/memory/job-utils.ts +64 -4
package/src/memory/jobs-worker.ts +7 -1
package/src/memory/recall-cache.ts +107 -0
package/src/memory/retriever.ts +30 -1
package/src/memory/schema-migration.ts +984 -0
package/src/memory/schema.ts +6 -0
package/src/memory/search/types.ts +2 -0
package/src/permissions/prompter.ts +14 -3
package/src/permissions/trust-store.ts +7 -0
package/src/runtime/channel-approvals.ts +17 -3
package/src/runtime/gateway-client.ts +2 -1
package/src/runtime/http-server.ts +28 -9
package/src/runtime/routes/channel-routes.ts +279 -100
package/src/runtime/routes/run-routes.ts +7 -1
package/src/runtime/run-orchestrator.ts +8 -1
package/src/security/secret-scanner.ts +218 -0
package/src/skills/clawhub.ts +6 -2
package/src/skills/frontmatter.ts +63 -0
package/src/skills/slash-commands.ts +23 -0
package/src/skills/vellum-catalog-remote.ts +107 -0
package/src/subagent/manager.ts +4 -1
package/src/subagent/types.ts +2 -0
package/src/tools/browser/auto-navigate.ts +132 -24
package/src/tools/browser/browser-manager.ts +67 -61
package/src/tools/claude-code/claude-code.ts +55 -3
package/src/tools/executor.ts +10 -2
package/src/tools/skills/vellum-catalog.ts +75 -127
package/src/tools/subagent/spawn.ts +2 -0
package/src/tools/terminal/parser.ts +21 -5
package/src/util/platform.ts +8 -1
package/src/util/retry.ts +4 -4

package/README.md CHANGED Viewed

@@ -45,6 +45,7 @@ cp .env.example .env
 | `OLLAMA_API_KEY` | No | — | API key for authenticated Ollama deployments |
 | `OLLAMA_BASE_URL` | No | `http://127.0.0.1:11434/v1` | Ollama base URL |
 | `RUNTIME_HTTP_PORT` | No | — | Enable the HTTP server (required for gateway/web) |
+| `RUNTIME_GATEWAY_ORIGIN_SECRET` | No | — | Dedicated secret for the `X-Gateway-Origin` proof header on `/channels/inbound`. When not set, falls back to the bearer token. Both gateway and runtime must share the same value. |
 | `VELLUM_DAEMON_SOCKET` | No | `~/.vellum/vellum.sock` | Override the daemon socket path |
 ## Usage
@@ -122,7 +123,7 @@ assistant/
 ## Channel Approval Flow
-When the assistant needs tool-use confirmation during a channel session (e.g., Telegram), the approval flow intercepts the run and surfaces an interactive prompt to the user. This is gated behind the `CHANNEL_APPROVALS_ENABLED=true` environment variable.
+When the assistant needs tool-use confirmation during a channel session (e.g., Telegram), the approval flow intercepts the run and surfaces an interactive prompt to the user. This approval-aware path is always enabled whenever orchestrator + callback context are available.
 ### How it works
@@ -131,6 +132,24 @@ When the assistant needs tool-use confirmation during a channel session (e.g., T
 3. **Decision** — The user's decision is mapped to the permission system (`allow` or `deny`) and applied to the pending run. For `approve_always`, a trust rule is persisted so future invocations of the same tool are auto-approved.
 4. **Reminder** — If the user sends a non-decision message while an approval is pending, a reminder prompt is re-sent with the approval buttons.
+### Delivery Semantics
+**Single final output guarantee (deliver-once guard):** Both the main poll (`processChannelMessageWithApprovals`) and the post-decision poll (`schedulePostDecisionDelivery`) race to deliver the final assistant reply when a run reaches terminal state. The `claimRunDelivery()` function in `channel-delivery-store.ts` ensures at-most-one delivery per run using an in-memory `Set<string>`. The first caller to claim the run ID proceeds with delivery; the other silently skips. This guard is sufficient because both racing pollers execute within the same process.
+**Stale callback blocking:** When inbound callback data (e.g., a Telegram button press) does not match any pending approval, the runtime returns `stale_ignored` and does not process the payload as a regular message. This prevents stale button presses from old approval prompts from triggering unrelated agent loops.
+### Prompt Delivery Failure Policy (Fail-Closed)
+All approval prompt delivery paths use a **fail-closed** policy -- if the prompt cannot be delivered, the run is auto-denied rather than left in a silent wait state:
+- **Standard (self-approval) prompt:** If `deliverApprovalPrompt()` fails, the run is immediately auto-denied via `handleChannelDecision(reject)`. No silent `needs_confirmation` hang.
+- **Guardian-routed prompt:** If the approval prompt cannot be delivered to the guardian's chat, the guardian approval record is marked `denied`, the underlying run is rejected, and the requester is notified that the action was denied because the prompt could not reach the guardian.
+- **Unverified channel (no guardian binding):** Sensitive actions are auto-denied immediately without attempting prompt delivery. The requester is notified that no guardian has been configured.
+### Plain-Text Fallback for Non-Rich Channels
+Channels that do not support rich inline approval UI (e.g., inline keyboards) receive plain-text instructions embedded in the message body. The `channelSupportsRichApprovalUI()` check determines whether to send the structured `promptText` (for rich channels like Telegram) or the `plainTextFallback` string (for all other channels, e.g., SMS). The fallback text includes instructions like "Reply yes/no/always" so the user can respond via text.
 ### Key modules
 | File | Purpose |
@@ -144,32 +163,33 @@ When the assistant needs tool-use confirmation during a channel session (e.g., T
 ### Enabling
-Set the environment variable before starting the daemon:
-```bash
-CHANNEL_APPROVALS_ENABLED=true
-```
-When disabled (the default), channel messages follow the standard fire-and-forget processing path without approval interception.
+Channel approvals are always enabled for channel traffic when orchestrator + callback context are available.
 ### Guardian-Specific Behavior
-When `CHANNEL_APPROVALS_ENABLED=true`, the channel guardian system adds a trust layer:
+Guardian actor-role *classification* (determining whether a sender is guardian, non-guardian, or unverified) runs unconditionally. Guardian *enforcement* for non-guardian/unverified actors (`forceStrictSideEffects`, fail-closed denial for unverified channels, and approval prompt routing to guardians) is always active when orchestrator + callback context are available.
 | Flag / Behavior | Description |
 |-----------------|-------------|
-| `CHANNEL_APPROVALS_ENABLED=true` | Enables the approval flow and guardian role resolution on channel inbound messages |
-| `forceStrictSideEffects` | Automatically set on runs triggered by non-guardian or unverified-channel senders so all side-effect tools require approval |
-| **Fail-closed no-binding** | When no guardian binding exists for a channel, the sender is classified as `unverified_channel`. Any sensitive action is auto-denied with a notice that no guardian has been configured. This prevents unverified senders from self-approving actions. |
+| `forceStrictSideEffects` | Automatically set on runs triggered by non-guardian or unverified-channel senders so all side-effect tools require approval. |
+| **Fail-closed no-binding** | When no guardian binding exists for a channel, the sender is classified as `unverified_channel`. Any sensitive action is auto-denied with a notice that no guardian has been configured. |
+| **Fail-closed no-identity** | When `senderExternalUserId` is absent, the actor is classified as `unverified_channel` (even if no guardian binding exists yet). |
 | **Guardian-only approval** | Non-guardian senders cannot approve their own pending actions. Only the verified guardian can approve or deny. |
-| **Expired approval auto-deny** | If a guardian approval request expires (30-minute TTL) without a decision, the action is auto-denied when the non-guardian sender next interacts. |
+| **Expired approval auto-deny** | A proactive sweep runs every 60 seconds to find expired guardian approval requests (30-minute TTL). Expired approvals are auto-denied, and both the requester and guardian are notified. If a non-guardian interacts before the sweep runs, the expiry is also detected reactively. |
+### Ingress Boundary Guarantees (Gateway-Only Mode)
+The runtime operates in **gateway-only mode**: all public-facing webhook paths are blocked at the runtime level. Direct access to Twilio webhook routes (`/webhooks/twilio/voice`, `/webhooks/twilio/status`, `/webhooks/twilio/connect-action`, `/webhooks/twilio/sms`) and their legacy equivalents (`/v1/calls/twilio/*`) returns `410 GATEWAY_ONLY`. This ensures external webhook traffic (including SMS) can only reach the runtime through the gateway, which performs signature validation before forwarding.
+Internal forwarding routes (`/v1/internal/twilio/*`) are unaffected — these accept pre-validated payloads from the gateway over the private network.
 ### Gateway-Origin Ingress Contract
-The `/channels/inbound` endpoint requires a valid `X-Gateway-Origin` header that matches the configured bearer token. This ensures channel messages can only be submitted via the gateway (which performs webhook-level verification) and not via direct HTTP calls that bypass signature checks.
+The `/channels/inbound` endpoint requires a valid `X-Gateway-Origin` header to prove the request originated from the gateway. This ensures channel messages can only arrive via the gateway (which performs webhook-level verification) and not via direct HTTP calls that bypass signature checks.
-- **With bearer token configured:** Requests must include `X-Gateway-Origin` with the shared secret. Missing or invalid values return `403 GATEWAY_ORIGIN_REQUIRED`.
-- **Without bearer token:** Gateway-origin validation is skipped (local dev without auth).
+- **Dedicated secret (`RUNTIME_GATEWAY_ORIGIN_SECRET`):** When set, this is the expected value for the `X-Gateway-Origin` header. Both the gateway and the runtime must share this secret.
+- **Bearer token fallback:** When `RUNTIME_GATEWAY_ORIGIN_SECRET` is not set, the runtime falls back to validating against the bearer token for backward compatibility.
+- **Without any secret:** When neither a dedicated secret nor a bearer token is configured (local dev), gateway-origin validation is skipped entirely.
 - **Auth layer order:** Bearer token authentication (`Authorization` header) is checked first. Gateway-origin validation runs inside the handler.
 ## Twilio Setup Primitive
@@ -184,16 +204,48 @@ The daemon handles `twilio_config` messages with the following actions:
 |--------|-------------|
 | `get` | Returns current state: `hasCredentials` (boolean) and `phoneNumber` (if assigned) |
 | `set_credentials` | Validates and stores Account SID and Auth Token in secure storage (Keychain / encrypted file). Credentials are retrieved from the credential store internally. |
-| `clear_credentials` | Removes stored Account SID, Auth Token, and phone number from secure storage. |
-| `provision_number` | Purchases a new phone number via the Twilio API. Accepts optional `areaCode` and `country` (ISO 3166-1 alpha-2, default `US`). Returns the purchased number but does not assign it — call `assign_number` separately to persist it. |
-| `assign_number` | Assigns an existing Twilio phone number (E.164 format) to the assistant |
+| `clear_credentials` | Removes stored Account SID and Auth Token from secure storage. Preserves the phone number in both config (`sms.phoneNumber`) and secure key (`credential:twilio:phone_number`) so that re-entering credentials resumes working without needing to reassign the number. |
+| `provision_number` | Purchases a new phone number via the Twilio API. Accepts optional `areaCode` and `country` (ISO 3166-1 alpha-2, default `US`). Auto-assigns the number to the assistant (persists to config and secure storage) and configures Twilio webhooks (voice, status callback, SMS) when a public ingress URL is available. |
+| `assign_number` | Assigns an existing Twilio phone number (E.164 format) to the assistant and auto-configures webhooks when ingress is available |
 | `list_numbers` | Lists all incoming phone numbers on the Twilio account with their capabilities (voice, SMS) |
-Response type: `twilio_config_response` with `success`, `hasCredentials`, optional `phoneNumber`, optional `numbers` array, and optional `error`.
+Response type: `twilio_config_response` with `success`, `hasCredentials`, optional `phoneNumber`, optional `numbers` array, optional `error`, and optional `warning` (for non-fatal webhook sync failures).
+### Ingress Webhook Reconciliation
+When the public ingress URL is changed via the Settings UI (`ingress_config` set action), the daemon automatically reconciles Twilio webhooks in addition to triggering a Telegram webhook reconcile on the gateway. If all of the following conditions are met, the daemon pushes updated webhook URLs (voice, status callback, SMS) to Twilio:
+1. Ingress is being **enabled** (not disabled)
+2. Twilio **credentials** are configured (Account SID + Auth Token in secure storage)
+3. A phone number is **assigned** (persisted in `sms.phoneNumber` config)
+This reconciliation is **best-effort and fire-and-forget** -- failures are logged but do not block the ingress config save or produce an error response. This ensures that changing a tunnel URL (e.g., restarting ngrok) automatically updates Twilio's webhook routing without requiring manual re-assignment of the phone number.
 ### Single-Number-Per-Assistant Model
-Each assistant is assigned a single Twilio phone number that is shared between voice calls and SMS. The number is stored in the assistant's config at `sms.phoneNumber` and used as the `From` for outbound SMS via the gateway's `/deliver/sms` endpoint. The same credentials (Account SID, Auth Token) are used for both voice and SMS operations.
+Each assistant is assigned a single Twilio phone number that is shared between voice calls and SMS. The number is stored in the assistant's config at `sms.phoneNumber` (legacy global field) and used as the `From` for outbound SMS via the gateway's `/deliver/sms` endpoint. The same credentials (Account SID, Auth Token) are used for both voice and SMS operations.
+#### Assistant-Scoped Phone Numbers
+When `assistantId` is provided in the `twilio_config` request, the `provision_number` and `assign_number` actions persist the phone number into a per-assistant mapping at `sms.assistantPhoneNumbers` (a `Record<string, string>` keyed by assistant ID). The legacy `sms.phoneNumber` field is always updated for backward compatibility.
+The `get` action, when called with `assistantId`, resolves the phone number by checking `sms.assistantPhoneNumbers[assistantId]` first, falling back to `sms.phoneNumber`. This allows multiple assistants to have distinct phone numbers while preserving existing behavior for single-assistant setups.
+The per-assistant mapping is propagated to the gateway via the config file watcher, enabling phone-number-based routing at the gateway boundary (see Gateway README).
+### Phone Number Resolution Order
+At runtime, `getTwilioConfig()` resolves the phone number using this priority chain:
+1. **`TWILIO_PHONE_NUMBER` env var** — highest priority, explicit override for dev/CI.
+2. **`sms.phoneNumber` in config** — the primary source of truth, written by `provision_number` and `assign_number`.
+3. **`credential:twilio:phone_number` secure key** — backward-compatible fallback for setups that predate the config-first model.
+If no number is found after all three sources, an error is thrown.
+### Assistant-Scoped Guardian State
+Guardian bindings, verification challenges, and approval requests are all scoped to an `(assistantId, channel)` pair. The `assistantId` parameter flows through `handleChannelInbound`, `validateAndConsumeChallenge`, `isGuardian`, `getGuardianBinding`, and `createApprovalRequest`. This means each assistant has its own independent guardian binding per channel -- verifying as guardian on one assistant does not grant guardian status on another.
 ### Channel-Aware Guardian Challenges
@@ -228,6 +280,15 @@ The image runs as non-root user `assistant` (uid 1001) and exposes port `3001`.
 ## Troubleshooting
+### Guardian and gateway-origin issues
+| Symptom | Cause | Resolution |
+|---------|-------|------------|
+| 403 `GATEWAY_ORIGIN_REQUIRED` on `/channels/inbound` | Missing or invalid `X-Gateway-Origin` header | Ensure `RUNTIME_GATEWAY_ORIGIN_SECRET` is set to the same value on both gateway and runtime. If not using a dedicated secret, ensure the bearer token (`RUNTIME_BEARER_TOKEN` or `~/.vellum/http-token`) is shared. |
+| Non-guardian actions silently denied | No guardian binding for the channel. The system is fail-closed for unverified channels. | Run the guardian verification flow from the desktop UI to bind a guardian. |
+| Guardian approval expired | The 30-minute TTL elapsed. The proactive sweep auto-denied the approval and notified both parties. | The requester must re-trigger the action. |
+| `forceStrictSideEffects` unexpectedly active | The sender is classified as `non-guardian` or `unverified_channel` | Verify the sender's `externalUserId` matches the guardian binding, or set up a guardian binding for the channel. |
 ### Invalid RRULE set expressions
 If `schedule_create` rejects an RRULE expression, check the following:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.3.2",
+  "version": "0.3.4",
   "type": "module",
   "bin": {
     "vellum": "./src/index.ts"

package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap CHANGED Viewed

@@ -594,6 +594,13 @@ exports[`IPC message snapshots ClientMessage types telegram_config serializes to
 }
 `;
+exports[`IPC message snapshots ClientMessage types twilio_config serializes to expected JSON 1`] = `
+{
+  "action": "get",
+  "type": "twilio_config",
+}
+`;
 exports[`IPC message snapshots ClientMessage types guardian_verification serializes to expected JSON 1`] = `
 {
   "action": "create_challenge",
@@ -1920,6 +1927,15 @@ exports[`IPC message snapshots ServerMessage types telegram_config_response seri
 }
 `;
+exports[`IPC message snapshots ServerMessage types twilio_config_response serializes to expected JSON 1`] = `
+{
+  "hasCredentials": true,
+  "phoneNumber": "+15551234567",
+  "success": true,
+  "type": "twilio_config_response",
+}
+`;
 exports[`IPC message snapshots ServerMessage types guardian_verification_response serializes to expected JSON 1`] = `
 {
   "instruction": "Send this code to the Telegram bot",

package/src/__tests__/app-git-history.test.ts CHANGED Viewed

@@ -14,8 +14,8 @@ mock.module('../util/platform.js', () => ({
 }));
 // Re-import after mocking so modules use our temp dir
-const { createApp, updateApp, deleteApp: _deleteApp, writeAppFile: _writeAppFile, editAppFile: _editAppFile, getAppsDir } = await import('../memory/app-store.js');
-const { getAppHistory, getAppDiff, getAppFileAtVersion, restoreAppVersion, commitAppChange: _commitAppChange } = await import('../memory/app-git-service.js');
+const { createApp, updateApp, getAppsDir } = await import('../memory/app-store.js');
+const { getAppHistory, getAppDiff, getAppFileAtVersion, restoreAppVersion, commitAppTurnChanges } = await import('../memory/app-git-service.js');
 describe('App Git History', () => {
   beforeEach(() => {
@@ -31,27 +31,20 @@ describe('App Git History', () => {
     }
   });
-  /** Wait for fire-and-forget commits to complete. */
-  async function waitForCommits(): Promise<void> {
-    await new Promise(resolve => setTimeout(resolve, 500));
-  }
   test('getAppHistory returns commits for a specific app', async () => {
     const app = createApp({
       name: 'History App',
       schemaJson: '{}',
       htmlDefinition: '<h1>v1</h1>',
     });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 1);
     updateApp(app.id, { htmlDefinition: '<h1>v2</h1>' });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 2);
     const history = await getAppHistory(app.id);
     expect(history.length).toBeGreaterThanOrEqual(2);
-    expect(history[0].message).toContain('Update app');
-    // The create commit may be absorbed into the "Initial commit" on a fresh repo
-    expect(history[history.length - 1].message).toMatch(/Create app|Initial commit/);
+    expect(history[0].message).toContain('Turn 2');
     expect(history[0].commitHash).toMatch(/^[0-9a-f]+$/);
     expect(history[0].timestamp).toBeGreaterThan(0);
   });
@@ -62,22 +55,24 @@ describe('App Git History', () => {
       schemaJson: '{}',
       htmlDefinition: '<p>one</p>',
     });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 1);
     const app2 = createApp({
       name: 'App Two',
       schemaJson: '{}',
       htmlDefinition: '<p>two</p>',
     });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 2);
     const history1 = await getAppHistory(app1.id);
     const history2 = await getAppHistory(app2.id);
-    // App1's history should only contain its own commits
-    expect(history1.every(v => v.message.includes('App One') || v.message.includes('Initial commit'))).toBe(true);
-    // App2's history should only contain its own commits
-    expect(history2.every(v => v.message.includes('App Two') || v.message.includes('Initial commit'))).toBe(true);
+    // App1 should have history from turn 1 (or initial commit)
+    expect(history1.length).toBeGreaterThanOrEqual(1);
+    // App2 should have history from turn 2 (or initial commit)
+    expect(history2.length).toBeGreaterThanOrEqual(1);
+    // App2's commits should not include app1-only turn commits
+    // (turn 2 created app2, so app2 history should not have turn 1 unless initial commit)
   });
   test('getAppHistory respects limit', async () => {
@@ -86,13 +81,13 @@ describe('App Git History', () => {
       schemaJson: '{}',
       htmlDefinition: '<p>v1</p>',
     });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 1);
     updateApp(app.id, { htmlDefinition: '<p>v2</p>' });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 2);
     updateApp(app.id, { htmlDefinition: '<p>v3</p>' });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 3);
     const limited = await getAppHistory(app.id, 2);
     expect(limited.length).toBe(2);
@@ -104,13 +99,13 @@ describe('App Git History', () => {
       schemaJson: '{}',
       htmlDefinition: '<p>original</p>',
     });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 1);
     const history1 = await getAppHistory(app.id);
     const createHash = history1[0].commitHash;
     updateApp(app.id, { htmlDefinition: '<p>modified</p>' });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 2);
     const history2 = await getAppHistory(app.id);
     const updateHash = history2[0].commitHash;
@@ -126,13 +121,13 @@ describe('App Git History', () => {
       schemaJson: '{}',
       htmlDefinition: '<p>version one</p>',
     });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 1);
     const history1 = await getAppHistory(app.id);
     const v1Hash = history1[0].commitHash;
     updateApp(app.id, { htmlDefinition: '<p>version two</p>' });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 2);
     // Get the file at v1 — should show old content
     const v1Content = await getAppFileAtVersion(app.id, 'index.html', v1Hash);
@@ -150,13 +145,13 @@ describe('App Git History', () => {
       schemaJson: '{}',
       htmlDefinition: '<p>original content</p>',
     });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 1);
     const history1 = await getAppHistory(app.id);
     const originalHash = history1[0].commitHash;
     updateApp(app.id, { htmlDefinition: '<p>new content</p>' });
-    await waitForCommits();
+    await commitAppTurnChanges('session-1', 2);
     // Verify current content is "new content"
     let current = readFileSync(join(getAppsDir(), app.id, 'index.html'), 'utf-8');

package/src/__tests__/app-git-service.test.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import { join } from 'node:path';
 import { tmpdir } from 'node:os';
 import { execFileSync } from 'node:child_process';
 import { _resetGitServiceRegistry } from '../workspace/git-service.js';
-import { commitAppChange, _resetAppGitState } from '../memory/app-git-service.js';
+import { commitAppTurnChanges, _resetAppGitState } from '../memory/app-git-service.js';
 // Mock getDataDir to use a temp directory
 let testDataDir: string;
@@ -43,127 +43,93 @@ describe('App Git Service', () => {
     }
   }
-  test('initializes git repo in apps directory on first commit', async () => {
-    const appsDir = getAppsDir();
-    expect(existsSync(join(appsDir, '.git'))).toBe(false);
-    await commitAppChange('test commit');
-    expect(existsSync(join(appsDir, '.git'))).toBe(true);
-  });
   test('.gitignore excludes preview files and records', async () => {
     const appsDir = getAppsDir();
-    await commitAppChange('test commit');
+    await commitAppTurnChanges('test-session', 1);
     const gitignore = readFileSync(join(appsDir, '.gitignore'), 'utf-8');
     expect(gitignore).toContain('*.preview');
     expect(gitignore).toContain('*/records/');
   });
-  test('createApp produces a commit', async () => {
+  test('mutations do not auto-commit', async () => {
     createApp({
       name: 'Test App',
       schemaJson: '{}',
       htmlDefinition: '<h1>Hello</h1>',
     });
-    // Give the fire-and-forget commit time to complete
+    // Wait to make sure no fire-and-forget commit happens
     await new Promise(resolve => setTimeout(resolve, 500));
     const appsDir = getAppsDir();
-    const commits = getGitLog(appsDir);
-    expect(commits.some(c => c.includes('Create app: Test App'))).toBe(true);
+    // No git repo should exist yet since no turn commit was triggered
+    expect(existsSync(join(appsDir, '.git'))).toBe(false);
   });
-  test('updateApp produces a commit with changed fields', async () => {
+  test('commitAppTurnChanges creates a single commit for multiple mutations', async () => {
     const app = createApp({
-      name: 'My App',
+      name: 'Multi Edit App',
       schemaJson: '{}',
       htmlDefinition: '<p>v1</p>',
     });
-    await new Promise(resolve => setTimeout(resolve, 500));
-    updateApp(app.id, { name: 'My App v2', htmlDefinition: '<p>v2</p>' });
-    await new Promise(resolve => setTimeout(resolve, 500));
-    const appsDir = getAppsDir();
-    const commits = getGitLog(appsDir);
-    expect(commits.some(c => c.includes('Update app: My App v2'))).toBe(true);
-  });
-  test('deleteApp produces a commit with app name', async () => {
-    const app = createApp({
-      name: 'Doomed App',
-      schemaJson: '{}',
-      htmlDefinition: '<p>bye</p>',
-    });
-    await new Promise(resolve => setTimeout(resolve, 500));
+    updateApp(app.id, { htmlDefinition: '<p>v2</p>' });
+    writeAppFile(app.id, 'styles.css', 'body { color: red; }');
+    editAppFile(app.id, 'index.html', 'v2', 'v3');
-    deleteApp(app.id);
-    await new Promise(resolve => setTimeout(resolve, 500));
+    // All mutations happened, now commit at turn boundary
+    await commitAppTurnChanges('session-1', 1);
     const appsDir = getAppsDir();
     const commits = getGitLog(appsDir);
-    expect(commits.some(c => c.includes('Delete app: Doomed App'))).toBe(true);
+    // On a fresh repo the first turn's files may be absorbed into the
+    // "Initial commit" created by WorkspaceGitService.ensureInitialized.
+    // Either way there should be at most 2 commits, not one per mutation.
+    expect(commits.length).toBeLessThanOrEqual(2);
+    // The turn commit message should appear (or files are in the initial commit)
+    expect(commits.some(c => c.includes('Turn 1') || c.includes('Initial commit'))).toBe(true);
   });
-  test('writeAppFile produces a commit', async () => {
-    const app = createApp({
-      name: 'File App',
+  test('commitAppTurnChanges does not commit when nothing changed', async () => {
+    // Trigger initial commit by creating and committing an app
+    createApp({
+      name: 'Static App',
       schemaJson: '{}',
       htmlDefinition: '<p>hi</p>',
     });
-    await new Promise(resolve => setTimeout(resolve, 500));
-    writeAppFile(app.id, 'styles.css', 'body { color: red; }');
-    await new Promise(resolve => setTimeout(resolve, 500));
+    await commitAppTurnChanges('session-1', 1);
     const appsDir = getAppsDir();
-    const commits = getGitLog(appsDir);
-    expect(commits.some(c => c.includes('Write styles.css in app'))).toBe(true);
-  });
+    const commitsBefore = getGitLog(appsDir);
-  test('editAppFile produces a commit on success', async () => {
-    const app = createApp({
-      name: 'Edit App',
-      schemaJson: '{}',
-      htmlDefinition: '<p>old text</p>',
-    });
-    await new Promise(resolve => setTimeout(resolve, 500));
+    // No mutations — turn commit should be a no-op
+    await commitAppTurnChanges('session-1', 2);
-    const result = editAppFile(app.id, 'index.html', 'old text', 'new text');
-    expect(result.ok).toBe(true);
-    await new Promise(resolve => setTimeout(resolve, 500));
+    const commitsAfter = getGitLog(appsDir);
+    expect(commitsAfter.length).toBe(commitsBefore.length);
+  });
-    const appsDir = getAppsDir();
-    const commits = getGitLog(appsDir);
-    expect(commits.some(c => c.includes('Edit index.html in app'))).toBe(true);
+  test('commitAppTurnChanges swallows errors gracefully', async () => {
+    _resetAppGitState();
+    // This should not throw
+    await commitAppTurnChanges('test', 1);
   });
-  test('editAppFile does not commit on failure', async () => {
+  test('deleteApp changes are captured by turn commit', async () => {
     const app = createApp({
-      name: 'No Edit App',
+      name: 'Doomed App',
       schemaJson: '{}',
-      htmlDefinition: '<p>content</p>',
+      htmlDefinition: '<p>bye</p>',
     });
-    await new Promise(resolve => setTimeout(resolve, 500));
-    const commitsBefore = getGitLog(getAppsDir());
-    const result = editAppFile(app.id, 'index.html', 'nonexistent string', 'replacement');
-    expect(result.ok).toBe(false);
-    await new Promise(resolve => setTimeout(resolve, 500));
-    const commitsAfter = getGitLog(getAppsDir());
-    // No new commits should have been created for the failed edit
-    expect(commitsAfter.length).toBe(commitsBefore.length);
-  });
+    await commitAppTurnChanges('session-1', 1);
-  test('commitAppChange swallows errors gracefully', async () => {
-    _resetAppGitState();
+    deleteApp(app.id);
+    await commitAppTurnChanges('session-1', 2);
-    // This should not throw
-    await commitAppChange('test');
+    const appsDir = getAppsDir();
+    const commits = getGitLog(appsDir);
+    expect(commits[0]).toContain('Turn 2: app changes');
   });
 });