@vellumai/assistant 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +82 -21
  2. package/package.json +1 -1
  3. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +16 -0
  4. package/src/__tests__/app-git-history.test.ts +22 -27
  5. package/src/__tests__/app-git-service.test.ts +44 -78
  6. package/src/__tests__/call-orchestrator.test.ts +321 -0
  7. package/src/__tests__/channel-approval-routes.test.ts +1267 -93
  8. package/src/__tests__/channel-approval.test.ts +2 -0
  9. package/src/__tests__/channel-approvals.test.ts +51 -2
  10. package/src/__tests__/channel-delivery-store.test.ts +130 -1
  11. package/src/__tests__/channel-guardian.test.ts +371 -1
  12. package/src/__tests__/config-schema.test.ts +1 -1
  13. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  14. package/src/__tests__/daemon-lifecycle.test.ts +635 -0
  15. package/src/__tests__/daemon-server-session-init.test.ts +5 -0
  16. package/src/__tests__/gateway-only-enforcement.test.ts +106 -21
  17. package/src/__tests__/handlers-telegram-config.test.ts +82 -0
  18. package/src/__tests__/handlers-twilio-config.test.ts +738 -5
  19. package/src/__tests__/ingress-url-consistency.test.ts +64 -0
  20. package/src/__tests__/ipc-snapshot.test.ts +10 -0
  21. package/src/__tests__/run-orchestrator.test.ts +1 -1
  22. package/src/__tests__/secret-scanner.test.ts +223 -0
  23. package/src/__tests__/session-process-bridge.test.ts +2 -0
  24. package/src/__tests__/shell-parser-property.test.ts +357 -2
  25. package/src/__tests__/system-prompt.test.ts +25 -1
  26. package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
  27. package/src/__tests__/tool-permission-simulate-handler.test.ts +2 -2
  28. package/src/__tests__/user-reference.test.ts +68 -0
  29. package/src/calls/call-orchestrator.ts +63 -11
  30. package/src/calls/twilio-config.ts +10 -1
  31. package/src/calls/twilio-rest.ts +70 -0
  32. package/src/cli/map.ts +6 -0
  33. package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
  34. package/src/commands/cc-command-registry.ts +14 -1
  35. package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
  36. package/src/config/bundled-skills/email-setup/SKILL.md +56 -0
  37. package/src/config/bundled-skills/messaging/SKILL.md +4 -0
  38. package/src/config/bundled-skills/subagent/SKILL.md +4 -0
  39. package/src/config/bundled-skills/subagent/TOOLS.json +4 -0
  40. package/src/config/defaults.ts +1 -1
  41. package/src/config/schema.ts +6 -3
  42. package/src/config/skills.ts +5 -32
  43. package/src/config/system-prompt.ts +16 -0
  44. package/src/config/user-reference.ts +29 -0
  45. package/src/config/vellum-skills/catalog.json +52 -0
  46. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
  47. package/src/config/vellum-skills/twilio-setup/SKILL.md +49 -4
  48. package/src/daemon/auth-manager.ts +103 -0
  49. package/src/daemon/computer-use-session.ts +8 -1
  50. package/src/daemon/config-watcher.ts +253 -0
  51. package/src/daemon/handlers/config.ts +193 -17
  52. package/src/daemon/handlers/sessions.ts +5 -3
  53. package/src/daemon/handlers/skills.ts +60 -17
  54. package/src/daemon/ipc-contract-inventory.json +4 -0
  55. package/src/daemon/ipc-contract.ts +16 -0
  56. package/src/daemon/ipc-handler.ts +87 -0
  57. package/src/daemon/lifecycle.ts +16 -4
  58. package/src/daemon/ride-shotgun-handler.ts +11 -1
  59. package/src/daemon/server.ts +105 -502
  60. package/src/daemon/session-agent-loop.ts +9 -14
  61. package/src/daemon/session-process.ts +20 -3
  62. package/src/daemon/session-runtime-assembly.ts +60 -44
  63. package/src/daemon/session-slash.ts +50 -2
  64. package/src/daemon/session-surfaces.ts +17 -1
  65. package/src/daemon/session.ts +8 -1
  66. package/src/inbound/public-ingress-urls.ts +20 -3
  67. package/src/index.ts +1 -23
  68. package/src/memory/app-git-service.ts +24 -0
  69. package/src/memory/app-store.ts +0 -21
  70. package/src/memory/channel-delivery-store.ts +74 -3
  71. package/src/memory/channel-guardian-store.ts +54 -26
  72. package/src/memory/conversation-key-store.ts +20 -0
  73. package/src/memory/conversation-store.ts +14 -2
  74. package/src/memory/db-connection.ts +28 -0
  75. package/src/memory/db-init.ts +1019 -0
  76. package/src/memory/db.ts +2 -1995
  77. package/src/memory/embedding-backend.ts +79 -11
  78. package/src/memory/indexer.ts +2 -0
  79. package/src/memory/job-utils.ts +64 -4
  80. package/src/memory/jobs-worker.ts +7 -1
  81. package/src/memory/recall-cache.ts +107 -0
  82. package/src/memory/retriever.ts +30 -1
  83. package/src/memory/schema-migration.ts +984 -0
  84. package/src/memory/schema.ts +6 -0
  85. package/src/memory/search/types.ts +2 -0
  86. package/src/permissions/prompter.ts +14 -3
  87. package/src/permissions/trust-store.ts +7 -0
  88. package/src/runtime/channel-approvals.ts +17 -3
  89. package/src/runtime/gateway-client.ts +2 -1
  90. package/src/runtime/http-server.ts +28 -9
  91. package/src/runtime/routes/channel-routes.ts +279 -100
  92. package/src/runtime/routes/run-routes.ts +7 -1
  93. package/src/runtime/run-orchestrator.ts +8 -1
  94. package/src/security/secret-scanner.ts +218 -0
  95. package/src/skills/clawhub.ts +6 -2
  96. package/src/skills/frontmatter.ts +63 -0
  97. package/src/skills/slash-commands.ts +23 -0
  98. package/src/skills/vellum-catalog-remote.ts +107 -0
  99. package/src/subagent/manager.ts +4 -1
  100. package/src/subagent/types.ts +2 -0
  101. package/src/tools/browser/auto-navigate.ts +132 -24
  102. package/src/tools/browser/browser-manager.ts +67 -61
  103. package/src/tools/claude-code/claude-code.ts +55 -3
  104. package/src/tools/executor.ts +10 -2
  105. package/src/tools/skills/vellum-catalog.ts +75 -127
  106. package/src/tools/subagent/spawn.ts +2 -0
  107. package/src/tools/terminal/parser.ts +21 -5
  108. package/src/util/platform.ts +8 -1
  109. package/src/util/retry.ts +4 -4
package/README.md CHANGED
@@ -45,6 +45,7 @@ cp .env.example .env
45
45
  | `OLLAMA_API_KEY` | No | — | API key for authenticated Ollama deployments |
46
46
  | `OLLAMA_BASE_URL` | No | `http://127.0.0.1:11434/v1` | Ollama base URL |
47
47
  | `RUNTIME_HTTP_PORT` | No | — | Enable the HTTP server (required for gateway/web) |
48
+ | `RUNTIME_GATEWAY_ORIGIN_SECRET` | No | — | Dedicated secret for the `X-Gateway-Origin` proof header on `/channels/inbound`. When not set, falls back to the bearer token. Both gateway and runtime must share the same value. |
48
49
  | `VELLUM_DAEMON_SOCKET` | No | `~/.vellum/vellum.sock` | Override the daemon socket path |
49
50
 
50
51
  ## Usage
@@ -122,7 +123,7 @@ assistant/
122
123
 
123
124
  ## Channel Approval Flow
124
125
 
125
- When the assistant needs tool-use confirmation during a channel session (e.g., Telegram), the approval flow intercepts the run and surfaces an interactive prompt to the user. This is gated behind the `CHANNEL_APPROVALS_ENABLED=true` environment variable.
126
+ When the assistant needs tool-use confirmation during a channel session (e.g., Telegram), the approval flow intercepts the run and surfaces an interactive prompt to the user. This approval-aware path is always enabled whenever the orchestrator and callback context are available.
126
127
 
127
128
  ### How it works
128
129
 
@@ -131,6 +132,24 @@ When the assistant needs tool-use confirmation during a channel session (e.g., T
131
132
  3. **Decision** — The user's decision is mapped to the permission system (`allow` or `deny`) and applied to the pending run. For `approve_always`, a trust rule is persisted so future invocations of the same tool are auto-approved.
132
133
  4. **Reminder** — If the user sends a non-decision message while an approval is pending, a reminder prompt is re-sent with the approval buttons.
133
134
 
135
+ ### Delivery Semantics
136
+
137
+ **Single final output guarantee (deliver-once guard):** Both the main poll (`processChannelMessageWithApprovals`) and the post-decision poll (`schedulePostDecisionDelivery`) race to deliver the final assistant reply when a run reaches terminal state. The `claimRunDelivery()` function in `channel-delivery-store.ts` ensures at-most-one delivery per run using an in-memory `Set<string>`. The first caller to claim the run ID proceeds with delivery; the other silently skips. This guard is sufficient because both racing pollers execute within the same process.
138
+
139
+ **Stale callback blocking:** When inbound callback data (e.g., a Telegram button press) does not match any pending approval, the runtime returns `stale_ignored` and does not process the payload as a regular message. This prevents stale button presses from old approval prompts from triggering unrelated agent loops.
140
+
141
+ ### Prompt Delivery Failure Policy (Fail-Closed)
142
+
143
+ All approval prompt delivery paths use a **fail-closed** policy -- if the prompt cannot be delivered, the run is auto-denied rather than left in a silent wait state:
144
+
145
+ - **Standard (self-approval) prompt:** If `deliverApprovalPrompt()` fails, the run is immediately auto-denied via `handleChannelDecision(reject)`. No silent `needs_confirmation` hang.
146
+ - **Guardian-routed prompt:** If the approval prompt cannot be delivered to the guardian's chat, the guardian approval record is marked `denied`, the underlying run is rejected, and the requester is notified that the action was denied because the prompt could not reach the guardian.
147
+ - **Unverified channel (no guardian binding):** Sensitive actions are auto-denied immediately without attempting prompt delivery. The requester is notified that no guardian has been configured.
148
+
149
+ ### Plain-Text Fallback for Non-Rich Channels
150
+
151
+ Channels that do not support rich inline approval UI (e.g., inline keyboards) receive plain-text instructions embedded in the message body. The `channelSupportsRichApprovalUI()` check determines whether to send the structured `promptText` (for rich channels like Telegram) or the `plainTextFallback` string (for all other channels, e.g., SMS). The fallback text includes instructions like "Reply yes/no/always" so the user can respond via text.
152
+
134
153
  ### Key modules
135
154
 
136
155
  | File | Purpose |
@@ -144,32 +163,33 @@ When the assistant needs tool-use confirmation during a channel session (e.g., T
144
163
 
145
164
  ### Enabling
146
165
 
147
- Set the environment variable before starting the daemon:
148
-
149
- ```bash
150
- CHANNEL_APPROVALS_ENABLED=true
151
- ```
152
-
153
- When disabled (the default), channel messages follow the standard fire-and-forget processing path without approval interception.
166
+ Channel approvals are always enabled for channel traffic whenever the orchestrator and callback context are available.
154
167
 
155
168
  ### Guardian-Specific Behavior
156
169
 
157
- When `CHANNEL_APPROVALS_ENABLED=true`, the channel guardian system adds a trust layer:
170
+ Guardian actor-role *classification* (determining whether a sender is guardian, non-guardian, or unverified) runs unconditionally. Guardian *enforcement* for non-guardian/unverified actors (`forceStrictSideEffects`, fail-closed denial for unverified channels, and approval prompt routing to guardians) is always active whenever the orchestrator and callback context are available.
158
171
 
159
172
  | Flag / Behavior | Description |
160
173
  |-----------------|-------------|
161
- | `CHANNEL_APPROVALS_ENABLED=true` | Enables the approval flow and guardian role resolution on channel inbound messages |
162
- | `forceStrictSideEffects` | Automatically set on runs triggered by non-guardian or unverified-channel senders so all side-effect tools require approval |
163
- | **Fail-closed no-binding** | When no guardian binding exists for a channel, the sender is classified as `unverified_channel`. Any sensitive action is auto-denied with a notice that no guardian has been configured. This prevents unverified senders from self-approving actions. |
174
+ | `forceStrictSideEffects` | Automatically set on runs triggered by non-guardian or unverified-channel senders so all side-effect tools require approval. |
175
+ | **Fail-closed no-binding** | When no guardian binding exists for a channel, the sender is classified as `unverified_channel`. Any sensitive action is auto-denied with a notice that no guardian has been configured. |
176
+ | **Fail-closed no-identity** | When `senderExternalUserId` is absent, the actor is classified as `unverified_channel` (even if no guardian binding exists yet). |
164
177
  | **Guardian-only approval** | Non-guardian senders cannot approve their own pending actions. Only the verified guardian can approve or deny. |
165
- | **Expired approval auto-deny** | If a guardian approval request expires (30-minute TTL) without a decision, the action is auto-denied when the non-guardian sender next interacts. |
178
+ | **Expired approval auto-deny** | A proactive sweep runs every 60 seconds to find expired guardian approval requests (30-minute TTL). Expired approvals are auto-denied, and both the requester and guardian are notified. If a non-guardian interacts before the sweep runs, the expiry is also detected reactively. |
179
+
180
+ ### Ingress Boundary Guarantees (Gateway-Only Mode)
181
+
182
+ The runtime operates in **gateway-only mode**: all public-facing webhook paths are blocked at the runtime level. Direct access to Twilio webhook routes (`/webhooks/twilio/voice`, `/webhooks/twilio/status`, `/webhooks/twilio/connect-action`, `/webhooks/twilio/sms`) and their legacy equivalents (`/v1/calls/twilio/*`) returns `410 GATEWAY_ONLY`. This ensures external webhook traffic (including SMS) can only reach the runtime through the gateway, which performs signature validation before forwarding.
183
+
184
+ Internal forwarding routes (`/v1/internal/twilio/*`) are unaffected — these accept pre-validated payloads from the gateway over the private network.
166
185
 
167
186
  ### Gateway-Origin Ingress Contract
168
187
 
169
- The `/channels/inbound` endpoint requires a valid `X-Gateway-Origin` header that matches the configured bearer token. This ensures channel messages can only be submitted via the gateway (which performs webhook-level verification) and not via direct HTTP calls that bypass signature checks.
188
+ The `/channels/inbound` endpoint requires a valid `X-Gateway-Origin` header to prove the request originated from the gateway. This ensures channel messages can only arrive via the gateway (which performs webhook-level verification) and not via direct HTTP calls that bypass signature checks.
170
189
 
171
- - **With bearer token configured:** Requests must include `X-Gateway-Origin` with the shared secret. Missing or invalid values return `403 GATEWAY_ORIGIN_REQUIRED`.
172
- - **Without bearer token:** Gateway-origin validation is skipped (local dev without auth).
190
+ - **Dedicated secret (`RUNTIME_GATEWAY_ORIGIN_SECRET`):** When set, this is the expected value for the `X-Gateway-Origin` header. Both the gateway and the runtime must share this secret.
191
+ - **Bearer token fallback:** When `RUNTIME_GATEWAY_ORIGIN_SECRET` is not set, the runtime falls back to validating against the bearer token for backward compatibility.
192
+ - **Without any secret:** When neither a dedicated secret nor a bearer token is configured (local dev), gateway-origin validation is skipped entirely.
173
193
  - **Auth layer order:** Bearer token authentication (`Authorization` header) is checked first. Gateway-origin validation runs inside the handler.
174
194
 
175
195
  ## Twilio Setup Primitive
@@ -184,16 +204,48 @@ The daemon handles `twilio_config` messages with the following actions:
184
204
  |--------|-------------|
185
205
  | `get` | Returns current state: `hasCredentials` (boolean) and `phoneNumber` (if assigned) |
186
206
  | `set_credentials` | Validates and stores Account SID and Auth Token in secure storage (Keychain / encrypted file). Credentials are retrieved from the credential store internally. |
187
- | `clear_credentials` | Removes stored Account SID, Auth Token, and phone number from secure storage. |
188
- | `provision_number` | Purchases a new phone number via the Twilio API. Accepts optional `areaCode` and `country` (ISO 3166-1 alpha-2, default `US`). Returns the purchased number but does not assign it call `assign_number` separately to persist it. |
189
- | `assign_number` | Assigns an existing Twilio phone number (E.164 format) to the assistant |
207
+ | `clear_credentials` | Removes stored Account SID and Auth Token from secure storage. Preserves the phone number in both config (`sms.phoneNumber`) and secure key (`credential:twilio:phone_number`) so that re-entering credentials resumes working without needing to reassign the number. |
208
+ | `provision_number` | Purchases a new phone number via the Twilio API. Accepts optional `areaCode` and `country` (ISO 3166-1 alpha-2, default `US`). Auto-assigns the number to the assistant (persists to config and secure storage) and configures Twilio webhooks (voice, status callback, SMS) when a public ingress URL is available. |
209
+ | `assign_number` | Assigns an existing Twilio phone number (E.164 format) to the assistant and auto-configures webhooks when ingress is available |
190
210
  | `list_numbers` | Lists all incoming phone numbers on the Twilio account with their capabilities (voice, SMS) |
191
211
 
192
- Response type: `twilio_config_response` with `success`, `hasCredentials`, optional `phoneNumber`, optional `numbers` array, and optional `error`.
212
+ Response type: `twilio_config_response` with `success`, `hasCredentials`, optional `phoneNumber`, optional `numbers` array, optional `error`, and optional `warning` (for non-fatal webhook sync failures).
213
+
214
+ ### Ingress Webhook Reconciliation
215
+
216
+ When the public ingress URL is changed via the Settings UI (`ingress_config` set action), the daemon automatically reconciles Twilio webhooks in addition to triggering a Telegram webhook reconcile on the gateway. If all of the following conditions are met, the daemon pushes updated webhook URLs (voice, status callback, SMS) to Twilio:
217
+
218
+ 1. Ingress is being **enabled** (not disabled)
219
+ 2. Twilio **credentials** are configured (Account SID + Auth Token in secure storage)
220
+ 3. A phone number is **assigned** (persisted in `sms.phoneNumber` config)
221
+
222
+ This reconciliation is **best-effort and fire-and-forget** -- failures are logged but do not block the ingress config save or produce an error response. This ensures that changing a tunnel URL (e.g., restarting ngrok) automatically updates Twilio's webhook routing without requiring manual re-assignment of the phone number.
193
223
 
194
224
  ### Single-Number-Per-Assistant Model
195
225
 
196
- Each assistant is assigned a single Twilio phone number that is shared between voice calls and SMS. The number is stored in the assistant's config at `sms.phoneNumber` and used as the `From` for outbound SMS via the gateway's `/deliver/sms` endpoint. The same credentials (Account SID, Auth Token) are used for both voice and SMS operations.
226
+ Each assistant is assigned a single Twilio phone number that is shared between voice calls and SMS. The number is stored in the assistant's config at `sms.phoneNumber` (legacy global field) and used as the `From` for outbound SMS via the gateway's `/deliver/sms` endpoint. The same credentials (Account SID, Auth Token) are used for both voice and SMS operations.
227
+
228
+ #### Assistant-Scoped Phone Numbers
229
+
230
+ When `assistantId` is provided in the `twilio_config` request, the `provision_number` and `assign_number` actions persist the phone number into a per-assistant mapping at `sms.assistantPhoneNumbers` (a `Record<string, string>` keyed by assistant ID). The legacy `sms.phoneNumber` field is always updated for backward compatibility.
231
+
232
+ The `get` action, when called with `assistantId`, resolves the phone number by checking `sms.assistantPhoneNumbers[assistantId]` first, falling back to `sms.phoneNumber`. This allows multiple assistants to have distinct phone numbers while preserving existing behavior for single-assistant setups.
233
+
234
+ The per-assistant mapping is propagated to the gateway via the config file watcher, enabling phone-number-based routing at the gateway boundary (see Gateway README).
235
+
236
+ ### Phone Number Resolution Order
237
+
238
+ At runtime, `getTwilioConfig()` resolves the phone number using this priority chain:
239
+
240
+ 1. **`TWILIO_PHONE_NUMBER` env var** — highest priority, explicit override for dev/CI.
241
+ 2. **`sms.phoneNumber` in config** — the primary source of truth, written by `provision_number` and `assign_number`.
242
+ 3. **`credential:twilio:phone_number` secure key** — backward-compatible fallback for setups that predate the config-first model.
243
+
244
+ If no number is found after checking all three sources, an error is thrown.
245
+
246
+ ### Assistant-Scoped Guardian State
247
+
248
+ Guardian bindings, verification challenges, and approval requests are all scoped to an `(assistantId, channel)` pair. The `assistantId` parameter flows through `handleChannelInbound`, `validateAndConsumeChallenge`, `isGuardian`, `getGuardianBinding`, and `createApprovalRequest`. This means each assistant has its own independent guardian binding per channel -- verifying as guardian on one assistant does not grant guardian status on another.
197
249
 
198
250
  ### Channel-Aware Guardian Challenges
199
251
 
@@ -228,6 +280,15 @@ The image runs as non-root user `assistant` (uid 1001) and exposes port `3001`.
228
280
 
229
281
  ## Troubleshooting
230
282
 
283
+ ### Guardian and gateway-origin issues
284
+
285
+ | Symptom | Cause | Resolution |
286
+ |---------|-------|------------|
287
+ | 403 `GATEWAY_ORIGIN_REQUIRED` on `/channels/inbound` | Missing or invalid `X-Gateway-Origin` header | Ensure `RUNTIME_GATEWAY_ORIGIN_SECRET` is set to the same value on both gateway and runtime. If not using a dedicated secret, ensure the bearer token (`RUNTIME_BEARER_TOKEN` or `~/.vellum/http-token`) is shared. |
288
+ | Non-guardian actions auto-denied | No guardian binding for the channel. The system is fail-closed for unverified channels, and the requester is notified that no guardian has been configured. | Run the guardian verification flow from the desktop UI to bind a guardian. |
289
+ | Guardian approval expired | The 30-minute TTL elapsed. The proactive sweep auto-denied the approval and notified both parties. | The requester must re-trigger the action. |
290
+ | `forceStrictSideEffects` unexpectedly active | The sender is classified as `non-guardian` or `unverified_channel` | Verify the sender's `externalUserId` matches the guardian binding, or set up a guardian binding for the channel. |
291
+
231
292
  ### Invalid RRULE set expressions
232
293
 
233
294
  If `schedule_create` rejects an RRULE expression, check the following:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.3.2",
3
+ "version": "0.3.4",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "vellum": "./src/index.ts"
@@ -594,6 +594,13 @@ exports[`IPC message snapshots ClientMessage types telegram_config serializes to
594
594
  }
595
595
  `;
596
596
 
597
+ exports[`IPC message snapshots ClientMessage types twilio_config serializes to expected JSON 1`] = `
598
+ {
599
+ "action": "get",
600
+ "type": "twilio_config",
601
+ }
602
+ `;
603
+
597
604
  exports[`IPC message snapshots ClientMessage types guardian_verification serializes to expected JSON 1`] = `
598
605
  {
599
606
  "action": "create_challenge",
@@ -1920,6 +1927,15 @@ exports[`IPC message snapshots ServerMessage types telegram_config_response seri
1920
1927
  }
1921
1928
  `;
1922
1929
 
1930
+ exports[`IPC message snapshots ServerMessage types twilio_config_response serializes to expected JSON 1`] = `
1931
+ {
1932
+ "hasCredentials": true,
1933
+ "phoneNumber": "+15551234567",
1934
+ "success": true,
1935
+ "type": "twilio_config_response",
1936
+ }
1937
+ `;
1938
+
1923
1939
  exports[`IPC message snapshots ServerMessage types guardian_verification_response serializes to expected JSON 1`] = `
1924
1940
  {
1925
1941
  "instruction": "Send this code to the Telegram bot",
@@ -14,8 +14,8 @@ mock.module('../util/platform.js', () => ({
14
14
  }));
15
15
 
16
16
  // Re-import after mocking so modules use our temp dir
17
- const { createApp, updateApp, deleteApp: _deleteApp, writeAppFile: _writeAppFile, editAppFile: _editAppFile, getAppsDir } = await import('../memory/app-store.js');
18
- const { getAppHistory, getAppDiff, getAppFileAtVersion, restoreAppVersion, commitAppChange: _commitAppChange } = await import('../memory/app-git-service.js');
17
+ const { createApp, updateApp, getAppsDir } = await import('../memory/app-store.js');
18
+ const { getAppHistory, getAppDiff, getAppFileAtVersion, restoreAppVersion, commitAppTurnChanges } = await import('../memory/app-git-service.js');
19
19
 
20
20
  describe('App Git History', () => {
21
21
  beforeEach(() => {
@@ -31,27 +31,20 @@ describe('App Git History', () => {
31
31
  }
32
32
  });
33
33
 
34
- /** Wait for fire-and-forget commits to complete. */
35
- async function waitForCommits(): Promise<void> {
36
- await new Promise(resolve => setTimeout(resolve, 500));
37
- }
38
-
39
34
  test('getAppHistory returns commits for a specific app', async () => {
40
35
  const app = createApp({
41
36
  name: 'History App',
42
37
  schemaJson: '{}',
43
38
  htmlDefinition: '<h1>v1</h1>',
44
39
  });
45
- await waitForCommits();
40
+ await commitAppTurnChanges('session-1', 1);
46
41
 
47
42
  updateApp(app.id, { htmlDefinition: '<h1>v2</h1>' });
48
- await waitForCommits();
43
+ await commitAppTurnChanges('session-1', 2);
49
44
 
50
45
  const history = await getAppHistory(app.id);
51
46
  expect(history.length).toBeGreaterThanOrEqual(2);
52
- expect(history[0].message).toContain('Update app');
53
- // The create commit may be absorbed into the "Initial commit" on a fresh repo
54
- expect(history[history.length - 1].message).toMatch(/Create app|Initial commit/);
47
+ expect(history[0].message).toContain('Turn 2');
55
48
  expect(history[0].commitHash).toMatch(/^[0-9a-f]+$/);
56
49
  expect(history[0].timestamp).toBeGreaterThan(0);
57
50
  });
@@ -62,22 +55,24 @@ describe('App Git History', () => {
62
55
  schemaJson: '{}',
63
56
  htmlDefinition: '<p>one</p>',
64
57
  });
65
- await waitForCommits();
58
+ await commitAppTurnChanges('session-1', 1);
66
59
 
67
60
  const app2 = createApp({
68
61
  name: 'App Two',
69
62
  schemaJson: '{}',
70
63
  htmlDefinition: '<p>two</p>',
71
64
  });
72
- await waitForCommits();
65
+ await commitAppTurnChanges('session-1', 2);
73
66
 
74
67
  const history1 = await getAppHistory(app1.id);
75
68
  const history2 = await getAppHistory(app2.id);
76
69
 
77
- // App1's history should only contain its own commits
78
- expect(history1.every(v => v.message.includes('App One') || v.message.includes('Initial commit'))).toBe(true);
79
- // App2's history should only contain its own commits
80
- expect(history2.every(v => v.message.includes('App Two') || v.message.includes('Initial commit'))).toBe(true);
70
+ // App1 should have history from turn 1 (or initial commit)
71
+ expect(history1.length).toBeGreaterThanOrEqual(1);
72
+ // App2 should have history from turn 2 (or initial commit)
73
+ expect(history2.length).toBeGreaterThanOrEqual(1);
74
+ // App2's commits should not include app1-only turn commits
75
+ // (turn 2 created app2, so app2 history should not have turn 1 unless initial commit)
81
76
  });
82
77
 
83
78
  test('getAppHistory respects limit', async () => {
@@ -86,13 +81,13 @@ describe('App Git History', () => {
86
81
  schemaJson: '{}',
87
82
  htmlDefinition: '<p>v1</p>',
88
83
  });
89
- await waitForCommits();
84
+ await commitAppTurnChanges('session-1', 1);
90
85
 
91
86
  updateApp(app.id, { htmlDefinition: '<p>v2</p>' });
92
- await waitForCommits();
87
+ await commitAppTurnChanges('session-1', 2);
93
88
 
94
89
  updateApp(app.id, { htmlDefinition: '<p>v3</p>' });
95
- await waitForCommits();
90
+ await commitAppTurnChanges('session-1', 3);
96
91
 
97
92
  const limited = await getAppHistory(app.id, 2);
98
93
  expect(limited.length).toBe(2);
@@ -104,13 +99,13 @@ describe('App Git History', () => {
104
99
  schemaJson: '{}',
105
100
  htmlDefinition: '<p>original</p>',
106
101
  });
107
- await waitForCommits();
102
+ await commitAppTurnChanges('session-1', 1);
108
103
 
109
104
  const history1 = await getAppHistory(app.id);
110
105
  const createHash = history1[0].commitHash;
111
106
 
112
107
  updateApp(app.id, { htmlDefinition: '<p>modified</p>' });
113
- await waitForCommits();
108
+ await commitAppTurnChanges('session-1', 2);
114
109
 
115
110
  const history2 = await getAppHistory(app.id);
116
111
  const updateHash = history2[0].commitHash;
@@ -126,13 +121,13 @@ describe('App Git History', () => {
126
121
  schemaJson: '{}',
127
122
  htmlDefinition: '<p>version one</p>',
128
123
  });
129
- await waitForCommits();
124
+ await commitAppTurnChanges('session-1', 1);
130
125
 
131
126
  const history1 = await getAppHistory(app.id);
132
127
  const v1Hash = history1[0].commitHash;
133
128
 
134
129
  updateApp(app.id, { htmlDefinition: '<p>version two</p>' });
135
- await waitForCommits();
130
+ await commitAppTurnChanges('session-1', 2);
136
131
 
137
132
  // Get the file at v1 — should show old content
138
133
  const v1Content = await getAppFileAtVersion(app.id, 'index.html', v1Hash);
@@ -150,13 +145,13 @@ describe('App Git History', () => {
150
145
  schemaJson: '{}',
151
146
  htmlDefinition: '<p>original content</p>',
152
147
  });
153
- await waitForCommits();
148
+ await commitAppTurnChanges('session-1', 1);
154
149
 
155
150
  const history1 = await getAppHistory(app.id);
156
151
  const originalHash = history1[0].commitHash;
157
152
 
158
153
  updateApp(app.id, { htmlDefinition: '<p>new content</p>' });
159
- await waitForCommits();
154
+ await commitAppTurnChanges('session-1', 2);
160
155
 
161
156
  // Verify current content is "new content"
162
157
  let current = readFileSync(join(getAppsDir(), app.id, 'index.html'), 'utf-8');
@@ -4,7 +4,7 @@ import { join } from 'node:path';
4
4
  import { tmpdir } from 'node:os';
5
5
  import { execFileSync } from 'node:child_process';
6
6
  import { _resetGitServiceRegistry } from '../workspace/git-service.js';
7
- import { commitAppChange, _resetAppGitState } from '../memory/app-git-service.js';
7
+ import { commitAppTurnChanges, _resetAppGitState } from '../memory/app-git-service.js';
8
8
 
9
9
  // Mock getDataDir to use a temp directory
10
10
  let testDataDir: string;
@@ -43,127 +43,93 @@ describe('App Git Service', () => {
43
43
  }
44
44
  }
45
45
 
46
- test('initializes git repo in apps directory on first commit', async () => {
47
- const appsDir = getAppsDir();
48
- expect(existsSync(join(appsDir, '.git'))).toBe(false);
49
-
50
- await commitAppChange('test commit');
51
-
52
- expect(existsSync(join(appsDir, '.git'))).toBe(true);
53
- });
54
-
55
46
  test('.gitignore excludes preview files and records', async () => {
56
47
  const appsDir = getAppsDir();
57
- await commitAppChange('test commit');
48
+ await commitAppTurnChanges('test-session', 1);
58
49
 
59
50
  const gitignore = readFileSync(join(appsDir, '.gitignore'), 'utf-8');
60
51
  expect(gitignore).toContain('*.preview');
61
52
  expect(gitignore).toContain('*/records/');
62
53
  });
63
54
 
64
- test('createApp produces a commit', async () => {
55
+ test('mutations do not auto-commit', async () => {
65
56
  createApp({
66
57
  name: 'Test App',
67
58
  schemaJson: '{}',
68
59
  htmlDefinition: '<h1>Hello</h1>',
69
60
  });
70
61
 
71
- // Give the fire-and-forget commit time to complete
62
+ // Wait to make sure no fire-and-forget commit happens
72
63
  await new Promise(resolve => setTimeout(resolve, 500));
73
64
 
74
65
  const appsDir = getAppsDir();
75
- const commits = getGitLog(appsDir);
76
- expect(commits.some(c => c.includes('Create app: Test App'))).toBe(true);
66
+ // No git repo should exist yet since no turn commit was triggered
67
+ expect(existsSync(join(appsDir, '.git'))).toBe(false);
77
68
  });
78
69
 
79
- test('updateApp produces a commit with changed fields', async () => {
70
+ test('commitAppTurnChanges creates a single commit for multiple mutations', async () => {
80
71
  const app = createApp({
81
- name: 'My App',
72
+ name: 'Multi Edit App',
82
73
  schemaJson: '{}',
83
74
  htmlDefinition: '<p>v1</p>',
84
75
  });
85
- await new Promise(resolve => setTimeout(resolve, 500));
86
-
87
- updateApp(app.id, { name: 'My App v2', htmlDefinition: '<p>v2</p>' });
88
- await new Promise(resolve => setTimeout(resolve, 500));
89
-
90
- const appsDir = getAppsDir();
91
- const commits = getGitLog(appsDir);
92
- expect(commits.some(c => c.includes('Update app: My App v2'))).toBe(true);
93
- });
94
76
 
95
- test('deleteApp produces a commit with app name', async () => {
96
- const app = createApp({
97
- name: 'Doomed App',
98
- schemaJson: '{}',
99
- htmlDefinition: '<p>bye</p>',
100
- });
101
- await new Promise(resolve => setTimeout(resolve, 500));
77
+ updateApp(app.id, { htmlDefinition: '<p>v2</p>' });
78
+ writeAppFile(app.id, 'styles.css', 'body { color: red; }');
79
+ editAppFile(app.id, 'index.html', 'v2', 'v3');
102
80
 
103
- deleteApp(app.id);
104
- await new Promise(resolve => setTimeout(resolve, 500));
81
+ // All mutations happened, now commit at turn boundary
82
+ await commitAppTurnChanges('session-1', 1);
105
83
 
106
84
  const appsDir = getAppsDir();
107
85
  const commits = getGitLog(appsDir);
108
- expect(commits.some(c => c.includes('Delete app: Doomed App'))).toBe(true);
86
+
87
+ // On a fresh repo the first turn's files may be absorbed into the
88
+ // "Initial commit" created by WorkspaceGitService.ensureInitialized.
89
+ // Either way there should be at most 2 commits, not one per mutation.
90
+ expect(commits.length).toBeLessThanOrEqual(2);
91
+ // The turn commit message should appear (or files are in the initial commit)
92
+ expect(commits.some(c => c.includes('Turn 1') || c.includes('Initial commit'))).toBe(true);
109
93
  });
110
94
 
111
- test('writeAppFile produces a commit', async () => {
112
- const app = createApp({
113
- name: 'File App',
95
+ test('commitAppTurnChanges does not commit when nothing changed', async () => {
96
+ // Trigger initial commit by creating and committing an app
97
+ createApp({
98
+ name: 'Static App',
114
99
  schemaJson: '{}',
115
100
  htmlDefinition: '<p>hi</p>',
116
101
  });
117
- await new Promise(resolve => setTimeout(resolve, 500));
118
-
119
- writeAppFile(app.id, 'styles.css', 'body { color: red; }');
120
- await new Promise(resolve => setTimeout(resolve, 500));
102
+ await commitAppTurnChanges('session-1', 1);
121
103
 
122
104
  const appsDir = getAppsDir();
123
- const commits = getGitLog(appsDir);
124
- expect(commits.some(c => c.includes('Write styles.css in app'))).toBe(true);
125
- });
105
+ const commitsBefore = getGitLog(appsDir);
126
106
 
127
- test('editAppFile produces a commit on success', async () => {
128
- const app = createApp({
129
- name: 'Edit App',
130
- schemaJson: '{}',
131
- htmlDefinition: '<p>old text</p>',
132
- });
133
- await new Promise(resolve => setTimeout(resolve, 500));
107
+ // No mutations turn commit should be a no-op
108
+ await commitAppTurnChanges('session-1', 2);
134
109
 
135
- const result = editAppFile(app.id, 'index.html', 'old text', 'new text');
136
- expect(result.ok).toBe(true);
137
- await new Promise(resolve => setTimeout(resolve, 500));
110
+ const commitsAfter = getGitLog(appsDir);
111
+ expect(commitsAfter.length).toBe(commitsBefore.length);
112
+ });
138
113
 
139
- const appsDir = getAppsDir();
140
- const commits = getGitLog(appsDir);
141
- expect(commits.some(c => c.includes('Edit index.html in app'))).toBe(true);
114
+ test('commitAppTurnChanges swallows errors gracefully', async () => {
115
+ _resetAppGitState();
116
+ // This should not throw
117
+ await commitAppTurnChanges('test', 1);
142
118
  });
143
119
 
144
- test('editAppFile does not commit on failure', async () => {
120
+ test('deleteApp changes are captured by turn commit', async () => {
145
121
  const app = createApp({
146
- name: 'No Edit App',
122
+ name: 'Doomed App',
147
123
  schemaJson: '{}',
148
- htmlDefinition: '<p>content</p>',
124
+ htmlDefinition: '<p>bye</p>',
149
125
  });
150
- await new Promise(resolve => setTimeout(resolve, 500));
151
-
152
- const commitsBefore = getGitLog(getAppsDir());
153
-
154
- const result = editAppFile(app.id, 'index.html', 'nonexistent string', 'replacement');
155
- expect(result.ok).toBe(false);
156
- await new Promise(resolve => setTimeout(resolve, 500));
157
-
158
- const commitsAfter = getGitLog(getAppsDir());
159
- // No new commits should have been created for the failed edit
160
- expect(commitsAfter.length).toBe(commitsBefore.length);
161
- });
126
+ await commitAppTurnChanges('session-1', 1);
162
127
 
163
- test('commitAppChange swallows errors gracefully', async () => {
164
- _resetAppGitState();
128
+ deleteApp(app.id);
129
+ await commitAppTurnChanges('session-1', 2);
165
130
 
166
- // This should not throw
167
- await commitAppChange('test');
131
+ const appsDir = getAppsDir();
132
+ const commits = getGitLog(appsDir);
133
+ expect(commits[0]).toContain('Turn 2: app changes');
168
134
  });
169
135
  });