@vellumai/assistant 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/Dockerfile +18 -27
  2. package/docs/architecture/memory.md +105 -0
  3. package/node_modules/@vellumai/ces-contracts/src/index.ts +1 -0
  4. package/node_modules/@vellumai/ces-contracts/src/trust-rules.ts +42 -0
  5. package/package.json +1 -1
  6. package/src/__tests__/archive-recall.test.ts +560 -0
  7. package/src/__tests__/conversation-clear-safety.test.ts +259 -0
  8. package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
  9. package/src/__tests__/credential-security-invariants.test.ts +2 -0
  10. package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
  11. package/src/__tests__/memory-reducer-job.test.ts +538 -0
  12. package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
  13. package/src/__tests__/memory-reducer-types.test.ts +12 -4
  14. package/src/__tests__/memory-reducer.test.ts +7 -1
  15. package/src/__tests__/memory-regressions.test.ts +24 -4
  16. package/src/__tests__/memory-simplified-config.test.ts +4 -4
  17. package/src/__tests__/openai-whisper.test.ts +93 -0
  18. package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
  19. package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
  20. package/src/__tests__/slack-messaging-token-resolution.test.ts +319 -0
  21. package/src/__tests__/volume-security-guard.test.ts +155 -0
  22. package/src/cli/commands/conversations.ts +18 -0
  23. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -0
  24. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  25. package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +16 -37
  26. package/src/config/env-registry.ts +9 -0
  27. package/src/config/feature-flag-registry.json +8 -0
  28. package/src/config/loader.ts +0 -1
  29. package/src/config/schemas/memory-simplified.ts +1 -1
  30. package/src/credential-execution/managed-catalog.ts +5 -15
  31. package/src/daemon/config-watcher.ts +4 -1
  32. package/src/daemon/conversation-memory.ts +117 -0
  33. package/src/daemon/conversation-runtime-assembly.ts +1 -0
  34. package/src/daemon/daemon-control.ts +7 -0
  35. package/src/daemon/handlers/conversations.ts +11 -0
  36. package/src/daemon/lifecycle.ts +51 -2
  37. package/src/daemon/providers-setup.ts +2 -1
  38. package/src/hooks/manager.ts +7 -0
  39. package/src/instrument.ts +33 -1
  40. package/src/memory/archive-recall.ts +516 -0
  41. package/src/memory/brief-time.ts +5 -4
  42. package/src/memory/conversation-crud.ts +210 -0
  43. package/src/memory/conversation-key-store.ts +33 -4
  44. package/src/memory/db-init.ts +4 -0
  45. package/src/memory/embedding-local.ts +11 -5
  46. package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
  47. package/src/memory/job-handlers/conversation-starters.ts +24 -30
  48. package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
  49. package/src/memory/jobs-store.ts +2 -0
  50. package/src/memory/jobs-worker.ts +8 -0
  51. package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
  52. package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
  53. package/src/memory/migrations/141-rename-verification-table.ts +8 -0
  54. package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
  55. package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
  56. package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
  57. package/src/memory/migrations/index.ts +1 -0
  58. package/src/memory/reducer-scheduler.ts +242 -0
  59. package/src/memory/reducer-types.ts +9 -2
  60. package/src/memory/reducer.ts +25 -11
  61. package/src/memory/schema/infrastructure.ts +1 -0
  62. package/src/messaging/provider.ts +9 -0
  63. package/src/messaging/providers/slack/adapter.ts +29 -2
  64. package/src/oauth/connection-resolver.test.ts +22 -18
  65. package/src/oauth/connection-resolver.ts +92 -7
  66. package/src/oauth/platform-connection.test.ts +78 -69
  67. package/src/oauth/platform-connection.ts +12 -19
  68. package/src/permissions/trust-client.ts +343 -0
  69. package/src/permissions/trust-store-interface.ts +105 -0
  70. package/src/permissions/trust-store.ts +523 -36
  71. package/src/platform/client.test.ts +148 -0
  72. package/src/platform/client.ts +71 -0
  73. package/src/providers/speech-to-text/openai-whisper.test.ts +190 -0
  74. package/src/providers/speech-to-text/openai-whisper.ts +68 -0
  75. package/src/providers/speech-to-text/resolve.ts +9 -0
  76. package/src/providers/speech-to-text/types.ts +17 -0
  77. package/src/runtime/auth/route-policy.ts +10 -1
  78. package/src/runtime/http-server.ts +2 -2
  79. package/src/runtime/routes/conversation-management-routes.ts +88 -2
  80. package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
  81. package/src/runtime/routes/inbound-message-handler.ts +27 -3
  82. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +16 -1
  83. package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +287 -0
  84. package/src/runtime/routes/inbound-stages/transcribe-audio.ts +122 -0
  85. package/src/runtime/routes/log-export-routes.ts +1 -0
  86. package/src/runtime/routes/secret-routes.ts +5 -1
  87. package/src/schedule/schedule-store.ts +7 -0
  88. package/src/schedule/scheduler.ts +6 -2
  89. package/src/security/ces-credential-client.ts +173 -0
  90. package/src/security/secure-keys.ts +65 -22
  91. package/src/signals/bash.ts +3 -0
  92. package/src/signals/cancel.ts +3 -0
  93. package/src/signals/confirm.ts +3 -0
  94. package/src/signals/conversation-undo.ts +3 -0
  95. package/src/signals/event-stream.ts +7 -0
  96. package/src/signals/shotgun.ts +3 -0
  97. package/src/signals/trust-rule.ts +3 -0
  98. package/src/telemetry/usage-telemetry-reporter.test.ts +23 -36
  99. package/src/telemetry/usage-telemetry-reporter.ts +22 -20
  100. package/src/tools/filesystem/edit.ts +6 -1
  101. package/src/tools/filesystem/read.ts +6 -1
  102. package/src/tools/filesystem/write.ts +6 -1
  103. package/src/tools/memory/handlers.ts +129 -1
  104. package/src/tools/schedule/create.ts +3 -0
  105. package/src/tools/schedule/list.ts +5 -1
  106. package/src/tools/schedule/update.ts +6 -0
  107. package/src/util/device-id.ts +70 -7
  108. package/src/util/logger.ts +35 -9
  109. package/src/util/platform.ts +29 -5
  110. package/src/workspace/migrations/migrate-to-workspace-volume.ts +113 -0
  111. package/src/workspace/migrations/registry.ts +2 -0
@@ -47,13 +47,18 @@ import {
47
47
  conversations,
48
48
  conversationStarters,
49
49
  llmRequestLogs,
50
+ memoryChunks,
50
51
  memoryEmbeddings,
52
+ memoryEpisodes,
51
53
  memoryItems,
52
54
  memoryItemSources,
55
+ memoryObservations,
53
56
  memorySegments,
54
57
  memorySummaries,
55
58
  messageAttachments,
56
59
  messages,
60
+ openLoops,
61
+ timeContexts,
57
62
  toolInvocations,
58
63
  } from "./schema.js";
59
64
  import { cancelPendingJobsForConversation } from "./task-memory-cleanup.js";
@@ -550,6 +555,9 @@ export function deleteConversation(id: string): DeletedMemoryIds {
550
555
  segmentIds: [],
551
556
  orphanedItemIds: [],
552
557
  deletedSummaryIds: [],
558
+ deletedObservationIds: [],
559
+ deletedChunkIds: [],
560
+ deletedEpisodeIds: [],
553
561
  };
554
562
 
555
563
  // Capture createdAt before the transaction deletes the row — needed to
@@ -703,6 +711,75 @@ export function deleteConversation(id: string): DeletedMemoryIds {
703
711
  tx.delete(conversationStarters)
704
712
  .where(eq(conversationStarters.scopeId, memoryScopeId))
705
713
  .run();
714
+
715
+ // Sweep brief-state tables scoped to this private conversation.
716
+ tx.delete(timeContexts)
717
+ .where(eq(timeContexts.scopeId, memoryScopeId))
718
+ .run();
719
+ tx.delete(openLoops).where(eq(openLoops.scopeId, memoryScopeId)).run();
720
+ }
721
+
722
+ // Collect archive table IDs before the cascade delete removes them.
723
+ // Observations and episodes reference conversations with ON DELETE CASCADE,
724
+ // and chunks cascade from observations.
725
+ const observationRows = tx
726
+ .select({ id: memoryObservations.id })
727
+ .from(memoryObservations)
728
+ .where(eq(memoryObservations.conversationId, id))
729
+ .all();
730
+ const observationIds = observationRows.map((r) => r.id);
731
+
732
+ if (observationIds.length > 0) {
733
+ // Collect chunk IDs before observations cascade-delete them.
734
+ const chunkRows = tx
735
+ .select({ id: memoryChunks.id })
736
+ .from(memoryChunks)
737
+ .where(inArray(memoryChunks.observationId, observationIds))
738
+ .all();
739
+ const chunkIds = chunkRows.map((r) => r.id);
740
+
741
+ // Clean up embeddings for chunks.
742
+ if (chunkIds.length > 0) {
743
+ tx.delete(memoryEmbeddings)
744
+ .where(
745
+ and(
746
+ eq(memoryEmbeddings.targetType, "chunk"),
747
+ inArray(memoryEmbeddings.targetId, chunkIds),
748
+ ),
749
+ )
750
+ .run();
751
+ result.deletedChunkIds.push(...chunkIds);
752
+ }
753
+
754
+ // Clean up embeddings for observations.
755
+ tx.delete(memoryEmbeddings)
756
+ .where(
757
+ and(
758
+ eq(memoryEmbeddings.targetType, "observation"),
759
+ inArray(memoryEmbeddings.targetId, observationIds),
760
+ ),
761
+ )
762
+ .run();
763
+ result.deletedObservationIds.push(...observationIds);
764
+ }
765
+
766
+ const episodeRows = tx
767
+ .select({ id: memoryEpisodes.id })
768
+ .from(memoryEpisodes)
769
+ .where(eq(memoryEpisodes.conversationId, id))
770
+ .all();
771
+ const episodeIds = episodeRows.map((r) => r.id);
772
+
773
+ if (episodeIds.length > 0) {
774
+ tx.delete(memoryEmbeddings)
775
+ .where(
776
+ and(
777
+ eq(memoryEmbeddings.targetType, "episode"),
778
+ inArray(memoryEmbeddings.targetId, episodeIds),
779
+ ),
780
+ )
781
+ .run();
782
+ result.deletedEpisodeIds.push(...episodeIds);
706
783
  }
707
784
 
708
785
  tx.delete(conversations).where(eq(conversations.id, id)).run();
@@ -928,6 +1005,9 @@ export function purgePrivateConversations(): {
928
1005
  segmentIds: [],
929
1006
  orphanedItemIds: [],
930
1007
  deletedSummaryIds: [],
1008
+ deletedObservationIds: [],
1009
+ deletedChunkIds: [],
1010
+ deletedEpisodeIds: [],
931
1011
  },
932
1012
  };
933
1013
  }
@@ -935,12 +1015,18 @@ export function purgePrivateConversations(): {
935
1015
  const allSegmentIds: string[] = [];
936
1016
  const allOrphanedItemIds: string[] = [];
937
1017
  const allDeletedSummaryIds: string[] = [];
1018
+ const allDeletedObservationIds: string[] = [];
1019
+ const allDeletedChunkIds: string[] = [];
1020
+ const allDeletedEpisodeIds: string[] = [];
938
1021
 
939
1022
  for (const conv of privateConvs) {
940
1023
  const deleted = deleteConversation(conv.id);
941
1024
  allSegmentIds.push(...deleted.segmentIds);
942
1025
  allOrphanedItemIds.push(...deleted.orphanedItemIds);
943
1026
  allDeletedSummaryIds.push(...deleted.deletedSummaryIds);
1027
+ allDeletedObservationIds.push(...deleted.deletedObservationIds);
1028
+ allDeletedChunkIds.push(...deleted.deletedChunkIds);
1029
+ allDeletedEpisodeIds.push(...deleted.deletedEpisodeIds);
944
1030
  }
945
1031
 
946
1032
  return {
@@ -949,6 +1035,9 @@ export function purgePrivateConversations(): {
949
1035
  segmentIds: allSegmentIds,
950
1036
  orphanedItemIds: allOrphanedItemIds,
951
1037
  deletedSummaryIds: allDeletedSummaryIds,
1038
+ deletedObservationIds: allDeletedObservationIds,
1039
+ deletedChunkIds: allDeletedChunkIds,
1040
+ deletedEpisodeIds: allDeletedEpisodeIds,
952
1041
  },
953
1042
  };
954
1043
  }
@@ -1226,6 +1315,15 @@ export function clearAll(): { conversations: number; messages: number } {
1226
1315
  rawExec("DELETE FROM messages");
1227
1316
  rawExec("DELETE FROM conversations");
1228
1317
 
1318
+ // Record audit event — lifecycle_events is NOT deleted by clearAll(),
1319
+ // so this survives the wipe and provides a permanent trail.
1320
+ rawRun(
1321
+ `INSERT INTO lifecycle_events (id, event_name, created_at) VALUES (?, ?, ?)`,
1322
+ uuid(),
1323
+ "conversations_clear_all",
1324
+ Date.now(),
1325
+ );
1326
+
1229
1327
  // Rebuild corrupted FTS tables and restore triggers after all base-table
1230
1328
  // DELETEs have completed. Dropping the virtual table clears the corruption,
1231
1329
  // and recreating it + triggers means subsequent writes maintain FTS
@@ -1333,6 +1431,9 @@ export interface DeletedMemoryIds {
1333
1431
  segmentIds: string[];
1334
1432
  orphanedItemIds: string[];
1335
1433
  deletedSummaryIds: string[];
1434
+ deletedObservationIds: string[];
1435
+ deletedChunkIds: string[];
1436
+ deletedEpisodeIds: string[];
1336
1437
  }
1337
1438
 
1338
1439
  export interface WipeConversationResult extends DeletedMemoryIds {
@@ -1406,6 +1507,9 @@ export function deleteMessageById(messageId: string): DeletedMemoryIds {
1406
1507
  segmentIds: [],
1407
1508
  orphanedItemIds: [],
1408
1509
  deletedSummaryIds: [],
1510
+ deletedObservationIds: [],
1511
+ deletedChunkIds: [],
1512
+ deletedEpisodeIds: [],
1409
1513
  };
1410
1514
 
1411
1515
  // Collect attachment IDs linked to this message before cascade-delete
@@ -1499,6 +1603,12 @@ export function deleteMessageById(messageId: string): DeletedMemoryIds {
1499
1603
  * message. Sets `memoryDirtyTailSinceMessageId` only when it is currently
1500
1604
  * null so the earliest unreduced boundary is preserved across multiple
1501
1605
  * messages — later messages must not clobber the original dirty marker.
1606
+ *
1607
+ * Also upserts a pending `reduce_conversation_memory` job scheduled at
1608
+ * `now + idleDelayMs`. If a pending job for this conversation already exists,
1609
+ * its `runAfter` is pushed forward (rescheduled) so the reducer waits for
1610
+ * the full idle window after the *latest* message — avoiding premature runs
1611
+ * while the user is still actively typing.
1502
1612
  */
1503
1613
  export function markConversationMemoryDirty(
1504
1614
  conversationId: string,
@@ -1514,6 +1624,106 @@ export function markConversationMemoryDirty(
1514
1624
  ),
1515
1625
  )
1516
1626
  .run();
1627
+
1628
+ // Schedule (or reschedule) a deferred reducer job for this conversation.
1629
+ scheduleReducerJob(conversationId);
1630
+ }
1631
+
1632
+ /**
1633
+ * Upsert a pending `reduce_conversation_memory` job for the given
1634
+ * conversation, scheduled `idleDelayMs` from now. If one already exists in
1635
+ * pending state, its `runAfter` is pushed forward to restart the idle timer.
1636
+ * This ensures exactly one pending reducer job per conversation — new
1637
+ * messages reschedule rather than duplicate.
1638
+ */
1639
+ export function scheduleReducerJob(
1640
+ conversationId: string,
1641
+ runAfter?: number,
1642
+ ): void {
1643
+ const idleDelayMs = getReducerIdleDelayMs();
1644
+ const scheduledAt = runAfter ?? Date.now() + idleDelayMs;
1645
+
1646
+ const existing = rawGet<{ id: string; status: string }>(
1647
+ `SELECT id, status FROM memory_jobs
1648
+ WHERE type = 'reduce_conversation_memory'
1649
+ AND json_extract(payload, '$.conversationId') = ?
1650
+ AND status = 'pending'
1651
+ LIMIT 1`,
1652
+ conversationId,
1653
+ );
1654
+
1655
+ if (existing) {
1656
+ // Reschedule: push runAfter forward so the idle timer resets.
1657
+ rawRun(
1658
+ `UPDATE memory_jobs SET run_after = ?, updated_at = ? WHERE id = ?`,
1659
+ scheduledAt,
1660
+ Date.now(),
1661
+ existing.id,
1662
+ );
1663
+ } else {
1664
+ enqueueMemoryJob(
1665
+ "reduce_conversation_memory",
1666
+ { conversationId },
1667
+ scheduledAt,
1668
+ );
1669
+ }
1670
+ }
1671
+
1672
+ /**
1673
+ * Startup sweep: find conversations that are marked dirty and whose tail
1674
+ * message is already older than the idle delay. For these conversations the
1675
+ * reducer should have run but didn't (daemon was down). Enqueue immediate
1676
+ * reducer jobs for each so they are processed on the next worker tick.
1677
+ *
1678
+ * Conversations whose tail is still within the idle window are skipped —
1679
+ * the normal `markConversationMemoryDirty` path will schedule them when
1680
+ * new messages arrive (or on the next conversation interaction).
1681
+ *
1682
+ * Returns the number of jobs enqueued.
1683
+ */
1684
+ export function sweepStaleReducerJobs(): number {
1685
+ const idleDelayMs = getReducerIdleDelayMs();
1686
+ const cutoff = Date.now() - idleDelayMs;
1687
+
1688
+ // Find dirty conversations whose latest message is older than the idle
1689
+ // window AND that don't already have a pending reducer job.
1690
+ const stale = rawAll<{ conversationId: string }>(
1691
+ `SELECT c.id AS conversationId
1692
+ FROM conversations c
1693
+ WHERE c.memory_dirty_tail_since_message_id IS NOT NULL
1694
+ AND NOT EXISTS (
1695
+ SELECT 1 FROM memory_jobs mj
1696
+ WHERE mj.type = 'reduce_conversation_memory'
1697
+ AND json_extract(mj.payload, '$.conversationId') = c.id
1698
+ AND mj.status IN ('pending', 'running')
1699
+ )
1700
+ AND (
1701
+ SELECT MAX(m.created_at) FROM messages m
1702
+ WHERE m.conversation_id = c.id
1703
+ ) <= ?`,
1704
+ cutoff,
1705
+ );
1706
+
1707
+ for (const { conversationId } of stale) {
1708
+ enqueueMemoryJob("reduce_conversation_memory", { conversationId });
1709
+ }
1710
+
1711
+ return stale.length;
1712
+ }
1713
+
1714
+ function getReducerIdleDelayMs(): number {
1715
+ // Some test suites mock getConfig() with partial objects; fall back to the
1716
+ // schema default so reducer scheduling stays stable outside full config load.
1717
+ const config = getConfig() as {
1718
+ memory?: {
1719
+ simplified?: {
1720
+ reducer?: {
1721
+ idleDelayMs?: number;
1722
+ };
1723
+ };
1724
+ };
1725
+ };
1726
+ return config.memory?.simplified?.reducer?.idleDelayMs ?? 30_000;
1517
1727
  }
1518
1728
 
1519
1729
  export function setConversationOriginChannelIfUnset(
@@ -134,6 +134,7 @@ export function getOrCreateConversation(
134
134
  opts?: { conversationType?: "standard" | "private" },
135
135
  ): {
136
136
  conversationId: string;
137
+ conversationType: string;
137
138
  created: boolean;
138
139
  } {
139
140
  const db = getDb();
@@ -147,7 +148,16 @@ export function getOrCreateConversation(
147
148
  .get();
148
149
 
149
150
  if (existing) {
150
- return { conversationId: existing.conversationId, created: false as const };
151
+ const conv = tx
152
+ .select({ conversationType: conversations.conversationType })
153
+ .from(conversations)
154
+ .where(eq(conversations.id, existing.conversationId))
155
+ .get();
156
+ return {
157
+ conversationId: existing.conversationId,
158
+ conversationType: conv?.conversationType ?? "standard",
159
+ created: false as const,
160
+ };
151
161
  }
152
162
 
153
163
  // Check if the conversationKey itself is an existing conversation ID.
@@ -168,7 +178,16 @@ export function getOrCreateConversation(
168
178
  createdAt: Date.now(),
169
179
  })
170
180
  .run();
171
- return { conversationId: existingConversation.id, created: false as const };
181
+ const conv = tx
182
+ .select({ conversationType: conversations.conversationType })
183
+ .from(conversations)
184
+ .where(eq(conversations.id, existingConversation.id))
185
+ .get();
186
+ return {
187
+ conversationId: existingConversation.id,
188
+ conversationType: conv?.conversationType ?? "standard",
189
+ created: false as const,
190
+ };
172
191
  }
173
192
 
174
193
  const now = Date.now();
@@ -205,8 +224,14 @@ export function getOrCreateConversation(
205
224
 
206
225
  return {
207
226
  conversationId,
227
+ conversationType,
208
228
  created: true as const,
209
- conversation: { id: conversationId, title, createdAt: now, conversationType },
229
+ conversation: {
230
+ id: conversationId,
231
+ title,
232
+ createdAt: now,
233
+ conversationType,
234
+ },
210
235
  };
211
236
  });
212
237
 
@@ -214,5 +239,9 @@ export function getOrCreateConversation(
214
239
  initConversationDir({ ...result.conversation, originChannel: null });
215
240
  }
216
241
 
217
- return { conversationId: result.conversationId, created: result.created };
242
+ return {
243
+ conversationId: result.conversationId,
244
+ conversationType: result.conversationType,
245
+ created: result.created,
246
+ };
218
247
  }
@@ -111,6 +111,7 @@ import {
111
111
  migrateRenameVerificationTable,
112
112
  migrateRenameVoiceToPhone,
113
113
  migrateScheduleOneShotRouting,
114
+ migrateScheduleQuietFlag,
114
115
  migrateSchemaIndexesAndColumns,
115
116
  migrateUsageDashboardIndexes,
116
117
  migrateVoiceInviteColumns,
@@ -496,6 +497,9 @@ export function initializeDb(): void {
496
497
  // 87. Add memory reducer checkpoint columns to conversations
497
498
  migrateMemoryReducerCheckpoints(database);
498
499
 
500
+ // 88. Add quiet flag to schedule jobs
501
+ migrateScheduleQuietFlag(database);
502
+
499
503
  validateMigrationState(database);
500
504
 
501
505
  if (process.env.BUN_TEST === "1") {
@@ -1,6 +1,7 @@
1
1
  import { existsSync, unlinkSync, writeFileSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
 
4
+ import { getIsContainerized } from "../config/env-registry.js";
4
5
  import { getLogger } from "../util/logger.js";
5
6
  import { getEmbeddingModelsDir, getRootDir } from "../util/platform.js";
6
7
  import { PromiseGuard } from "../util/promise-guard.js";
@@ -353,12 +354,17 @@ export class LocalEmbeddingBackend implements EmbeddingBackend {
353
354
 
354
355
  private static readonly PID_FILENAME = "embed-worker.pid";
355
356
 
357
+ /** PID files are process-local state — store in /tmp when containerized to keep shared volumes clean. */
358
+ private getPidFilePath(): string {
359
+ if (getIsContainerized()) {
360
+ return join("/tmp", LocalEmbeddingBackend.PID_FILENAME);
361
+ }
362
+ return join(getRootDir(), LocalEmbeddingBackend.PID_FILENAME);
363
+ }
364
+
356
365
  private writePidFile(pid: number): void {
357
366
  try {
358
- writeFileSync(
359
- join(getRootDir(), LocalEmbeddingBackend.PID_FILENAME),
360
- String(pid),
361
- );
367
+ writeFileSync(this.getPidFilePath(), String(pid));
362
368
  } catch {
363
369
  // Best-effort — doesn't affect functionality
364
370
  }
@@ -366,7 +372,7 @@ export class LocalEmbeddingBackend implements EmbeddingBackend {
366
372
 
367
373
  private removePidFile(): void {
368
374
  try {
369
- unlinkSync(join(getRootDir(), LocalEmbeddingBackend.PID_FILENAME));
375
+ unlinkSync(this.getPidFilePath());
370
376
  } catch {
371
377
  // Best-effort
372
378
  }