@vellumai/assistant 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487) hide show
  1. package/README.md +51 -0
  2. package/eslint.config.mjs +31 -0
  3. package/package.json +1 -1
  4. package/scripts/ipc/check-swift-decoder-drift.ts +4 -1
  5. package/scripts/ipc/generate-swift.ts +18 -2
  6. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +338 -1
  7. package/src/__tests__/approval-conversation-turn.test.ts +214 -0
  8. package/src/__tests__/browser-manager.test.ts +1 -0
  9. package/src/__tests__/call-conversation-messages.test.ts +130 -0
  10. package/src/__tests__/call-orchestrator.test.ts +752 -271
  11. package/src/__tests__/call-pointer-messages.test.ts +148 -0
  12. package/src/__tests__/call-recovery.test.ts +3 -0
  13. package/src/__tests__/call-routes-http.test.ts +5 -0
  14. package/src/__tests__/call-store.test.ts +3 -0
  15. package/src/__tests__/channel-approval-routes.test.ts +1260 -85
  16. package/src/__tests__/channel-approval.test.ts +37 -0
  17. package/src/__tests__/channel-approvals.test.ts +4 -65
  18. package/src/__tests__/channel-guardian.test.ts +556 -0
  19. package/src/__tests__/channel-readiness-service.test.ts +74 -7
  20. package/src/__tests__/checker.test.ts +14 -7
  21. package/src/__tests__/clarification-resolver.test.ts +44 -24
  22. package/src/__tests__/commit-message-enrichment-service.test.ts +9 -4
  23. package/src/__tests__/computer-use-session-working-dir.test.ts +8 -0
  24. package/src/__tests__/config-schema.test.ts +12 -7
  25. package/src/__tests__/context-window-manager.test.ts +30 -2
  26. package/src/__tests__/contradiction-checker.test.ts +20 -5
  27. package/src/__tests__/credential-security-invariants.test.ts +6 -2
  28. package/src/__tests__/db-migration-rollback.test.ts +752 -0
  29. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +2 -0
  30. package/src/__tests__/fuzzy-match-property.test.ts +5 -5
  31. package/src/__tests__/guardian-action-store.test.ts +123 -0
  32. package/src/__tests__/guardian-action-sweep.test.ts +277 -0
  33. package/src/__tests__/guardian-dispatch.test.ts +389 -0
  34. package/src/__tests__/guardian-question-copy.test.ts +47 -0
  35. package/src/__tests__/handlers-telegram-config.test.ts +4 -2
  36. package/src/__tests__/handlers-twilio-config.test.ts +126 -0
  37. package/src/__tests__/intent-routing.test.ts +2 -0
  38. package/src/__tests__/ipc-snapshot.test.ts +228 -1
  39. package/src/__tests__/memory-upsert-concurrency.test.ts +828 -0
  40. package/src/__tests__/model-intents.test.ts +96 -0
  41. package/src/__tests__/no-direct-anthropic-sdk-imports.test.ts +42 -0
  42. package/src/__tests__/oauth2-gateway-transport.test.ts +130 -0
  43. package/src/__tests__/onboarding-starter-tasks.test.ts +2 -0
  44. package/src/__tests__/provider-commit-message-generator.test.ts +89 -13
  45. package/src/__tests__/provider-error-scenarios.test.ts +621 -0
  46. package/src/__tests__/provider-fail-open-selection.test.ts +119 -0
  47. package/src/__tests__/qdrant-manager.test.ts +27 -20
  48. package/src/__tests__/relay-server.test.ts +779 -40
  49. package/src/__tests__/run-orchestrator-assistant-events.test.ts +2 -0
  50. package/src/__tests__/run-orchestrator.test.ts +20 -4
  51. package/src/__tests__/runtime-runs-http.test.ts +17 -1
  52. package/src/__tests__/runtime-runs.test.ts +16 -0
  53. package/src/__tests__/schedule-store.test.ts +18 -4
  54. package/src/__tests__/scheduler-recurrence.test.ts +13 -4
  55. package/src/__tests__/session-abort-tool-results.test.ts +6 -0
  56. package/src/__tests__/session-agent-loop.test.ts +857 -0
  57. package/src/__tests__/session-conflict-gate.test.ts +6 -0
  58. package/src/__tests__/session-pre-run-repair.test.ts +6 -0
  59. package/src/__tests__/session-profile-injection.test.ts +6 -0
  60. package/src/__tests__/session-provider-retry-repair.test.ts +6 -0
  61. package/src/__tests__/session-queue.test.ts +6 -0
  62. package/src/__tests__/session-runtime-assembly.test.ts +237 -13
  63. package/src/__tests__/session-slash-known.test.ts +6 -0
  64. package/src/__tests__/session-slash-queue.test.ts +6 -0
  65. package/src/__tests__/session-slash-unknown.test.ts +6 -0
  66. package/src/__tests__/session-surfaces-task-progress.test.ts +2 -0
  67. package/src/__tests__/session-tool-setup-app-refresh.test.ts +1 -0
  68. package/src/__tests__/session-tool-setup-memory-scope.test.ts +1 -0
  69. package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +1 -0
  70. package/src/__tests__/session-workspace-injection.test.ts +6 -0
  71. package/src/__tests__/session-workspace-tool-tracking.test.ts +6 -0
  72. package/src/__tests__/skills.test.ts +2 -0
  73. package/src/__tests__/sms-messaging-provider.test.ts +2 -1
  74. package/src/__tests__/starter-task-flow.test.ts +2 -0
  75. package/src/__tests__/swarm-dag-pathological.test.ts +535 -0
  76. package/src/__tests__/system-prompt.test.ts +2 -0
  77. package/src/__tests__/task-management-tools.test.ts +2 -2
  78. package/src/__tests__/task-runner.test.ts +14 -4
  79. package/src/__tests__/terminal-tools.test.ts +25 -19
  80. package/src/__tests__/tool-execution-abort-cleanup.test.ts +545 -0
  81. package/src/__tests__/tool-executor-shell-integration.test.ts +11 -11
  82. package/src/__tests__/tool-executor.test.ts +23 -24
  83. package/src/__tests__/trust-store.test.ts +3 -3
  84. package/src/__tests__/twilio-rest.test.ts +29 -0
  85. package/src/__tests__/twilio-routes-elevenlabs.test.ts +3 -0
  86. package/src/__tests__/twilio-routes-twiml.test.ts +11 -0
  87. package/src/__tests__/twilio-routes.test.ts +141 -21
  88. package/src/__tests__/user-reference.test.ts +2 -0
  89. package/src/__tests__/voice-quality.test.ts +222 -0
  90. package/src/__tests__/web-search.test.ts +45 -29
  91. package/src/agent/loop.ts +1 -1
  92. package/src/agent-heartbeat/agent-heartbeat-service.ts +2 -10
  93. package/src/amazon/client.ts +1418 -0
  94. package/src/amazon/request-extractor.ts +135 -0
  95. package/src/amazon/session.ts +109 -0
  96. package/src/autonomy/autonomy-store.ts +5 -5
  97. package/src/browser-extension-relay/client.ts +124 -0
  98. package/src/browser-extension-relay/protocol.ts +63 -0
  99. package/src/browser-extension-relay/server.ts +177 -0
  100. package/src/bundler/app-bundler.ts +3 -3
  101. package/src/bundler/bundle-signer.ts +1 -1
  102. package/src/bundler/signature-verifier.ts +1 -1
  103. package/src/calls/call-conversation-messages.ts +33 -0
  104. package/src/calls/call-domain.ts +106 -5
  105. package/src/calls/call-orchestrator.ts +252 -54
  106. package/src/calls/call-pointer-messages.ts +53 -0
  107. package/src/calls/call-recovery.ts +3 -8
  108. package/src/calls/call-store.ts +69 -87
  109. package/src/calls/elevenlabs-config.ts +3 -2
  110. package/src/calls/guardian-action-sweep.ts +105 -0
  111. package/src/calls/guardian-dispatch.ts +203 -0
  112. package/src/calls/guardian-question-copy.ts +133 -0
  113. package/src/calls/relay-server.ts +466 -8
  114. package/src/calls/speaker-identification.ts +1 -1
  115. package/src/calls/twilio-config.ts +7 -5
  116. package/src/calls/twilio-provider.ts +6 -4
  117. package/src/calls/twilio-rest.ts +40 -15
  118. package/src/calls/twilio-routes.ts +60 -45
  119. package/src/calls/types.ts +3 -1
  120. package/src/channels/types.ts +25 -0
  121. package/src/cli/amazon.ts +815 -0
  122. package/src/cli/config-commands.ts +2 -2
  123. package/src/cli/core-commands.ts +4 -3
  124. package/src/cli/influencer.ts +244 -0
  125. package/src/cli/map.ts +89 -6
  126. package/src/cli.ts +1 -1
  127. package/src/config/agent-schema.ts +171 -0
  128. package/src/config/bundled-skills/amazon/SKILL.md +127 -0
  129. package/src/config/bundled-skills/amazon/icon.svg +13 -0
  130. package/src/config/bundled-skills/api-mapping/SKILL.md +78 -0
  131. package/src/config/bundled-skills/browser/SKILL.md +1 -0
  132. package/src/config/bundled-skills/browser/TOOLS.json +17 -0
  133. package/src/config/bundled-skills/browser/tools/browser-wait-for-download.ts +25 -0
  134. package/src/config/bundled-skills/doordash/SKILL.md +51 -51
  135. package/src/config/bundled-skills/email-setup/SKILL.md +14 -5
  136. package/src/config/bundled-skills/google-oauth-setup/SKILL.md +183 -0
  137. package/src/config/bundled-skills/influencer/SKILL.md +144 -0
  138. package/src/config/bundled-skills/macos-automation/icon.svg +12 -0
  139. package/src/config/bundled-skills/media-processing/SKILL.md +72 -95
  140. package/src/config/bundled-skills/media-processing/TOOLS.json +57 -147
  141. package/src/config/bundled-skills/media-processing/__tests__/concurrency-pool.test.ts +77 -0
  142. package/src/config/bundled-skills/media-processing/__tests__/cost-tracker.test.ts +69 -0
  143. package/src/config/bundled-skills/media-processing/__tests__/preprocess.test.ts +303 -0
  144. package/src/config/bundled-skills/media-processing/services/concurrency-pool.ts +55 -0
  145. package/src/config/bundled-skills/media-processing/services/cost-tracker.ts +86 -0
  146. package/src/config/bundled-skills/media-processing/services/gemini-map.ts +339 -0
  147. package/src/config/bundled-skills/media-processing/services/preprocess.ts +551 -0
  148. package/src/config/bundled-skills/media-processing/services/processing-pipeline.ts +7 -9
  149. package/src/config/bundled-skills/media-processing/services/reduce.ts +197 -0
  150. package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +88 -253
  151. package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +22 -153
  152. package/src/config/bundled-skills/media-processing/tools/generate-clip.ts +2 -2
  153. package/src/config/bundled-skills/media-processing/tools/media-diagnostics.ts +28 -51
  154. package/src/config/bundled-skills/media-processing/tools/query-media-events.ts +35 -270
  155. package/src/config/bundled-skills/messaging/SKILL.md +12 -2
  156. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +4 -7
  157. package/src/config/bundled-skills/messaging/tools/messaging-reply.ts +2 -1
  158. package/src/config/bundled-skills/phone-calls/SKILL.md +86 -21
  159. package/src/config/bundled-skills/twitter/icon.svg +14 -0
  160. package/src/config/bundled-tool-registry.ts +310 -0
  161. package/src/config/calls-schema.ts +181 -0
  162. package/src/config/core-schema.ts +309 -0
  163. package/src/config/defaults.ts +27 -3
  164. package/src/config/env-registry.ts +169 -0
  165. package/src/config/env.ts +175 -0
  166. package/src/config/loader.ts +6 -6
  167. package/src/config/memory-schema.ts +528 -0
  168. package/src/config/sandbox-schema.ts +55 -0
  169. package/src/config/schema.ts +157 -1138
  170. package/src/config/skill-state.ts +1 -1
  171. package/src/config/skills-schema.ts +32 -0
  172. package/src/config/skills.ts +35 -24
  173. package/src/config/system-prompt.ts +107 -56
  174. package/src/config/templates/SOUL.md +1 -1
  175. package/src/config/types.ts +1 -0
  176. package/src/config/user-reference.ts +4 -9
  177. package/src/config/vellum-skills/catalog.json +0 -7
  178. package/src/config/vellum-skills/chatgpt-import/tools/chatgpt-import.ts +5 -1
  179. package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +1 -0
  180. package/src/config/vellum-skills/sms-setup/SKILL.md +112 -14
  181. package/src/context/window-manager.ts +27 -7
  182. package/src/daemon/approval-generators.ts +186 -0
  183. package/src/daemon/approved-devices-store.ts +140 -0
  184. package/src/daemon/assistant-attachments.ts +1 -1
  185. package/src/daemon/classifier.ts +35 -32
  186. package/src/daemon/config-watcher.ts +1 -1
  187. package/src/daemon/daemon-control.ts +254 -0
  188. package/src/daemon/handlers/apps.ts +2 -3
  189. package/src/daemon/handlers/config-channels.ts +158 -0
  190. package/src/daemon/handlers/config-inbox.ts +540 -0
  191. package/src/daemon/handlers/config-ingress.ts +231 -0
  192. package/src/daemon/handlers/config-integrations.ts +258 -0
  193. package/src/daemon/handlers/config-model.ts +143 -0
  194. package/src/daemon/handlers/config-parental.ts +163 -0
  195. package/src/daemon/handlers/config-scheduling.ts +172 -0
  196. package/src/daemon/handlers/config-slack.ts +92 -0
  197. package/src/daemon/handlers/config-telegram.ts +301 -0
  198. package/src/daemon/handlers/config-tools.ts +177 -0
  199. package/src/daemon/handlers/config-trust.ts +104 -0
  200. package/src/daemon/handlers/config-twilio.ts +1080 -0
  201. package/src/daemon/handlers/config.ts +53 -2463
  202. package/src/daemon/handlers/diagnostics.ts +1 -1
  203. package/src/daemon/handlers/dictation.ts +4 -6
  204. package/src/daemon/handlers/documents.ts +18 -32
  205. package/src/daemon/handlers/index.ts +9 -0
  206. package/src/daemon/handlers/misc.ts +3 -5
  207. package/src/daemon/handlers/pairing.ts +98 -0
  208. package/src/daemon/handlers/sessions.ts +74 -5
  209. package/src/daemon/handlers/shared.ts +3 -1
  210. package/src/daemon/handlers/skills.ts +1 -1
  211. package/src/daemon/handlers/twitter-auth.ts +2 -0
  212. package/src/daemon/handlers/work-items.ts +2 -2
  213. package/src/daemon/handlers/workspace-files.ts +4 -3
  214. package/src/daemon/install-cli-launchers.ts +113 -0
  215. package/src/daemon/ipc-contract/apps.ts +356 -0
  216. package/src/daemon/ipc-contract/browser.ts +74 -0
  217. package/src/daemon/ipc-contract/computer-use.ts +151 -0
  218. package/src/daemon/ipc-contract/diagnostics.ts +56 -0
  219. package/src/daemon/ipc-contract/documents.ts +74 -0
  220. package/src/daemon/ipc-contract/inbox.ts +209 -0
  221. package/src/daemon/ipc-contract/integrations.ts +284 -0
  222. package/src/daemon/ipc-contract/memory.ts +48 -0
  223. package/src/daemon/ipc-contract/messages.ts +211 -0
  224. package/src/daemon/ipc-contract/pairing.ts +45 -0
  225. package/src/daemon/ipc-contract/parental-control.ts +95 -0
  226. package/src/daemon/ipc-contract/schedules.ts +97 -0
  227. package/src/daemon/ipc-contract/sessions.ts +321 -0
  228. package/src/daemon/ipc-contract/shared.ts +42 -0
  229. package/src/daemon/ipc-contract/skills.ts +120 -0
  230. package/src/daemon/ipc-contract/subagents.ts +58 -0
  231. package/src/daemon/ipc-contract/surfaces.ts +250 -0
  232. package/src/daemon/ipc-contract/trust.ts +60 -0
  233. package/src/daemon/ipc-contract/work-items.ts +225 -0
  234. package/src/daemon/ipc-contract/workspace.ts +113 -0
  235. package/src/daemon/ipc-contract-inventory.json +62 -0
  236. package/src/daemon/ipc-contract-inventory.ts +55 -29
  237. package/src/daemon/ipc-contract.ts +227 -2527
  238. package/src/daemon/ipc-protocol.ts +1 -1
  239. package/src/daemon/ipc-validate.ts +7 -0
  240. package/src/daemon/lifecycle.ts +97 -379
  241. package/src/daemon/pairing-store.ts +177 -0
  242. package/src/daemon/providers-setup.ts +43 -0
  243. package/src/daemon/ride-shotgun-handler.ts +67 -2
  244. package/src/daemon/server.ts +60 -44
  245. package/src/daemon/session-agent-loop-handlers.ts +421 -0
  246. package/src/daemon/session-agent-loop.ts +113 -275
  247. package/src/daemon/session-dynamic-profile.ts +1 -1
  248. package/src/daemon/session-history.ts +1 -1
  249. package/src/daemon/session-media-retry.ts +1 -1
  250. package/src/daemon/session-messaging.ts +37 -2
  251. package/src/daemon/session-notifiers.ts +5 -25
  252. package/src/daemon/session-process.ts +99 -59
  253. package/src/daemon/session-queue-manager.ts +98 -4
  254. package/src/daemon/session-runtime-assembly.ts +149 -15
  255. package/src/daemon/session-surfaces.ts +26 -4
  256. package/src/daemon/session-tool-setup.ts +28 -30
  257. package/src/daemon/session-workspace.ts +1 -1
  258. package/src/daemon/session.ts +24 -1
  259. package/src/daemon/shutdown-handlers.ts +122 -0
  260. package/src/daemon/trace-emitter.ts +1 -1
  261. package/src/daemon/watch-handler.ts +36 -33
  262. package/src/doordash/cart-queries.ts +787 -0
  263. package/src/doordash/client.ts +144 -127
  264. package/src/doordash/order-queries.ts +85 -0
  265. package/src/doordash/queries.ts +10 -1308
  266. package/src/doordash/search-queries.ts +203 -0
  267. package/src/doordash/session.ts +3 -2
  268. package/src/doordash/store-queries.ts +246 -0
  269. package/src/doordash/types.ts +367 -0
  270. package/src/email/providers/agentmail.ts +2 -1
  271. package/src/email/providers/index.ts +3 -2
  272. package/src/email/service.ts +3 -2
  273. package/src/errors.ts +43 -0
  274. package/src/home-base/prebuilt/seed.ts +1 -1
  275. package/src/hooks/cli.ts +6 -5
  276. package/src/hooks/config.ts +6 -8
  277. package/src/hooks/discovery.ts +6 -5
  278. package/src/hooks/manager.ts +4 -3
  279. package/src/hooks/runner.ts +2 -2
  280. package/src/hooks/templates.ts +5 -5
  281. package/src/inbound/public-ingress-urls.ts +3 -1
  282. package/src/index.ts +4 -2
  283. package/src/influencer/client.ts +1104 -0
  284. package/src/instrument.ts +4 -3
  285. package/src/logfire.ts +4 -3
  286. package/src/memory/admin.ts +25 -35
  287. package/src/memory/attachments-store.ts +4 -7
  288. package/src/memory/channel-delivery-store.ts +30 -1
  289. package/src/memory/channel-guardian-store.ts +200 -1
  290. package/src/memory/clarification-resolver.ts +37 -33
  291. package/src/memory/conflict-store.ts +67 -61
  292. package/src/memory/contradiction-checker.ts +141 -117
  293. package/src/memory/conversation-store.ts +335 -51
  294. package/src/memory/db-connection.ts +27 -4
  295. package/src/memory/db-init.ts +121 -4
  296. package/src/memory/db.ts +14 -1
  297. package/src/memory/embedding-backend.ts +27 -5
  298. package/src/memory/embedding-ollama.ts +2 -1
  299. package/src/memory/entity-extractor.ts +38 -35
  300. package/src/memory/guardian-action-store.ts +430 -0
  301. package/src/memory/inbox-escalation-projection.ts +59 -0
  302. package/src/memory/inbox-thread-store.ts +218 -0
  303. package/src/memory/ingress-invite-store.ts +338 -0
  304. package/src/memory/ingress-member-store.ts +350 -0
  305. package/src/memory/items-extractor.ts +91 -97
  306. package/src/memory/job-handlers/index-maintenance.ts +3 -3
  307. package/src/memory/job-handlers/media-processing.ts +11 -42
  308. package/src/memory/job-handlers/summarization.ts +32 -26
  309. package/src/memory/job-utils.ts +3 -10
  310. package/src/memory/jobs-store.ts +6 -9
  311. package/src/memory/jobs-worker.ts +51 -36
  312. package/src/memory/migrations/001-job-deferrals.ts +45 -0
  313. package/src/memory/migrations/002-tool-invocations-fk.ts +43 -0
  314. package/src/memory/migrations/003-memory-fts-backfill.ts +24 -0
  315. package/src/memory/migrations/004-entity-relation-dedup.ts +87 -0
  316. package/src/memory/migrations/005-fingerprint-scope-unique.ts +80 -0
  317. package/src/memory/migrations/006-scope-salted-fingerprints.ts +62 -0
  318. package/src/memory/migrations/007-assistant-id-to-self.ts +254 -0
  319. package/src/memory/migrations/008-remove-assistant-id-columns.ts +208 -0
  320. package/src/memory/migrations/009-llm-usage-events-drop-assistant-id.ts +83 -0
  321. package/src/memory/migrations/010-ext-conv-bindings-channel-chat-unique.ts +56 -0
  322. package/src/memory/migrations/011-call-sessions-provider-sid-dedup.ts +63 -0
  323. package/src/memory/migrations/012-call-sessions-add-initiated-from.ts +19 -0
  324. package/src/memory/migrations/013-guardian-action-tables.ts +68 -0
  325. package/src/memory/migrations/014-backfill-inbox-thread-state.ts +76 -0
  326. package/src/memory/migrations/015-drop-active-search-index.ts +27 -0
  327. package/src/memory/migrations/016-memory-segments-indexes.ts +11 -0
  328. package/src/memory/migrations/017-memory-items-indexes.ts +12 -0
  329. package/src/memory/migrations/018-remaining-table-indexes.ts +13 -0
  330. package/src/memory/migrations/index.ts +24 -0
  331. package/src/memory/migrations/registry.ts +79 -0
  332. package/src/memory/migrations/validate-migration-state.ts +69 -0
  333. package/src/memory/qdrant-manager.ts +49 -8
  334. package/src/memory/query-builder.ts +1 -1
  335. package/src/memory/raw-query.ts +119 -0
  336. package/src/memory/recall-cache.ts +4 -1
  337. package/src/memory/retriever.ts +163 -47
  338. package/src/memory/schema-migration.ts +25 -984
  339. package/src/memory/schema.ts +130 -7
  340. package/src/memory/search/entity.ts +10 -19
  341. package/src/memory/search/lexical.ts +81 -52
  342. package/src/memory/search/ranking.ts +21 -22
  343. package/src/memory/search/semantic.ts +157 -19
  344. package/src/memory/shared-app-links-store.ts +4 -5
  345. package/src/memory/validation.ts +19 -0
  346. package/src/messaging/draft-store.ts +5 -6
  347. package/src/messaging/providers/sms/adapter.ts +3 -6
  348. package/src/messaging/providers/telegram-bot/adapter.ts +2 -5
  349. package/src/messaging/providers/whatsapp/adapter.ts +136 -0
  350. package/src/messaging/providers/whatsapp/client.ts +67 -0
  351. package/src/messaging/style-analyzer.ts +5 -4
  352. package/src/messaging/thread-summarizer.ts +61 -69
  353. package/src/messaging/triage-engine.ts +62 -71
  354. package/src/migrations/config-merge.ts +53 -0
  355. package/src/migrations/data-layout.ts +68 -0
  356. package/src/migrations/data-merge.ts +33 -0
  357. package/src/migrations/hooks-merge.ts +90 -0
  358. package/src/migrations/index.ts +6 -0
  359. package/src/migrations/log.ts +23 -0
  360. package/src/migrations/skills-merge.ts +33 -0
  361. package/src/migrations/workspace-layout.ts +79 -0
  362. package/src/permissions/checker.ts +126 -11
  363. package/src/permissions/prompter.ts +14 -0
  364. package/src/permissions/shell-identity.ts +31 -1
  365. package/src/permissions/trust-store.ts +21 -1
  366. package/src/providers/anthropic/client.ts +4 -4
  367. package/src/providers/failover.ts +2 -2
  368. package/src/providers/model-intents.ts +70 -0
  369. package/src/providers/ollama/client.ts +2 -1
  370. package/src/providers/provider-send-message.ts +176 -0
  371. package/src/providers/registry.ts +71 -30
  372. package/src/providers/retry.ts +35 -1
  373. package/src/providers/types.ts +12 -1
  374. package/src/runtime/approval-conversation-turn.ts +97 -0
  375. package/src/runtime/approval-message-composer.ts +115 -5
  376. package/src/runtime/assistant-event-hub.ts +3 -1
  377. package/src/runtime/channel-approval-parser.ts +36 -2
  378. package/src/runtime/channel-approvals.ts +0 -21
  379. package/src/runtime/channel-guardian-service.ts +48 -7
  380. package/src/runtime/channel-readiness-service.ts +160 -34
  381. package/src/runtime/channel-readiness-types.ts +10 -4
  382. package/src/runtime/channel-retry-sweep.ts +184 -0
  383. package/src/runtime/guardian-context-resolver.ts +108 -0
  384. package/src/runtime/http-server.ts +289 -745
  385. package/src/runtime/http-types.ts +56 -3
  386. package/src/runtime/middleware/auth.ts +116 -0
  387. package/src/runtime/middleware/error-handler.ts +33 -0
  388. package/src/runtime/middleware/twilio-validation.ts +127 -0
  389. package/src/runtime/routes/app-routes.ts +1 -1
  390. package/src/runtime/routes/call-routes.ts +49 -6
  391. package/src/runtime/routes/channel-delivery-routes.ts +170 -0
  392. package/src/runtime/routes/channel-guardian-routes.ts +1191 -0
  393. package/src/runtime/routes/channel-inbound-routes.ts +1152 -0
  394. package/src/runtime/routes/channel-route-shared.ts +144 -0
  395. package/src/runtime/routes/channel-routes.ts +32 -1634
  396. package/src/runtime/routes/conversation-routes.ts +50 -7
  397. package/src/runtime/routes/events-routes.ts +2 -2
  398. package/src/runtime/routes/identity-routes.ts +126 -0
  399. package/src/runtime/routes/pairing-routes.ts +144 -0
  400. package/src/runtime/routes/run-routes.ts +15 -1
  401. package/src/runtime/run-orchestrator.ts +52 -34
  402. package/src/schedule/schedule-store.ts +36 -32
  403. package/src/schedule/scheduler.ts +3 -3
  404. package/src/security/encrypted-store.ts +5 -7
  405. package/src/security/oauth2.ts +45 -15
  406. package/src/security/parental-control-store.ts +183 -0
  407. package/src/security/secret-allowlist.ts +4 -3
  408. package/src/security/secret-scanner.ts +5 -5
  409. package/src/security/secure-keys.ts +1 -1
  410. package/src/security/token-manager.ts +3 -2
  411. package/src/services/vercel-deploy.ts +6 -2
  412. package/src/skills/tool-manifest.ts +3 -3
  413. package/src/skills/vellum-catalog-remote.ts +75 -16
  414. package/src/slack/slack-webhook.ts +2 -1
  415. package/src/swarm/orchestrator.ts +92 -1
  416. package/src/swarm/router-planner.ts +6 -9
  417. package/src/swarm/worker-prompts.ts +9 -12
  418. package/src/tasks/task-compiler.ts +19 -28
  419. package/src/tasks/task-runner.ts +1 -1
  420. package/src/tools/assets/search.ts +15 -14
  421. package/src/tools/browser/__tests__/auth-detector.test.ts +1 -0
  422. package/src/tools/browser/auto-navigate.ts +1 -0
  423. package/src/tools/browser/browser-execution.ts +13 -1
  424. package/src/tools/browser/browser-manager.ts +119 -4
  425. package/src/tools/browser/network-recorder.ts +5 -0
  426. package/src/tools/credentials/broker.ts +11 -2
  427. package/src/tools/credentials/metadata-store.ts +18 -14
  428. package/src/tools/credentials/post-connect-hooks.ts +61 -0
  429. package/src/tools/credentials/vault.ts +49 -23
  430. package/src/tools/executor.ts +80 -18
  431. package/src/tools/host-terminal/cli-discover.ts +1 -1
  432. package/src/tools/network/script-proxy/http-forwarder.ts +1 -1
  433. package/src/tools/network/script-proxy/mitm-handler.ts +1 -1
  434. package/src/tools/network/script-proxy/server.ts +1 -1
  435. package/src/tools/network/script-proxy/session-manager.ts +6 -5
  436. package/src/tools/network/web-fetch.ts +18 -2
  437. package/src/tools/network/web-search.ts +7 -3
  438. package/src/tools/reminder/reminder-store.ts +14 -15
  439. package/src/tools/schedule/create.ts +1 -0
  440. package/src/tools/schedule/list.ts +2 -1
  441. package/src/tools/shared/filesystem/file-ops-service.ts +5 -7
  442. package/src/tools/skills/skill-script-runner.ts +24 -9
  443. package/src/tools/skills/skill-tool-factory.ts +1 -0
  444. package/src/tools/tasks/work-item-enqueue.ts +2 -2
  445. package/src/tools/terminal/evaluate-typescript.ts +21 -12
  446. package/src/tools/terminal/parser.ts +50 -0
  447. package/src/tools/watcher/delete.ts +6 -0
  448. package/src/tools/weather/service.ts +1 -1
  449. package/src/twitter/client.ts +190 -24
  450. package/src/twitter/session.ts +4 -3
  451. package/src/util/clipboard.ts +1 -1
  452. package/src/util/errors.ts +65 -8
  453. package/src/util/fs.ts +40 -0
  454. package/src/util/json.ts +10 -0
  455. package/src/util/log-redact.ts +189 -0
  456. package/src/util/logger.ts +25 -18
  457. package/src/util/object.ts +3 -0
  458. package/src/util/platform.ts +72 -365
  459. package/src/util/pricing.ts +1 -1
  460. package/src/util/promise-guard.ts +1 -1
  461. package/src/util/retry.ts +19 -0
  462. package/src/util/row-mapper.ts +79 -0
  463. package/src/util/silently.ts +21 -0
  464. package/src/watcher/engine.ts +5 -1
  465. package/src/watcher/provider-types.ts +20 -0
  466. package/src/watcher/providers/github.ts +156 -0
  467. package/src/watcher/providers/gmail.ts +1 -0
  468. package/src/watcher/providers/google-calendar.ts +1 -0
  469. package/src/watcher/providers/linear.ts +460 -0
  470. package/src/watcher/providers/slack.ts +1 -0
  471. package/src/work-items/work-item-runner.ts +1 -1
  472. package/src/workspace/git-service.ts +1 -1
  473. package/src/workspace/provider-commit-message-generator.ts +51 -22
  474. package/src/__tests__/call-bridge.test.ts +0 -517
  475. package/src/__tests__/session-process-bridge.test.ts +0 -244
  476. package/src/calls/call-bridge.ts +0 -168
  477. package/src/config/bundled-skills/media-processing/services/capability-registry.ts +0 -137
  478. package/src/config/bundled-skills/media-processing/services/event-detection-service.ts +0 -280
  479. package/src/config/bundled-skills/media-processing/services/feedback-aggregation.ts +0 -144
  480. package/src/config/bundled-skills/media-processing/services/feedback-store.ts +0 -136
  481. package/src/config/bundled-skills/media-processing/services/retrieval-service.ts +0 -95
  482. package/src/config/bundled-skills/media-processing/services/timeline-service.ts +0 -267
  483. package/src/config/bundled-skills/media-processing/tools/detect-events.ts +0 -110
  484. package/src/config/bundled-skills/media-processing/tools/recalibrate.ts +0 -235
  485. package/src/config/bundled-skills/media-processing/tools/select-tracking-profile.ts +0 -142
  486. package/src/config/bundled-skills/media-processing/tools/submit-feedback.ts +0 -150
  487. package/src/config/vellum-skills/google-oauth-setup/SKILL.md +0 -199
@@ -0,0 +1,183 @@
1
+ ---
2
+ name: "Google OAuth Setup"
3
+ description: "Set up Google Cloud OAuth credentials for Gmail and Calendar using browser automation"
4
+ user-invocable: true
5
+ includes: ["browser", "public-ingress"]
6
+ metadata: {"vellum": {"emoji": "🔑"}}
7
+ ---
8
+
9
+ You are helping your user set up Google Cloud OAuth credentials so Gmail and Google Calendar integrations can connect. You will automate the entire GCP setup via the browser while the user watches via screencast. The user's only manual actions are signing in to their Google account, entering the downloaded credentials into a secure prompt, and approving the final authorization — everything else is fully automated.
10
+
11
+ ## Client Check
12
+
13
+ If the user is on Telegram (or any non-macOS client without browser automation):
14
+
15
+ > "Gmail setup requires browser automation, which is available on the macOS app. Please open the Vellum app on your Mac and ask me to connect Gmail there — I'll handle the rest automatically."
16
+
17
+ Stop here. Do not attempt a manual walkthrough.
18
+
19
+ ## Prerequisites
20
+
21
+ Before starting, check that `ingress.publicBaseUrl` is configured (`INGRESS_PUBLIC_BASE_URL` env var or workspace config). If it is not set, load and execute the **public-ingress** skill first (`skill_load` with `skill: "public-ingress"`) to set up an ngrok tunnel and persist the public URL. The OAuth redirect URI depends on this value.
22
+
23
+ ## Step 1: Single Upfront Confirmation
24
+
25
+ Use `ui_show` with `surface_type: "confirmation"` and this message:
26
+
27
+ > **Set up Google Cloud for Gmail & Calendar**
28
+ >
29
+ > Here's what will happen:
30
+ > 1. **A browser opens** — you sign in to your Google account
31
+ > 2. **I automate everything** — project creation, APIs, OAuth config, credentials
32
+ > 3. **You enter credentials** from a downloaded file (secure prompt — I never see them)
33
+ > 4. **You authorize Vellum** with one click
34
+ >
35
+ > The whole thing takes 2-3 minutes. Ready?
36
+
37
+ If the user declines, acknowledge and stop. No further confirmations are needed after this point.
38
+
39
+ ## Step 2: Open Google Cloud Console
40
+
41
+ Use `browser_navigate` to go to `https://console.cloud.google.com/`.
42
+
43
+ Take a `browser_screenshot` and `browser_snapshot` to check the page state:
44
+ - **If a sign-in page appears:** Tell the user: "Please sign in to your Google account in the browser preview panel (or the Chrome window that just opened)." Then **auto-detect sign-in completion** by polling `browser_snapshot` every 5-10 seconds. Check if the current URL has moved away from `accounts.google.com` to `console.cloud.google.com`. Do NOT ask the user to "let me know when you're done" — detect it automatically. Once sign-in is detected, tell the user: "Signed in! Starting the automated setup now..."
45
+ - **If already signed in** (URL is already `console.cloud.google.com`): Tell the user: "Already signed in — starting setup now..." and continue immediately.
46
+ - **If a CAPTCHA appears:** The browser automation's built-in handoff will handle this. If it persists, tell the user: "There's a CAPTCHA in the browser — please complete it and I'll continue automatically."
47
+ - **If the console dashboard loads:** Continue to Step 3.
48
+
49
+ ## Step 3: Create or Select a Project
50
+
51
+ Tell the user: "Creating Google Cloud project 'Vellum Assistant'..."
52
+
53
+ Navigate to `https://console.cloud.google.com/projectcreate`.
54
+
55
+ Take a `browser_snapshot`. Fill in the project name:
56
+ - Use `browser_type` to set the project name to "Vellum Assistant"
57
+ - Use `browser_click` to click the "Create" button
58
+
59
+ Wait a few seconds, take a `browser_screenshot` and `browser_snapshot` to confirm. If the project already exists, navigate to its dashboard. Note the project ID for subsequent steps.
60
+
61
+ Tell the user: "Project created!"
62
+
63
+ ## Step 4: Enable Gmail and Calendar APIs
64
+
65
+ Tell the user: "Enabling Gmail and Calendar APIs..."
66
+
67
+ Navigate to `https://console.cloud.google.com/apis/library/gmail.googleapis.com?project=PROJECT_ID` (substitute actual project ID).
68
+
69
+ Take a `browser_snapshot`:
70
+ - If already enabled (shows "API enabled" or "Manage" button): skip.
71
+ - If not: click the "Enable" button and wait.
72
+
73
+ Then navigate to `https://console.cloud.google.com/apis/library/calendar-json.googleapis.com?project=PROJECT_ID`.
74
+
75
+ Same check — enable if needed.
76
+
77
+ Take a `browser_screenshot` to show result. Tell the user: "APIs enabled!"
78
+
79
+ ## Step 5: Configure OAuth Consent Screen
80
+
81
+ Tell the user: "Configuring OAuth consent screen — this is the longest step, but it's fully automated..."
82
+
83
+ Navigate to `https://console.cloud.google.com/apis/credentials/consent?project=PROJECT_ID`.
84
+
85
+ Take a `browser_snapshot`:
86
+ - If consent screen is already configured: skip to Step 6.
87
+ - If user type selection appears: select "External" and click "Create".
88
+
89
+ Fill in the consent screen form:
90
+ 1. **App name:** "Vellum Assistant"
91
+ 2. **User support email:** Select the user's email from the dropdown
92
+ 3. **Developer contact email:** Type the user's email address
93
+ 4. Leave other fields as defaults
94
+
95
+ Navigate through the wizard pages:
96
+ - App information page: Fill fields, click "Save and Continue"
97
+ - Scopes page: Click "Add or Remove Scopes", search for and select:
98
+ - `https://www.googleapis.com/auth/gmail.readonly`
99
+ - `https://www.googleapis.com/auth/gmail.modify`
100
+ - `https://www.googleapis.com/auth/gmail.send`
101
+ - `https://www.googleapis.com/auth/calendar.readonly`
102
+ - `https://www.googleapis.com/auth/calendar.events`
103
+ - `https://www.googleapis.com/auth/userinfo.email`
104
+ - Click "Update" then "Save and Continue"
105
+ - Test users page: Add the user's email as a test user, click "Save and Continue"
106
+ - Summary page: Click "Back to Dashboard"
107
+
108
+ Tell the user: "Consent screen configured!"
109
+
110
+ ## Step 6: Create OAuth Credentials
111
+
112
+ Tell the user: "Creating OAuth credentials..."
113
+
114
+ Navigate to `https://console.cloud.google.com/apis/credentials?project=PROJECT_ID`.
115
+
116
+ Click "+ Create Credentials" then select "OAuth client ID".
117
+
118
+ Take a `browser_snapshot` and fill in:
119
+ 1. **Application type:** Select "Web application"
120
+ 2. **Name:** "Vellum Assistant"
121
+ 3. **Authorized redirect URIs:** Click "Add URI" and enter `${ingress.publicBaseUrl}/webhooks/oauth/callback`
122
+
123
+ Click "Create".
124
+
125
+ ## Step 7: Download Credentials JSON
126
+
127
+ Tell the user: "Almost done — downloading credentials..."
128
+
129
+ After the credentials dialog appears, click the "Download JSON" button (it may say "DOWNLOAD JSON" or show a download icon).
130
+
131
+ Use `browser_wait_for_download` to wait for the file to download.
132
+
133
+ Tell the user: "Credentials downloaded!"
134
+
135
+ ## Step 8: Secure Credential Entry
136
+
137
+ Tell the user: "I've downloaded the credentials file. Please open it and enter the values below. I won't see what you type — these go directly to secure storage."
138
+
139
+ ```
140
+ credential_store prompt:
141
+ service: "integration:gmail"
142
+ field: "client_id"
143
+ label: "Google OAuth Client ID"
144
+ description: "Open the downloaded JSON file and copy the client_id value"
145
+ placeholder: "123456789.apps.googleusercontent.com"
146
+ ```
147
+
148
+ ```
149
+ credential_store prompt:
150
+ service: "integration:gmail"
151
+ field: "client_secret"
152
+ label: "Google OAuth Client Secret"
153
+ description: "Copy the client_secret value from the same JSON file"
154
+ placeholder: "GOCSPX-..."
155
+ ```
156
+
157
+ ## Step 9: OAuth2 Authorization
158
+
159
+ Tell the user: "Opening Google sign-in so you can authorize Vellum. Just click 'Allow' on the consent page."
160
+
161
+ Use `credential_store` with:
162
+
163
+ ```
164
+ action: "oauth2_connect"
165
+ service: "integration:gmail"
166
+ ```
167
+
168
+ This auto-reads client_id/client_secret from the secure store and auto-fills auth_url, token_url, scopes, and extra_params from well-known config.
169
+
170
+ **If the user sees a "This app isn't verified" warning:** Tell them this is normal for apps in testing mode, and that they should click "Advanced" then "Go to Vellum Assistant (unsafe)" to proceed.
171
+
172
+ ## Step 10: Done!
173
+
174
+ "**Gmail and Calendar are connected!** You can now read, search, and send emails, plus view and manage your calendar. Try asking me to check your inbox or show your upcoming events!"
175
+
176
+ ## Error Handling
177
+
178
+ - **Page load failures:** Retry navigation once. If it still fails, tell the user and ask them to check their internet connection.
179
+ - **Permission errors in GCP:** The user may need billing enabled or organization-level permissions. Explain clearly and ask them to resolve it.
180
+ - **Consent screen already configured:** Don't overwrite — skip to credential creation.
181
+ - **Element not found:** Take a fresh `browser_snapshot` to re-assess. GCP UI may have changed. Tell the user what you're looking for if stuck.
182
+ - **OAuth flow timeout or failure:** Offer to retry. The credentials are already stored, so reconnecting only requires re-running the authorization flow.
183
+ - **Any unexpected state:** Take a `browser_screenshot` and `browser_snapshot`, describe what you see, and ask the user for guidance.
@@ -0,0 +1,144 @@
1
+ ---
2
+ name: "Influencer Research"
3
+ description: "Research influencers on Instagram, TikTok, and X/Twitter using the Chrome extension relay"
4
+ user-invocable: true
5
+ metadata: {"vellum": {"emoji": "🔍"}}
6
+ ---
7
+
8
+ You can research and discover influencers across Instagram, TikTok, and X/Twitter using the `vellum influencer` CLI.
9
+
10
+ ## CLI Setup
11
+
12
+ **IMPORTANT: Always use `host_bash` (not `bash`) for all `vellum influencer` commands.** The influencer CLI needs host access for the Chrome extension relay and the `vellum` binary, neither of which is available inside the sandbox.
13
+
14
+ `vellum influencer` is a built-in subcommand of the Vellum assistant CLI. If `vellum` is not found, prepend `PATH="$HOME/.local/bin:$PATH"` to the command.
15
+
16
+ ## Prerequisites
17
+
18
+ - The Chrome extension relay must be connected (user should have the Vellum extension loaded in Chrome)
19
+ - The user must be **logged in** on each platform they want to search (Instagram, TikTok, X) in their Chrome browser
20
+ - The extension MUST have the `debugger` permission (required to bypass CSP on Instagram and other Meta sites)
21
+ - If the relay is not connected, tell the user: "Please open Chrome, click the Vellum extension icon, and click Connect — then I'll retry."
22
+
23
+ ## Platform-Specific Architecture
24
+
25
+ ### Instagram
26
+ Instagram's search at `/explore/search/keyword/?q=...` shows a **grid of posts**, NOT profiles. The discovery flow is:
27
+ 1. Search by keyword → extract post links (`/p/` and `/reel/`)
28
+ 2. Visit each post → find the author username from page links
29
+ 3. Deduplicate usernames
30
+ 4. Visit each unique profile → scrape stats from `meta[name="description"]` (most reliable source, format: "49K Followers, 463 Following, 551 Posts - Display Name (@user)")
31
+ 5. Filter and rank by criteria
32
+
33
+ **CSP Note:** Instagram blocks `eval()`, `new Function()`, inline scripts, and blob URLs via strict CSP. The extension uses `chrome.debugger` API (CDP Runtime.evaluate) as a fallback, which bypasses all CSP restrictions.
34
+
35
+ ### TikTok
36
+ TikTok has a dedicated user search at `/search/user?q=...`. Each result card produces a predictable text pattern in `innerText`:
37
+ ```
38
+ DisplayName
39
+ username
40
+ 77.9K
41
+ Followers
42
+ ·
43
+ 1.5M
44
+ Likes
45
+ Follow
46
+ ```
47
+ We parse this pattern directly (DOM class selectors are obfuscated and unreliable on TikTok). After extracting usernames and follower counts, we visit each profile for bios.
48
+
49
+ ### X/Twitter
50
+ X has a people search at `/search?q=...&f=user` with `[data-testid="UserCell"]` components containing username, display name, bio, and verified status.
51
+
52
+ ## Typical Flow
53
+
54
+ When the user asks to find or research influencers:
55
+
56
+ 1. **Understand the criteria.** Ask about:
57
+ - **Niche/topic** — what kind of influencers? (fitness, beauty, tech, food, etc.)
58
+ - **Platforms** — Instagram, TikTok, X/Twitter, or all three?
59
+ - **Follower range** — nano (1K-10K), micro (10K-100K), mid-tier (100K-500K), macro (500K-1M), mega (1M+)?
60
+ - **Verified only?** — do they need the blue checkmark?
61
+ - Don't over-ask. If the user says "find me fitness influencers on Instagram", that's enough to start.
62
+
63
+ 2. **Search** — run `vellum influencer search "<query>" --platforms <platforms> [options] --json`
64
+
65
+ 3. **Present results** — show a clean summary of each influencer found:
66
+ - Username and display name
67
+ - Platform
68
+ - Follower count
69
+ - Bio snippet
70
+ - Verified status
71
+ - Content themes detected
72
+ - Profile URL
73
+
74
+ 4. **Deep dive** (if needed) — run `vellum influencer profile <username> --platform <platform> --json` to get detailed data on a specific influencer.
75
+
76
+ 5. **Compare** (if needed) — run `vellum influencer compare instagram:user1 twitter:user2 tiktok:user3 --json` to compare influencers side by side.
77
+
78
+ ## Follower Range Shortcuts
79
+
80
+ When the user describes influencer tiers, map to these ranges:
81
+ - **Nano**: `--min-followers 1000 --max-followers 10000`
82
+ - **Micro**: `--min-followers 10000 --max-followers 100000`
83
+ - **Mid-tier**: `--min-followers 100000 --max-followers 500000`
84
+ - **Macro**: `--min-followers 500000 --max-followers 1000000`
85
+ - **Mega**: `--min-followers 1000000`
86
+
87
+ Human-friendly numbers are supported: `10k`, `100k`, `1m`, etc.
88
+
89
+ ## Command Reference
90
+
91
+ ```
92
+ vellum influencer search "<query>" [options] --json
93
+ --platforms <list> Comma-separated: instagram,tiktok,twitter (default: all three)
94
+ --min-followers <n> Minimum follower count (e.g. 10k, 100000)
95
+ --max-followers <n> Maximum follower count (e.g. 1m, 500k)
96
+ --limit <n> Max results per platform (default: 10)
97
+ --verified Only return verified accounts
98
+
99
+ vellum influencer profile <username> --platform <platform> --json
100
+ --platform <platform> instagram, tiktok, or twitter (default: instagram)
101
+
102
+ vellum influencer compare <platform:username ...> --json
103
+ Arguments are space-separated platform:username pairs
104
+ e.g. instagram:nike twitter:nike tiktok:nike
105
+ ```
106
+
107
+ ## Important Behavior
108
+
109
+ - **Use `--json` flag** on all commands for reliable parsing.
110
+ - **Always use `host_bash`** for these commands, never `bash`.
111
+ - **Be patient with results.** The tool navigates actual browser tabs, so each platform search takes 10-30 seconds. Warn the user it may take a moment.
112
+ - **Rate limiting.** Don't hammer the platforms. The tool has built-in delays, but avoid running many searches in rapid succession.
113
+ - **Present results nicely.** Use tables or formatted lists. Group by platform. Highlight standout profiles.
114
+ - **Offer next steps.** After showing results, ask if they want to:
115
+ - Get more details on specific profiles
116
+ - Compare top picks side by side
117
+ - Search with different criteria
118
+ - Export the results
119
+ - **Handle errors gracefully.** If a platform fails (e.g. not logged in), show results from the platforms that worked and mention which one failed.
120
+ - **Do NOT use the browser skill.** All influencer research goes through the CLI, not browser automation.
121
+
122
+ ## Example Interactions
123
+
124
+ **User**: "Find me fitness influencers on Instagram and TikTok"
125
+
126
+ 1. `vellum influencer search "fitness coach workout" --platforms instagram,tiktok --limit 10 --json`
127
+ 2. Present results grouped by platform with follower counts and bios
128
+ 3. "I found 8 fitness influencers on Instagram and 6 on TikTok. Want me to dig deeper into any of these profiles?"
129
+
130
+ **User**: "I need micro-influencers in the beauty niche, verified only"
131
+
132
+ 1. `vellum influencer search "beauty makeup skincare" --platforms instagram,tiktok,twitter --min-followers 10k --max-followers 100k --verified --limit 10 --json`
133
+ 2. Present filtered results
134
+ 3. Offer to compare top picks
135
+
136
+ **User**: "Compare @username1 on Instagram with @username2 on TikTok"
137
+
138
+ 1. `vellum influencer compare instagram:username1 tiktok:username2 --json`
139
+ 2. Present side-by-side comparison with followers, engagement, bio, themes
140
+
141
+ **User**: "Tell me more about @specificuser on Instagram"
142
+
143
+ 1. `vellum influencer profile specificuser --platform instagram --json`
144
+ 2. Show full profile details including bio, follower/following counts, verified status, content themes
@@ -0,0 +1,12 @@
1
+ <svg viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg">
2
+ <rect x="2" y="2" width="12" height="11" fill="#e8e8e8" stroke="#333" stroke-width="1"/>
3
+ <rect x="3" y="3" width="10" height="9" fill="#f5f5f5"/>
4
+ <circle cx="8" cy="7" r="2" fill="#0071e3"/>
5
+ <rect x="5" y="5" width="1" height="1" fill="#333"/>
6
+ <rect x="10" y="5" width="1" height="1" fill="#333"/>
7
+ <rect x="5" y="9" width="1" height="1" fill="#333"/>
8
+ <rect x="10" y="9" width="1" height="1" fill="#333"/>
9
+ <path d="M 8 4 L 8 6 M 6 7 L 8 7 M 8 8 L 8 10 M 8 7 L 10 7" stroke="#0071e3" stroke-width="1" fill="none"/>
10
+ <rect x="3" y="13" width="10" height="1" fill="#333"/>
11
+ <rect x="4" y="14" width="8" height="1" fill="#0071e3"/>
12
+ </svg>
@@ -1,23 +1,22 @@
1
1
  ---
2
2
  name: "Media Processing"
3
- description: "Ingest and process media files (video, audio, image) through multi-stage pipelines including keyframe extraction, vision analysis, and timeline generation"
3
+ description: "Ingest and process media files (video, audio, image) through a 3-phase pipeline: preprocess, map (Gemini), and reduce (Claude)"
4
4
  metadata: {"vellum": {"emoji": "🎬"}}
5
5
  ---
6
6
 
7
- Ingest and track processing of media files (video, audio, images) through configurable multi-stage pipelines.
7
+ Ingest and track processing of media files (video, audio, images) through a configurable 3-phase pipeline.
8
8
 
9
9
  ## End-to-End Workflow
10
10
 
11
- The processing pipeline follows a sequential flow. Each stage depends on the output of the previous one:
11
+ The processing pipeline follows a sequential 3-phase flow:
12
12
 
13
13
  1. **Ingest** (`ingest_media`) — Register a media file, detect MIME type, extract duration, deduplicate by content hash.
14
- 2. **Extract Keyframes** (`extract_keyframes`) — Pull frames from video at regular intervals (default: every 3 seconds) using ffmpeg.
15
- 3. **Analyze Keyframes** (`analyze_keyframes`) — Send each keyframe to Claude VLM for structured scene analysis (subjects, actions, context).
16
- 4. **Generate Timeline** — Aggregate vision outputs into coherent timeline segments (called via `services/timeline-service.ts`).
17
- 5. **Detect Events** (`detect_events`) — Apply configurable detection rules against timeline segments to find events of interest.
18
- 6. **Query & Clip** — Use `query_media_events` to search events with natural language, and `generate_clip` to extract video clips around specific moments.
14
+ 2. **Preprocess** (`extract_keyframes`) — Detect dead time, segment the video into windows, extract downscaled keyframes, build a subject registry, and write a pipeline manifest.
15
+ 3. **Map** (`analyze_keyframes`) — Send each segment's frames to Gemini 2.5 Flash with assistant-provided extraction instructions and a JSON Schema for guaranteed structured output. Supports concurrency pooling, cost tracking, resumability, and automatic retries.
16
+ 4. **Reduce / Query** (`query_media`) — Send all map output to Claude for intelligent analysis and Q&A. Supports arbitrary natural language queries about video content.
17
+ 5. **Clip** (`generate_clip`) — Extract video clips around specific moments.
19
18
 
20
- The processing pipeline service (`services/processing-pipeline.ts`) can orchestrate stages 2-5 automatically with retries, resumability, and cancellation support.
19
+ The processing pipeline service (`services/processing-pipeline.ts`) orchestrates phases 2-4 automatically with retries, resumability, and cancellation support.
21
20
 
22
21
  ## Tools
23
22
 
@@ -31,86 +30,85 @@ Query the processing status of a media asset. Returns the asset metadata along w
31
30
 
32
31
  ### extract_keyframes
33
32
 
34
- Extract keyframes from a video asset at regular intervals using ffmpeg. Frames are saved as JPEG images and registered in the database for subsequent vision analysis.
33
+ Preprocess a video asset: detect dead time via mpdecimate, segment the video into windows, extract downscaled keyframes at regular intervals, build a subject registry, and write a pipeline manifest.
35
34
 
36
- ### analyze_keyframes
35
+ Parameters:
36
+ - `asset_id` (required) — ID of the media asset.
37
+ - `interval_seconds` — Interval between keyframes (default: 3s).
38
+ - `segment_duration` — Duration of each segment window (default: 20s).
39
+ - `dead_time_threshold` — Sensitivity for dead-time detection (default: 0.02).
40
+ - `section_config` — Path to a JSON file with manual section boundaries.
41
+ - `skip_dead_time` — Whether to detect and skip dead time (default: true).
42
+ - `short_edge` — Short edge resolution for downscaled frames in pixels (default: 480).
37
43
 
38
- Analyze extracted keyframes using Claude VLM (vision language model). Produces structured JSON output with scene descriptions, subjects, actions, and context. Supports resumability by skipping already-analyzed frames.
44
+ ### analyze_keyframes
39
45
 
40
- ### detect_events
46
+ Map video segments through Gemini's structured output API. Reads frames from the preprocess manifest, sends each segment to Gemini with assistant-provided extraction instructions and a JSON Schema for guaranteed structured output. Supports concurrency pooling, cost tracking, resumability (skips segments with existing results), and automatic retries with exponential backoff.
41
47
 
42
- Detect events from timeline segments using configurable detection rules. Built-in rule types:
43
- - **segment_transition**Fires when a specified field changes between adjacent segments.
44
- - **short_segment**Fires when a segment's duration is below a threshold.
45
- - **attribute_match**Fires when segment attribute values match a regex pattern.
48
+ Parameters:
49
+ - `asset_id` (required) — ID of the media asset.
50
+ - `system_prompt` (required) — Extraction instructions for Gemini.
51
+ - `output_schema` (required) — JSON Schema for structured output.
52
+ - `context` — Additional context to include in the prompt.
53
+ - `model` — Gemini model to use (default: `gemini-2.5-flash`).
54
+ - `concurrency` — Maximum concurrent API requests (default: 10).
55
+ - `max_retries` — Retry attempts per segment on failure (default: 3).
46
56
 
47
- If no rules are provided, sensible defaults are applied based on the event type.
57
+ ### query_media
48
58
 
49
- ### query_media_events
59
+ Query video analysis data using natural language. Sends map output (from analyze_keyframes) to Claude for intelligent analysis and Q&A. Supports arbitrary questions about video content.
50
60
 
51
- Query detected events using natural language. Parses the query into structured filters (event type, count, confidence threshold, time range) and returns matching events ranked by confidence.
61
+ Parameters:
62
+ - `asset_id` (required) — ID of the media asset.
63
+ - `query` (required) — Natural language query about the video data.
64
+ - `system_prompt` — Optional system prompt for Claude.
65
+ - `model` — LLM model to use (default: `claude-sonnet-4-6`).
52
66
 
53
67
  ### generate_clip
54
68
 
55
69
  Extract a video clip from a media asset using ffmpeg. Applies configurable pre/post-roll padding (clamped to file boundaries), outputs the clip as a temporary file.
56
70
 
57
- ### select_tracking_profile
58
-
59
- Configure which event capabilities are enabled for a media asset. Capabilities are organized into tiers:
60
- - **Ready**: Production-quality detection, included by default.
61
- - **Beta**: Functional but may have accuracy gaps. Results include a confidence disclaimer.
62
- - **Experimental**: Early-stage detection, expect noise. Results include a confidence disclaimer.
63
-
64
- Call without capabilities to see available options; call with a capabilities array to set the profile.
65
-
66
- ### submit_feedback
67
-
68
- Submit feedback on a detected event. Supports four types:
69
- - **correct** — Confirms the event is accurate.
70
- - **incorrect** — Marks a false positive.
71
- - **boundary_edit** — Adjusts start/end times.
72
- - **missed** — Reports an event the system failed to detect.
73
-
74
- ### recalibrate
75
-
76
- Re-rank existing events based on accumulated feedback. Adjusts confidence scores using correction patterns (false positive rates, missed events, boundary adjustments).
77
-
78
71
  ### media_diagnostics
79
72
 
80
73
  Get a diagnostic report for a media asset. Returns:
81
- - **Processing stats**: total keyframes, vision outputs, timeline segments, events detected.
82
- - **Per-stage status and timing**: which stages have run, how long each took, current progress.
74
+ - **Processing stats**: total keyframes extracted.
75
+ - **Per-stage status and timing**: which stages (preprocess, map, reduce) have run, how long each took, current progress.
83
76
  - **Failure reasons**: last error from any failed stage.
84
- - **Cost estimation**: based on keyframe count and estimated API cost per frame.
85
- - **Feedback summary**: precision/recall estimates per event type.
77
+ - **Cost estimation**: based on segment count and Gemini 2.5 Flash pricing, plus a note about Claude reduce costs.
86
78
 
87
79
  ## Services
88
80
 
89
81
  ### Processing Pipeline (services/processing-pipeline.ts)
90
82
 
91
83
  Orchestrates the full processing pipeline with reliability features:
92
- - **Sequential execution**: keyframe_extraction, vision_analysis, timeline_generation, event_detection.
84
+ - **Sequential execution**: preprocess, map, reduce.
93
85
  - **Retries**: Each stage is retried with exponential backoff and jitter (configurable max retries and base delay).
94
86
  - **Resumability**: Checks processing_stages to find the last completed stage and resumes from there. Safe to restart after crashes.
95
87
  - **Cancellation**: Cooperative cancellation via asset status. Set asset status to `cancelled` and the pipeline stops between stages.
96
88
  - **Idempotency**: Re-ingesting the same file hash is a no-op. Re-running a fully completed pipeline is also a no-op.
97
- - **Graceful degradation**: If a stage fails mid-batch (e.g., vision API errors), partial results are saved. The stage is marked as failed with the error details, and the pipeline stops without losing work.
89
+ - **Graceful degradation**: If a stage fails mid-batch (e.g., Gemini API errors), partial results are saved. The stage is marked as failed with the error details, and the pipeline stops without losing work.
90
+
91
+ ### Preprocess (services/preprocess.ts)
98
92
 
99
- ### Timeline Generation (services/timeline-service.ts)
93
+ Handles dead-time detection, video segmentation, keyframe extraction, and subject registry building. Writes a pipeline manifest consumed by the Map phase.
100
94
 
101
- Aggregates vision analysis outputs into coherent timeline segments. Groups adjacent keyframes that share similar scene characteristics into time ranges with merged attributes.
95
+ ### Gemini Map (services/gemini-map.ts)
102
96
 
103
- ### Event Detection (services/event-detection-service.ts)
97
+ Sends video segments to Gemini 2.5 Flash with structured output schemas. Handles concurrency pooling, cost tracking, resumability, and retries.
104
98
 
105
- Evaluates configurable detection rules against timeline segments. Produces scored event candidates with weighted confidence.
99
+ ### Reduce (services/reduce.ts)
106
100
 
107
- ### Feedback Aggregation (services/feedback-aggregation.ts)
101
+ Sends Map output to Claude as text for analysis. Two modes:
102
+ - **One-shot merge**: assembles all Map results and sends to Claude with a system prompt.
103
+ - **Interactive Q&A**: loads existing map output + user query, sends to Claude.
108
104
 
109
- Computes precision/recall estimates per event type from user feedback. Provides structured JSON export for offline analysis.
105
+ ### Concurrency Pool (services/concurrency-pool.ts)
110
106
 
111
- ### Capability Registry (services/capability-registry.ts)
107
+ Limits concurrent API calls during the Map phase to avoid rate limiting.
112
108
 
113
- Maintains an extensible, domain-agnostic catalog of available tracking capabilities with tier classification. Other domains can register their own capabilities by calling `registerCapability()`.
109
+ ### Cost Tracker (services/cost-tracker.ts)
110
+
111
+ Tracks estimated API costs during pipeline execution.
114
112
 
115
113
  ## Operator Runbook
116
114
 
@@ -131,62 +129,41 @@ Use `media_diagnostics` to get a full diagnostic report:
131
129
  2. Read the `lastError` field for that stage to understand what went wrong.
132
130
  3. Check `durationMs` to see if a stage timed out or ran unusually long.
133
131
  4. Common failure causes:
134
- - **keyframe_extraction**: ffmpeg not installed, corrupt video file, disk full.
135
- - **vision_analysis**: ANTHROPIC_API_KEY not set, API rate limits, network errors.
136
- - **timeline_generation**: No keyframes or vision outputs exist (earlier stage skipped or failed).
137
- - **event_detection**: No timeline segments exist.
132
+ - **preprocess**: ffmpeg not installed, corrupt video file, disk full.
133
+ - **map**: Gemini API key not configured, API rate limits, network errors.
134
+ - **reduce**: No LLM provider configured, no map output exists.
138
135
 
139
136
  After fixing the root cause, re-run the failed stage. The pipeline is resumable — it picks up from where it left off.
140
137
 
141
- ### Configuring Tracking Profiles
142
-
143
- 1. Call `select_tracking_profile` with just the `asset_id` to see available capabilities and their tiers.
144
- 2. Call again with a `capabilities` array to enable the desired event types.
145
- 3. Only enabled capabilities are returned by `query_media_events`.
146
- 4. The capability registry is extensible — new domains can register capabilities via `registerCapability()` in `services/capability-registry.ts`.
147
-
148
- ### Feedback and Recalibration
149
-
150
- 1. Review detected events using `query_media_events`.
151
- 2. For each event, submit feedback via `submit_feedback`:
152
- - Mark correct detections as `correct` to build precision data.
153
- - Mark false positives as `incorrect`.
154
- - Adjust boundaries with `boundary_edit`.
155
- - Report missed events with `missed` (creates a new event record).
156
- 3. Run `recalibrate` to re-rank events based on accumulated feedback.
157
- 4. Use `media_diagnostics` to check precision/recall estimates after feedback.
158
-
159
138
  ### Cost Expectations
160
139
 
161
- Vision analysis is the primary cost driver. Cost scales linearly with video duration and keyframe interval:
140
+ The Map phase (Gemini 2.5 Flash) is the primary cost driver. Cost scales with video duration, keyframe interval, and segment size:
162
141
 
163
- | Video Duration | Interval | Keyframes | Estimated Cost |
164
- |----------------|----------|-----------|----------------|
165
- | 30 min | 3s | ~600 | ~$1.80 |
166
- | 60 min | 3s | ~1,200 | ~$3.60 |
167
- | 90 min | 3s | ~1,800 | ~$5.40 |
168
- | 90 min | 5s | ~1,080 | ~$3.24 |
142
+ | Video Duration | Interval | Keyframes | Segments (~10 frames each) | Estimated Map Cost |
143
+ |----------------|----------|-----------|----------------------------|--------------------|
144
+ | 30 min | 3s | ~600 | ~60 | ~$0.06 |
145
+ | 60 min | 3s | ~1,200 | ~120 | ~$0.12 |
146
+ | 90 min | 3s | ~1,800 | ~180 | ~$0.18 |
147
+ | 90 min | 5s | ~1,080 | ~108 | ~$0.11 |
169
148
 
170
- Increasing the keyframe interval reduces cost proportionally but may miss short-duration events. The `media_diagnostics` tool provides per-asset cost estimates.
149
+ The Reduce phase (Claude) adds a small additional cost per query. The `media_diagnostics` tool provides per-asset cost estimates.
171
150
 
172
151
  ### Known Limitations
173
152
 
174
153
  - **ffmpeg required**: Keyframe extraction and clip generation require ffmpeg to be installed on the host.
175
154
  - **Single-file ingestion**: Each `ingest_media` call processes one file. Batch ingestion is not yet supported.
176
- - **Vision model latency**: Analyzing keyframes is the slowest stage. A 90-minute video at 3-second intervals requires ~1,800 API calls.
177
- - **Scene similarity heuristic**: Timeline segmentation uses Jaccard similarity on subjects — it works well for distinct scenes but may over-merge visually similar but semantically different moments.
178
- - **Detection rules are heuristic**: Event detection uses rule-based scoring, not ML. Accuracy depends on how well the rules match the target event patterns. Use feedback and recalibration to improve over time.
155
+ - **Gemini rate limits**: The Map phase uses concurrency pooling (default: 10 concurrent API calls) to stay within API limits. Reduce concurrency if you hit HTTP 429 (rate limit) errors.
179
156
  - **No real-time processing**: The pipeline processes pre-recorded media files. Live/streaming video is not supported.
180
157
 
181
158
  ### Troubleshooting
182
159
 
183
160
  | Symptom | Likely Cause | Fix |
184
161
  |---------|-------------|-----|
185
- | "No keyframes found" | extract_keyframes not run or failed | Check keyframe_extraction stage status; re-run if needed |
186
- | "ANTHROPIC_API_KEY not set" | Missing env var | Set ANTHROPIC_API_KEY in the environment |
187
- | Vision analysis very slow | Large video, small interval | Increase interval_seconds or use smaller batch_size |
188
- | Low event confidence | Detection rules too broad | Tune rules: increase weights on high-signal rules, use tighter regex patterns |
189
- | Many false positives | Rules overfitting on noise | Submit `incorrect` feedback, then run `recalibrate` |
162
+ | "No keyframes found" | extract_keyframes not run or failed | Check preprocess stage status; re-run if needed |
163
+ | "No map output found" | analyze_keyframes not run | Run analyze_keyframes with appropriate system_prompt and output_schema |
164
+ | "No LLM provider available" | API key not configured | Add an LLM provider API key in Settings |
165
+ | Map phase slow | Large video, small interval | Increase interval_seconds to produce fewer segments, or raise concurrency if you are not hitting rate limits |
166
+ | Gemini returns errors | Rate limits or schema issues | Check max_retries setting; simplify output_schema if needed |
190
167
  | Pipeline stuck at "processing" | Stage crashed without updating status | Use `media_diagnostics` to find the stuck stage; re-run manually |
191
168
 
192
169
  ## Usage Notes
@@ -195,5 +172,5 @@ Increasing the keyframe interval reduces cost proportionally but may miss short-
195
172
  - Supported media types: video (mp4, mov, avi, mkv, webm, etc.), audio (mp3, wav, m4a, etc.), and images (png, jpg, gif, webp, etc.).
196
173
  - For video and audio files, duration is automatically extracted via ffprobe (requires ffmpeg to be installed).
197
174
  - Duplicate files are detected by content hash and return the existing asset record.
198
- - The `analyze_keyframes` tool is marked as medium risk because it makes external API calls to Claude VLM, which incur costs.
199
- - All schema tables, services, and tool interfaces are media-generic. Domain-specific interpretation belongs in VLM prompt templates.
175
+ - The `analyze_keyframes` tool is marked as medium risk because it makes external API calls to Gemini, which incur costs.
176
+ - All schema tables, services, and tool interfaces are media-generic. Domain-specific interpretation belongs in the system_prompt and output_schema parameters.