@okrlinkhub/agent-factory 3.0.2 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +235 -31
  2. package/dist/client/bridge.d.ts +1 -0
  3. package/dist/client/bridge.d.ts.map +1 -1
  4. package/dist/client/bridge.js.map +1 -1
  5. package/dist/client/index.d.ts +29 -3
  6. package/dist/client/index.d.ts.map +1 -1
  7. package/dist/client/index.js +59 -3
  8. package/dist/client/index.js.map +1 -1
  9. package/dist/component/_generated/api.d.ts +2 -0
  10. package/dist/component/_generated/api.d.ts.map +1 -1
  11. package/dist/component/_generated/api.js.map +1 -1
  12. package/dist/component/_generated/component.d.ts +140 -2
  13. package/dist/component/_generated/component.d.ts.map +1 -1
  14. package/dist/component/flyCleanup.d.ts +32 -0
  15. package/dist/component/flyCleanup.d.ts.map +1 -0
  16. package/dist/component/flyCleanup.js +272 -0
  17. package/dist/component/flyCleanup.js.map +1 -0
  18. package/dist/component/identity.d.ts +60 -2
  19. package/dist/component/identity.d.ts.map +1 -1
  20. package/dist/component/identity.js +372 -32
  21. package/dist/component/identity.js.map +1 -1
  22. package/dist/component/lib.d.ts +2 -1
  23. package/dist/component/lib.d.ts.map +1 -1
  24. package/dist/component/lib.js +2 -1
  25. package/dist/component/lib.js.map +1 -1
  26. package/dist/component/providers/fly.d.ts +23 -2
  27. package/dist/component/providers/fly.d.ts.map +1 -1
  28. package/dist/component/providers/fly.js +15 -3
  29. package/dist/component/providers/fly.js.map +1 -1
  30. package/dist/component/pushing.d.ts +4 -4
  31. package/dist/component/queue.d.ts +12 -7
  32. package/dist/component/queue.d.ts.map +1 -1
  33. package/dist/component/queue.js +9 -0
  34. package/dist/component/queue.js.map +1 -1
  35. package/dist/component/scheduler.d.ts +8 -8
  36. package/dist/component/scheduler.d.ts.map +1 -1
  37. package/dist/component/scheduler.js +22 -2
  38. package/dist/component/scheduler.js.map +1 -1
  39. package/dist/component/schema.d.ts +16 -4
  40. package/dist/component/schema.d.ts.map +1 -1
  41. package/dist/component/schema.js +16 -0
  42. package/dist/component/schema.js.map +1 -1
  43. package/package.json +1 -1
  44. package/src/client/bridge.ts +1 -0
  45. package/src/client/index.ts +68 -3
  46. package/src/component/_generated/api.ts +2 -0
  47. package/src/component/_generated/component.ts +188 -8
  48. package/src/component/flyCleanup.ts +386 -0
  49. package/src/component/identity.ts +425 -31
  50. package/src/component/lib.test.ts +197 -3
  51. package/src/component/lib.ts +3 -0
  52. package/src/component/providers/fly.ts +39 -5
  53. package/src/component/queue.ts +11 -0
  54. package/src/component/scheduler.ts +23 -2
  55. package/src/component/schema.ts +16 -0
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Convex Agent Factory
2
2
 
3
- [![npm version](https://badge.fury.io/js/@example%2Fagent-factory.svg)](https://badge.fury.io/js/@example%2Fagent-factory)
3
+ [npm version](https://badge.fury.io/js/@example%2Fagent-factory)
4
4
 
5
5
  A Convex component for hydration-based orchestration of OpenClaw agents on a generic worker pool (Fly Machines first, provider abstraction built-in).
6
6
 
@@ -20,29 +20,83 @@ app.use(agentFactory);
20
20
  export default app;
21
21
  ```
22
22
 
23
+ ## Upgrade to 3.1.0
24
+
25
+ Version `3.1.0` introduces a **Telegram bot identity routing change**.
26
+
27
+ What changed:
28
+
29
+ - Telegram integrations now derive a stable `botIdentity` from `Telegram getMe` using the imported bot token.
30
+ - `agentProfiles`, `pairingCodes`, and `identityBindings` now persist `botIdentity`.
31
+ - Telegram webhook ingress now requires `X-Telegram-Bot-Api-Secret-Token` and resolves agents by `botIdentity + telegramUserId/chatId`.
32
+ - Pairing is now bot-scoped, so a bot token must be imported and verified before creating a new pairing code.
33
+ - Hydrated bridge runtime config now carries the resolved profile `botIdentity` alongside the effective bridge secret ref.
34
+
35
+ Why this matters:
36
+
37
+ - This release fixes cross-bot collisions where different users chatting with different bots could still be resolved through only `telegramUserId` / `telegramChatId`.
38
+ - The supported model is `1 user -> 1 bot`, with the receiving bot acting as the first discriminator of the whole Telegram flow.
39
+
40
+ Important compatibility notes for existing consumer apps:
41
+
42
+ - If your app already has paired Telegram agents created before `3.1.0`, treat them as legacy rows until they are reconciled or re-paired.
43
+ - If your app wraps `configureTelegramWebhook`, `importTelegramTokenForAgent`, or `getUserAgentOnboardingState`, update local validators/types to include `botIdentity` and `secretTokenConfigured`.
44
+ - If your onboarding UI previously allowed pairing before token verification, it should now require successful token import first.
45
+ - If you have custom Telegram ingress code outside `registerRoutes(...)`, you must validate `X-Telegram-Bot-Api-Secret-Token` and pass `botIdentity` into pairing / resolution flows.
46
+
47
+ Recommended upgrade checklist:
48
+
49
+ 1. Upgrade the package to `3.1.0`.
50
+ 2. Regenerate Convex bindings in the consumer app.
51
+ 3. Re-import or reconcile Telegram bot tokens so each `agentProfile` stores the correct `botIdentity`.
52
+ 4. Reconfigure Telegram webhooks so the component can set the new `secret_token`.
53
+ 5. Run the soft-reset migration for legacy Telegram bindings without `botIdentity`.
54
+ 6. Create a fresh pairing for existing bots unless you have a verified reconciliation path in place.
55
+ 7. Smoke-test two different bots end-to-end and confirm each webhook resolves and bridges through the correct `agentKey`.
56
+
57
+ Recommended rollout for apps that already have this component installed:
58
+
59
+ 1. Deploy the updated consumer app code first so it understands the new return shapes and onboarding readiness rules.
60
+ 2. Upgrade to `@okrlinkhub/agent-factory@3.1.0`.
61
+ 3. Regenerate codegen / bindings.
62
+ 4. For every existing bot token, run the token reconciliation or re-import flow, then call webhook configuration again.
63
+ 5. Run `softResetTelegramBindingsMissingBotIdentity` during the rollout window so legacy bot-agnostic bindings are marked and optionally revoked in a controlled way.
64
+ 6. Re-pair existing bots that were active before this release.
65
+
66
+ New / updated component surfaces involved in this rollout:
67
+
68
+ - `importTelegramTokenForAgent`: now derives `botIdentity` from `Telegram getMe` and persists it before continuing.
69
+ - `configureTelegramWebhook`: now configures Telegram `secret_token` using the derived `botIdentity`.
70
+ - `reconcileTelegramBotIdentityForAgent`: verifies a stored token again and re-syncs `botIdentity` on the profile.
71
+ - `softResetTelegramBindingsMissingBotIdentity`: explicit migration helper for legacy Telegram bindings and pending pairings.
72
+
23
73
  ## Upgrade to 1.0.0
24
74
 
25
75
  Version `1.0.0` introduces a **worker lifecycle breaking change**.
26
76
 
27
77
  What changed:
78
+
28
79
  - `workers.status` is no longer binary.
29
80
  - New persisted statuses are now possible: `draining` and `stopping`.
30
81
  - The lifecycle is now `active -> draining -> stopping -> stopped`.
31
82
  - `active` now means **claimable**, not just "row exists and machine once existed".
32
83
 
33
84
  Current status values:
85
+
34
86
  - `active`: worker is healthy and can claim new jobs.
35
87
  - `draining`: worker must stop claiming and is waiting for final snapshot / shutdown progression.
36
88
  - `stopping`: final snapshot is ready or provider teardown is in progress / pending retry.
37
89
  - `stopped`: terminal state for that worker instance. Stopped workers are never reactivated.
38
90
 
39
91
  Important compatibility notes:
92
+
40
93
  - **No manual data migration is required** if your existing rows only contain `active` or `stopped`.
41
94
  - **Consumer code may require updates** if it assumes `worker.status` can only be `active` or `stopped`.
42
95
  - Any exhaustive `switch` / `if` logic, dashboards, alerts, or admin tools that parse worker status must handle `draining` and `stopping`.
43
96
  - `workerControlState` is stricter now: workers in non-claimable states, stale-heartbeat workers, and overdue workers return `shouldStop = true`.
44
97
 
45
98
  Recommended upgrade checklist:
99
+
46
100
  1. Upgrade the package to `1.0.0`.
47
101
  2. Regenerate Convex bindings in the consumer app.
48
102
  3. Update any consumer-side status handling for `workers.status`.
@@ -72,6 +126,7 @@ export default crons;
72
126
  Version `2.0.0` introduces a **conversation identity breaking change**.
73
127
 
74
128
  What changed:
129
+
75
130
  - `conversationId` is now required for worker snapshot upload and restore APIs.
76
131
  - `dataSnapshots.conversationId` is now mandatory in persisted storage.
77
132
  - Snapshot restore no longer falls back to the latest archive for `workspaceId + agentKey`.
@@ -79,6 +134,7 @@ What changed:
79
134
  - Telegram pairing no longer changes the conversation lineage used for chat history and snapshots.
80
135
 
81
136
  Important warnings:
137
+
82
138
  - This release is intentionally **not backward compatible** with legacy snapshots created without `conversationId`.
83
139
  - Existing non-prod agents, snapshots, bindings, and conversations created with the old model should be deleted before rollout.
84
140
  - If a worker runtime or consumer app still calls snapshot APIs without `conversationId`, the call will now fail at validation time.
@@ -86,6 +142,7 @@ Important warnings:
86
142
  - If you have custom dashboards, scripts, or admin tools that query snapshots only by `agentKey`, they must be updated to scope by `workspaceId + agentKey + conversationId`.
87
143
 
88
144
  Quick upgrade checklist:
145
+
89
146
  1. Delete legacy non-production agents, snapshots, conversations, and identity bindings created before this release.
90
147
  2. Upgrade the package to `2.0.0`.
91
148
  3. Regenerate Convex bindings in the consumer app.
@@ -95,6 +152,7 @@ Quick upgrade checklist:
95
152
  7. Smoke-test one manual user-agent flow, one Telegram-paired flow, and one worker snapshot restore flow before wider rollout.
96
153
 
97
154
  Recommended release notes to communicate to consumers:
155
+
98
156
  - treat this as a major upgrade, not a safe drop-in patch;
99
157
  - start from a clean non-prod environment;
100
158
  - roll out workers and consumer app together;
@@ -107,11 +165,13 @@ Recommended release notes to communicate to consumers:
107
165
  Starting with this release, the component also exposes an additive set of **user-facing aggregate APIs** for building pages like `MyAgent` and `MyAgentNew` without reconstructing state in the consumer app.
108
166
 
109
167
  What stays in the consumer app:
168
+
110
169
  - naming policy for agents and Telegram usernames
111
170
  - product-specific onboarding copy
112
171
  - cron presets or local `agentSettings`
113
172
 
114
173
  What is now exposed directly by the component:
174
+
115
175
  - user agent overview and active/history lookup
116
176
  - onboarding and pairing state
117
177
  - conversation view and queue items for a user agent
@@ -119,6 +179,7 @@ What is now exposed directly by the component:
119
179
  - user-centric snapshot listing and latest snapshot lookup
120
180
 
121
181
  Core APIs added for this pattern:
182
+
122
183
  - `listUserAgents`
123
184
  - `getUserAgent`
124
185
  - `getActiveUserAgent`
@@ -187,6 +248,7 @@ when you pass values inline from the UI, but automatic paths (enqueue + cron) re
187
248
  these stored secrets.
188
249
 
189
250
  If one is missing, reconcile fails with errors like:
251
+
190
252
  - `Missing Convex URL. Import an active 'convex.url' secret or pass convexUrl explicitly.`
191
253
  - `Missing Fly API token. Import an active 'fly.apiToken' secret or pass flyApiToken explicitly.`
192
254
 
@@ -205,6 +267,7 @@ npx convex run example:importSecret '{
205
267
  ```
206
268
 
207
269
  Important URL mapping:
270
+
208
271
  - Fly worker environment variable `CONVEX_URL` must use the `.convex.cloud` URL.
209
272
  - Component secret `convex.url` must use the `.convex.site` URL (used by component workflows and webhook-facing integration paths).
210
273
 
@@ -247,6 +310,7 @@ export const enqueueTelegramMessage = mutation({
247
310
  ```
248
311
 
249
312
  After enqueue, a **queue processor runtime** must process the queue by calling:
313
+
250
314
  - `components.agentFactory.lib.claim`
251
315
  - `components.agentFactory.lib.getHydrationBundle`
252
316
  - `components.agentFactory.lib.heartbeat`
@@ -257,6 +321,7 @@ explicitly alongside `workspaceId` and `agentKey`; the component no longer suppo
257
321
  fallbacks that select the latest archive for an agent without matching the conversation.
258
322
 
259
323
  Worker autoscaling reconcile now follows a hybrid model:
324
+
260
325
  - `enqueue` schedules an immediate async reconcile trigger (`runAfter(0, ...)`)
261
326
  - a periodic cron fallback is still recommended to recover from missed triggers
262
327
  - desired worker count is conversation-aware, so multiple queued messages on the same `conversationId` do not over-scale worker spawn
@@ -287,6 +352,89 @@ export default crons;
287
352
 
288
353
  This cron is a safety net. The primary path remains enqueue-triggered reconcile.
289
354
 
355
+ ### Component Fly cleanup for billing protection
356
+
357
+ The package now supports a dedicated Fly cleanup action in the component itself. The intent is to
358
+ protect the consumer's billing by giving every integration the same tested cleanup path instead of
359
+ reimplementing destructive Fly logic in each consumer app.
360
+
361
+ The public component action is exposed as:
362
+
363
+ - `components.agentFactory.lib.runFlyCleanup`
364
+
365
+ What the action does:
366
+
367
+ - resolves the target Fly app from `providerRuntimeConfig` unless the caller passes an explicit override
368
+ - reads `fly.apiToken` from the component secret store unless the caller passes an explicit override
369
+ - inventories machines and destroys them per machine ID
370
+ - verifies machine count again
371
+ - inventories volumes and destroys them per volume ID
372
+ - verifies volume count again and returns a report with counts, warnings, and errors
373
+
374
+ What the consumer still owns:
375
+
376
+ - choosing whether to run this policy at all
377
+ - choosing the schedule window
378
+ - optionally exposing an admin-only helper/wrapper
379
+
380
+ Thin wrapper through `exposeApi(...)`:
381
+
382
+ ```ts
383
+ const {
384
+ startWorkers,
385
+ runFlyCleanup,
386
+ } = exposeApi(components.agentFactory, {
387
+ providerConfig: EXAMPLE_PROVIDER_CONFIG,
388
+ auth: async (ctx, operation) => {
389
+ const userId = await getAuthUserId(ctx);
390
+ if (userId === null && operation.type === "write") {
391
+ throw new Error("Unauthorized");
392
+ }
393
+ return userId;
394
+ },
395
+ });
396
+ ```
397
+
398
+ This keeps the consumer surface small: the wrapper only forwards auth and the local
399
+ `providerConfig`, while the package owns the actual Fly inventory, destroy, and verification logic.
400
+
401
+ Minimal consumer cron wiring:
402
+
403
+ ```ts
404
+ import { cronJobs } from "convex/server";
405
+ import { api } from "./_generated/api";
406
+
407
+ const crons = cronJobs();
408
+
409
+ crons.interval(
410
+ "agent-factory reconcile workers fallback",
411
+ { minutes: 5 },
412
+ api.example.startWorkers,
413
+ {},
414
+ );
415
+
416
+ crons.cron(
417
+ "agent-factory nightly fly cleanup",
418
+ "0 3 * * *",
419
+ api.example.runFlyCleanup,
420
+ {},
421
+ );
422
+
423
+ export default crons;
424
+ ```
425
+
426
+ Recommended consumer helper:
427
+
428
+ - keep it thin
429
+ - call `components.agentFactory.lib.runFlyCleanup` directly, or expose `runFlyCleanup` through `exposeApi(...)`
430
+ - avoid duplicating inventory, destroy sequencing, or verification logic in the consumer
431
+
432
+ Operational prerequisites:
433
+
434
+ - `fly.apiToken` must be present as an active component secret
435
+ - the effective `providerRuntimeConfig` must point at the Fly app you want to protect
436
+ - the cleanup remains intentionally destructive, so it should only target a single explicit app per deployment
437
+
290
438
  ### Agent pushing schedule (hourly dispatcher)
291
439
 
292
440
  For agent pushing, the recommended scheduler is an hourly cron that dispatches due jobs:
@@ -308,6 +456,7 @@ export default crons;
308
456
  ```
309
457
 
310
458
  Important product constraint:
459
+
311
460
  - job configuration supports only fixed schedule slots (`HH:mm`, plus weekday/day-of-month)
312
461
  - minute-based recurrence ("every N minutes") is intentionally not supported
313
462
 
@@ -318,16 +467,19 @@ Admin broadcast is also supported through `sendBroadcastToAllActiveAgents`, whic
318
467
  The model/provider is controlled by Fly worker environment variables (for example `OPENCLAW_AGENT_MODEL`, `MOONSHOT_API_KEY`, `OPENAI_API_KEY`) and applied at runtime by the worker image bootstrap.
319
468
 
320
469
  Why:
470
+
321
471
  - keeps model routing as infrastructure/runtime concern
322
472
  - avoids per-agent schema coupling to a specific LLM field
323
473
  - lets you switch model/provider with a Fly deploy or env change only
324
474
 
325
475
  Practical notes:
476
+
326
477
  - set model/provider env on the Fly app (`fly secrets set` / `[env]` in `fly.toml`)
327
478
  - keep `agentProfiles` focused on identity, bridge configuration, and secrets references
328
479
  - worker image tag stays centralized in `src/component/config.ts` (`DEFAULT_WORKER_IMAGE`)
329
480
 
330
481
  If you use `exposeApi(...)`, the worker contract is available directly on the consumer API surface:
482
+
331
483
  - `workerClaim`
332
484
  - `workerHydrationBundle`
333
485
  - `workerHeartbeat`
@@ -339,6 +491,7 @@ If you use `exposeApi(...)`, the worker contract is available directly on the co
339
491
  `agent-factory` does **not** execute `agent-bridge` tools.
340
492
 
341
493
  Its role stops at:
494
+
342
495
  - storing bridge settings on the agent profile
343
496
  - resolving bridge secrets from the component secret store
344
497
  - exposing `bridgeRuntimeConfig` in hydration
@@ -346,7 +499,7 @@ Its role stops at:
346
499
 
347
500
  Tool execution belongs to the OpenClaw worker runtime / worker image, not to `agent-factory`.
348
501
 
349
- 1) Configure an agent profile with bridge settings:
502
+ 1. Configure an agent profile with bridge settings:
350
503
 
351
504
  ```ts
352
505
  await ctx.runMutation(components.agentFactory.lib.configureAgent, {
@@ -363,7 +516,7 @@ await ctx.runMutation(components.agentFactory.lib.configureAgent, {
363
516
  });
364
517
  ```
365
518
 
366
- 2) Import bridge service key in component secrets:
519
+ 2. Import bridge service key in component secrets:
367
520
 
368
521
  ```sh
369
522
  npx convex run example:importSecret '{
@@ -373,6 +526,7 @@ npx convex run example:importSecret '{
373
526
  ```
374
527
 
375
528
  Naming convention supported by hydration resolver:
529
+
376
530
  - per-agent service key: `agent-bridge.serviceKey.<agentKey>` (recommended)
377
531
  - global service key fallback: `agent-bridge.serviceKey`
378
532
  - optional profile override: `bridgeConfig.serviceKeySecretRef`
@@ -399,6 +553,7 @@ Do **not** treat `agent-factory` as the place where `bridge.<functionKey>` tool
399
553
  If your OpenClaw agents use `agent-bridge`, that execution flow must live in the worker runtime itself.
400
554
 
401
555
  Fallback env (worker-side only, used when hydration misses values):
556
+
402
557
  - `OPENCLAW_AGENT_BRIDGE_BASE_URL` or `AGENT_BRIDGE_BASE_URL`
403
558
  - `OPENCLAW_SERVICE_ID` or `AGENT_BRIDGE_SERVICE_ID`
404
559
  - `OPENCLAW_SERVICE_KEY` or `AGENT_BRIDGE_SERVICE_KEY`
@@ -408,12 +563,14 @@ Fallback env (worker-side only, used when hydration misses values):
408
563
 
409
564
  When `agent-factory` is used together with `agent-bridge`, spawned workers may need these environment variables available in their runtime:
410
565
 
411
- | Env var | Component secret ref | Purpose |
412
- |---------|----------------------|---------|
413
- | `OPENCLAW_SERVICE_ID` | `agent-bridge.serviceId` | Service identity for bridge auth |
414
- | `OPENCLAW_SERVICE_KEY` | `agent-bridge.serviceKey` | Service key for bridge auth |
566
+
567
+ | Env var | Component secret ref | Purpose |
568
+ | -------------------------------- | ---------------------------------- | -------------------------------------------------- |
569
+ | `OPENCLAW_SERVICE_ID` | `agent-bridge.serviceId` | Service identity for bridge auth |
570
+ | `OPENCLAW_SERVICE_KEY` | `agent-bridge.serviceKey` | Service key for bridge auth |
415
571
  | `OPENCLAW_LINKING_SHARED_SECRET` | `agent-bridge.linkingSharedSecret` | Shared secret for `execute-on-behalf` user linking |
416
572
 
573
+
417
574
  The scheduler forwards these from the component secret store into each machine's env at spawn time. These values prepare the worker runtime for bridge usage; they do not implement bridge tool execution inside `agent-factory`.
418
575
 
419
576
  Import all three into the component secret store:
@@ -445,6 +602,7 @@ export default http;
445
602
  ```
446
603
 
447
604
  This exposes:
605
+
448
606
  - `POST /agent-factory/telegram/webhook` -> enqueue-only (no business processing)
449
607
 
450
608
  Important: the webhook/router only receives ingress and enqueues.
@@ -472,6 +630,7 @@ await configureTelegramWebhook({
472
630
  ```
473
631
 
474
632
  This API:
633
+
475
634
  - loads bot token from component secrets (active secret for `secretRef`)
476
635
  - calls Telegram `setWebhook`
477
636
  - verifies status with `getWebhookInfo`
@@ -483,31 +642,35 @@ Typical one-time pairing flow:
483
642
 
484
643
  1. Configure webhook and verify `isReady === true` via `configureTelegramWebhook`.
485
644
  2. Your app authenticates the user and creates a one-time pairing code via
486
- `createPairingCode`.
645
+ `createPairingCode`.
487
646
  3. User opens Telegram deep-link (`/start <pairingCode>`).
488
647
  4. `registerRoutes(...)` webhook consumes the pairing code and performs
489
- `bindUserAgent` automatically with `source: "telegram_pairing"` and
648
+ `bindUserAgent` automatically with `source: "telegram_pairing"` and
490
649
  Telegram ids from the update.
491
650
  5. Webhook ingress then resolves the binding internally and enqueues with the mapped
492
- `agentKey`.
651
+ `agentKey`.
493
652
 
494
653
  Available pairing APIs (via `exposeApi(...)`):
654
+
495
655
  - `createPairingCode`
496
656
  - `getPairingCodeStatus`
497
657
  - `configureTelegramWebhook`
498
658
 
499
659
  Telegram token storage (multi-tenant):
660
+
500
661
  - store tenant token in component secrets with an agent-scoped ref (for example `telegram.botToken.<agentKey>`)
501
662
  - include that ref in `agentProfiles.secretsRef`
502
663
  - worker gets resolved plaintext from hydration bundle (`telegramBotToken`) at runtime
503
664
  - do not use a single global `TELEGRAM_BOT_TOKEN` on Fly app
504
665
 
505
666
  `registerRoutes(...)` supports this behavior with:
667
+
506
668
  - `resolveAgentKeyFromBinding` (default `true`)
507
669
  - `fallbackAgentKey` (default `"default"`)
508
670
  - `requireBindingForTelegram` (default `false`, when `true` rejects unbound users)
509
671
 
510
672
  Special handling for `/start`:
673
+
511
674
  - `/start <pairingCode>` attempts pairing consumption and does not enqueue the command.
512
675
  - invalid `/start` payload returns `200` with pairing error details to avoid Telegram retries.
513
676
 
@@ -530,9 +693,12 @@ flowchart LR
530
693
  flyWorkers --> claimLoop
531
694
  ```
532
695
 
696
+
697
+
533
698
  ## Data model
534
699
 
535
700
  Core tables:
701
+
536
702
  - `agentProfiles`
537
703
  - `conversations`
538
704
  - `messageQueue`
@@ -540,6 +706,7 @@ Core tables:
540
706
  - `secrets`
541
707
 
542
708
  Hydration/runtime tables:
709
+
543
710
  - `conversationHydrationCache`
544
711
  - `dataSnapshots`
545
712
 
@@ -558,12 +725,14 @@ Hydration/runtime tables:
558
725
 
559
726
  ## OpenClaw workspace persistence
560
727
 
561
- | OpenClaw source | Persistence layer |
562
- |---|---|
563
- | `AGENTS.md`, `SOUL.md`, `USER.md`, `IDENTITY.md`, `HEARTBEAT.md`, `TOOLS.md` | worker filesystem backup (`/data/workspace`) |
564
- | `memory/YYYY-MM-DD.md`, `MEMORY.md` | worker filesystem backup (`/data/workspace`) |
565
- | Skills and related assets | bundled directly in worker image (`openclaw-okr-image`) |
566
- | Conversation-specific deltas | `conversationHydrationCache` |
728
+
729
+ | OpenClaw source | Persistence layer |
730
+ | ---------------------------------------------------------------------------- | ------------------------------------------------------- |
731
+ | `AGENTS.md`, `SOUL.md`, `USER.md`, `IDENTITY.md`, `HEARTBEAT.md`, `TOOLS.md` | worker filesystem backup (`/data/workspace`) |
732
+ | `memory/YYYY-MM-DD.md`, `MEMORY.md` | worker filesystem backup (`/data/workspace`) |
733
+ | Skills and related assets | bundled directly in worker image (`openclaw-okr-image`) |
734
+ | Conversation-specific deltas | `conversationHydrationCache` |
735
+
567
736
 
568
737
  ## Failure model
569
738
 
@@ -576,6 +745,7 @@ Hydration/runtime tables:
576
745
  ## Config-first
577
746
 
578
747
  `src/component/config.ts` defines type-safe policies:
748
+
579
749
  - queue policy
580
750
  - retry policy
581
751
  - lease policy
@@ -585,6 +755,7 @@ Hydration/runtime tables:
585
755
  ## Fly.io provider notes
586
756
 
587
757
  The current provider implementation uses Fly Machines API endpoints for:
758
+
588
759
  - create machine
589
760
  - list machines
590
761
  - cordon machine
@@ -596,11 +767,13 @@ Do **not** share the same Fly app across multiple Convex backends/components tha
596
767
  their own queue polling/reconcile loop.
597
768
 
598
769
  Why this is required:
770
+
599
771
  - workers in a Fly app share the same control plane (create/list/stop),
600
772
  - each backend computes desired capacity from its own queue state only,
601
773
  - mixed backends in one app can stop each other's machines or produce unpredictable polling behavior.
602
774
 
603
775
  Recommended pattern:
776
+
604
777
  - one Convex backend -> one dedicated Fly app (for example `agent-factory-workers-prod`)
605
778
  - another Convex backend -> another dedicated Fly app (for example `agent-factory-workers-staging`)
606
779
  - keep `providerConfig.appName` and worker image registry aligned per backend/environment.
@@ -608,27 +781,32 @@ Recommended pattern:
608
781
  ### Worker image setup (required first step for custom skills)
609
782
 
610
783
  Any new skill you want inside OpenClaw agents must be added to the worker image source repo:
611
- - https://github.com/okrlinkhub/openclaw-okr-image
784
+
785
+ - [https://github.com/okrlinkhub/openclaw-okr-image](https://github.com/okrlinkhub/openclaw-okr-image)
612
786
 
613
787
  Fork this repository to maintain your own image with your custom skills/assets.
614
788
 
615
789
  For `globalSkills` managed by this component, the recommended runtime pattern is different:
790
+
616
791
  - store the source of truth in component tables `globalSkills`, `globalSkillVersions`, `globalSkillReleases`
617
792
  - treat each skill as a mini filesystem bundle (`files[]`), not as a single `sourceJs` blob
618
793
  - expose them through `getWorkerGlobalSkillsManifest`
619
794
  - let the worker image materialize them into `OPENCLAW_SKILLS_DIR` during prestart, before the OpenClaw gateway boots
620
795
 
621
796
  The manifest now carries an explicit on-disk layout contract for OpenClaw workspace skills:
797
+
622
798
  - `layoutVersion = openclaw-workspace-skill-v1`
623
799
  - `skillDirName`
624
800
  - `files[]` with `path`, `content`, `sha256`
625
801
 
626
802
  Breaking change in `3.0.0`:
803
+
627
804
  - `sourceJs` has been removed from the global skill model
628
805
  - existing legacy global skill rows must be deleted before moving to `3.0.0`
629
806
  - existing legacy skills must be republished as full bundles
630
807
 
631
808
  Bundle contract for `3.0.0`:
809
+
632
810
  - required user files:
633
811
  - `SKILL.md`
634
812
  - `scripts/index.mjs` or `scripts/index.cjs` (must match `moduleFormat`)
@@ -642,6 +820,7 @@ Extract a `Bundle files JSON` payload from an existing OpenClaw skill directory:
642
820
  Use this when you already have a correctly materialized skill inside an OpenClaw workspace and want to republish it as a `3.0.0` global skill bundle.
643
821
 
644
822
  Important:
823
+
645
824
  - run the command against the skill directory itself (for example `/path/to/workspace/skills/agent-bridge`)
646
825
  - the command automatically excludes `.af-global-skill.json`
647
826
  - hidden files other than `.af-global-skill.json` are excluded by default
@@ -720,11 +899,13 @@ EOF
720
899
  ```
721
900
 
722
901
  The resulting JSON should contain files like:
902
+
723
903
  - `SKILL.md`
724
904
  - `scripts/index.mjs`
725
905
  - any extra files such as `scripts/agent-bridge-cli.mjs`
726
906
 
727
907
  Recommended worker bootstrap order:
908
+
728
909
  1. restore snapshot into `/data`
729
910
  2. fetch `workerGlobalSkillsManifest`
730
911
  3. verify checksums and materialize skills atomically into `OPENCLAW_SKILLS_DIR`
@@ -733,11 +914,12 @@ Recommended worker bootstrap order:
733
914
  This avoids the historical race where the gateway could start before restored or DB-backed skills were present on disk.
734
915
 
735
916
  First required flow:
736
- 1) Take the image repo (fork/clone your own `openclaw-okr-image`).
737
- 2) Build and deploy it on your own Fly app.
738
- - Recommended build mode: remote Fly builder, `depot` disabled, `--remote-only`.
739
- 3) Use the published image as reference in `src/component/config.ts` (`DEFAULT_WORKER_IMAGE` is the source of truth).
740
- 4) Repeat the same process for every runtime/skills update.
917
+
918
+ 1. Take the image repo (fork/clone your own `openclaw-okr-image`).
919
+ 2. Build and deploy it on your own Fly app.
920
+ - Recommended build mode: remote Fly builder, `depot` disabled, `--remote-only`.
921
+ 3. Use the published image as reference in `src/component/config.ts` (`DEFAULT_WORKER_IMAGE` is the source of truth).
922
+ 4. Repeat the same process for every runtime/skills update.
741
923
 
742
924
  **Enterprise security model**: The worker image enforces a security policy where only skills explicitly included by the image maintainer are installed by default. Any other skills that may be present in the workspace are automatically removed on each worker startup. This ensures that only approved, vetted skills from the image source can execute within your OpenClaw agents.
743
925
 
@@ -745,20 +927,21 @@ First required flow:
745
927
 
746
928
  When you update the worker runtime (for example in `openclaw-okr-image/worker.mjs`), use this flow to publish and roll out safely.
747
929
 
748
- 1) Deploy with remote Fly builder (explicitly disabling Depot):
930
+ 1. Deploy with remote Fly builder (explicitly disabling Depot):
749
931
 
750
932
  ```sh
751
933
  cd /path/to/openclaw-okr-image
752
934
  fly deploy --remote-only --depot=false --yes
753
935
  ```
754
936
 
755
- 2) If deployment fails with `CONVEX_URL not set`, set the secret and retry:
937
+ 2. If deployment fails with `CONVEX_URL not set`, set the secret and retry:
756
938
 
757
939
  ```sh
758
940
  fly secrets set CONVEX_URL="https://<your-convex-deployment>.convex.cloud" -a <your-fly-worker-app>
759
941
  ```
760
942
 
761
- 3) Capture the new image tag from deploy output (for example
943
+ 3. Capture the new image tag from deploy output (for example
944
+
762
945
  `registry.fly.io/<your-fly-worker-app>:deployment-XXXXXXXXXXXX`), then update
763
946
  `src/component/config.ts` in this repo:
764
947
 
@@ -767,30 +950,34 @@ export const DEFAULT_WORKER_IMAGE =
767
950
  "registry.fly.io/<your-fly-worker-app>:deployment-XXXXXXXXXXXX";
768
951
  ```
769
952
 
770
- 4) Verify rollout:
953
+ 4. Verify rollout:
771
954
 
772
955
  ```sh
773
956
  fly status -a <your-fly-worker-app>
774
957
  fly logs -a <your-fly-worker-app> --no-tail
775
958
  ```
776
959
 
777
- 5) (Recommended) Commit the `DEFAULT_WORKER_IMAGE` update so scheduler-driven
960
+ 5. (Recommended) Commit the `DEFAULT_WORKER_IMAGE` update so scheduler-driven
961
+
778
962
  spawns use the exact image that was just deployed.
779
963
 
780
964
  Recommended runtime split:
965
+
781
966
  - Consumer app (Next.js/Vercel): webhook ingress + enqueue only
782
967
  - Fly worker app: claim/heartbeat/complete/fail loop
783
968
 
784
969
  Anti-pattern to avoid:
970
+
785
971
  - Telegram webhook -> Fly worker HTTP endpoint
786
972
  - Reason: workers are batch processors, may be scaled to zero, and should not be used as public ingress.
787
973
  - Global Fly env `TELEGRAM_BOT_TOKEN` for all tenants
788
974
  - Reason: breaks multi-tenant isolation and forces shared bot credentials.
789
975
 
790
976
  References:
791
- - https://docs.machines.dev/
792
- - https://fly.io/docs/machines/api/machines-resource/
793
- - https://docs.convex.dev/components/authoring
977
+
978
+ - [https://docs.machines.dev/](https://docs.machines.dev/)
979
+ - [https://fly.io/docs/machines/api/machines-resource/](https://fly.io/docs/machines/api/machines-resource/)
980
+ - [https://docs.convex.dev/components/authoring](https://docs.convex.dev/components/authoring)
794
981
 
795
982
  ## Development
796
983
 
@@ -799,4 +986,21 @@ npm i
799
986
  npm run dev
800
987
  ```
801
988
 
802
- Upgrade note for older releases: version `0.2.14` makes `agentProfiles.providerUserId`, `agentProfiles.soulMd`, `agentProfiles.clientMd`, and `agentProfiles.skills` optional only to let you clean them safely. Before upgrading to version `0.2.15`, where those fields are expected to be removed from the schema, install `0.2.14`, run `components.agentFactory.lib.clearDeprecatedAgentProfileFields` from Convex Dashboard, and make sure a second run returns `updated = 0`. This avoids schema validation issues caused by leftover stored values during the upgrade to `0.2.15`.
989
+ ### Release validation note
990
+
991
+ For npm releases cut from `develop`, known failures in the `example` Vitest suite are currently treated as non-blocking release noise.
992
+
993
+ What we still verify before publishing:
994
+
995
+ - `npm run lint`
996
+ - `npm run typecheck`
997
+ - `npm pack --dry-run`
998
+ - focused package tests when a change touches runtime behavior outside the example app
999
+
1000
+ What we intentionally do not require for publish:
1001
+
1002
+ - a fully green `npm test` run when the remaining failures are limited to the `example` app test surface and do not affect the published package itself
1003
+
1004
+ This choice was applied for the `3.0.2` npm release after confirming the package checks above passed and the remaining instability was in the example-only test flow.
1005
+
1006
+ Upgrade note for older releases: version `0.2.14` makes `agentProfiles.providerUserId`, `agentProfiles.soulMd`, `agentProfiles.clientMd`, and `agentProfiles.skills` optional only to let you clean them safely. Before upgrading to version `0.2.15`, where those fields are expected to be removed from the schema, install `0.2.14`, run `components.agentFactory.lib.clearDeprecatedAgentProfileFields` from Convex Dashboard, and make sure a second run returns `updated = 0`. This avoids schema validation issues caused by leftover stored values during the upgrade to `0.2.15`.
@@ -5,6 +5,7 @@ export type HydratedBridgeRuntimeConfig = {
5
5
  appKey: string | null;
6
6
  serviceKey: string | null;
7
7
  serviceKeySecretRef: string | null;
8
+ botIdentity?: string | null;
8
9
  };
9
10
  export type ResolvedBridgeRuntimeConfig = {
10
11
  baseUrl: string;
@@ -1 +1 @@
1
- {"version":3,"file":"bridge.d.ts","sourceRoot":"","sources":["../../src/client/bridge.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,2BAA2B,GAAG;IACxC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC,CAAC;AAEF,MAAM,MAAM,2BAA2B,GAAG;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG;IAClC,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,KAAK,yBAAyB,GAAG;IAC/B,MAAM,EAAE,2BAA2B,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,CAAC;IAClD,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB,KAAK,CAAC,EAAE;QACN,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;CACH,CAAC;AAEF,KAAK,8BAA8B,GAAG;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAClC,cAAc,EAAE,2BAA2B,GAAG,IAAI,CAAC;IACnD,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB,KAAK,CAAC,EAAE;QACN,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,CAAC;CAC1C,CAAC;AAEF,KAAK,gCAAgC,GACjC;IACE,OAAO,EAAE,KAAK,CAAC;CAChB,GACD;IACE,OAAO,EAAE,IAAI,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,qBAAqB,CAAC;CACjC,CAAC;AASN,wBAAgB,0BAA0B,CACxC,cAAc,EAAE,2BAA2B,GAAG,IAAI,GAAG,SAAS,EAC9D,GAAG,GAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAqD,GAEzF;IACE,EAAE,EAAE,IAAI,CAAC;IACT,MAAM,EAAE,2BAA2B,CAAC;CACrC,GACD;IACE,EAAE,EAAE,KAAK,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;CACf,CAsCJ;AAED,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAE1D;AAED,wBAAgB,6BAA6B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAK7E;AAED,wBAAsB,qBAAqB,CACzC,KAAK,EAAE,
yBAAyB,GAC/B,OAAO,CAAC,qBAAqB,CAAC,CA2ChC;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,8BAA8B,GACpC,OAAO,CAAC,gCAAgC,CAAC,CAiC3C"}
1
+ {"version":3,"file":"bridge.d.ts","sourceRoot":"","sources":["../../src/client/bridge.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,2BAA2B,GAAG;IACxC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,2BAA2B,GAAG;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG;IAClC,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,KAAK,yBAAyB,GAAG;IAC/B,MAAM,EAAE,2BAA2B,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,CAAC;IAClD,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB,KAAK,CAAC,EAAE;QACN,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;CACH,CAAC;AAEF,KAAK,8BAA8B,GAAG;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAClC,cAAc,EAAE,2BAA2B,GAAG,IAAI,CAAC;IACnD,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB,KAAK,CAAC,EAAE;QACN,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,CAAC;CAC1C,CAAC;AAEF,KAAK,gCAAgC,GACjC;IACE,OAAO,EAAE,KAAK,CAAC;CAChB,GACD;IACE,OAAO,EAAE,IAAI,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,qBAAqB,CAAC;CACjC,CAAC;AASN,wBAAgB,0BAA0B,CACxC,cAAc,EAAE,2BAA2B,GAAG,IAAI,GAAG,SAAS,EAC9D,GAAG,GAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAqD,GAEzF;IACE,EAAE,EAAE,IAAI,CAAC;IACT,MAAM,EAAE,2BAA2B,CAAC;CACrC,GACD;IACE,EAAE,EAAE,KAAK,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;CACf,CAsCJ;AAED,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAE1D;AAED,wBAAgB,6BAA6B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,
CAK7E;AAED,wBAAsB,qBAAqB,CACzC,KAAK,EAAE,yBAAyB,GAC/B,OAAO,CAAC,qBAAqB,CAAC,CA2ChC;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,8BAA8B,GACpC,OAAO,CAAC,gCAAgC,CAAC,CAiC3C"}