@elizaos/agent 2.0.0-alpha.144 → 2.0.0-alpha.151
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/app-lifeops/src/actions/inbox-digest.d.ts +2 -0
- package/apps/app-lifeops/src/actions/inbox-digest.d.ts.map +1 -0
- package/apps/app-lifeops/src/actions/inbox-digest.js +1 -0
- package/apps/app-lifeops/src/actions/inbox-respond.d.ts +2 -0
- package/apps/app-lifeops/src/actions/inbox-respond.d.ts.map +1 -0
- package/apps/app-lifeops/src/actions/inbox-respond.js +1 -0
- package/apps/app-lifeops/src/actions/inbox-triage.d.ts +2 -0
- package/apps/app-lifeops/src/actions/inbox-triage.d.ts.map +1 -0
- package/apps/app-lifeops/src/actions/inbox-triage.js +1 -0
- package/apps/app-lifeops/src/actions/inbox.d.ts +3 -0
- package/apps/app-lifeops/src/actions/inbox.d.ts.map +1 -0
- package/apps/app-lifeops/src/actions/inbox.js +856 -0
- package/apps/app-lifeops/src/actions/update-owner-profile.d.ts +3 -0
- package/apps/app-lifeops/src/actions/update-owner-profile.d.ts.map +1 -0
- package/apps/app-lifeops/src/actions/update-owner-profile.js +131 -0
- package/apps/app-lifeops/src/inbox/channel-deep-links.d.ts.map +1 -0
- package/apps/app-lifeops/src/inbox/config.d.ts.map +1 -0
- package/{packages/agent → apps/app-lifeops}/src/inbox/config.js +1 -1
- package/apps/app-lifeops/src/inbox/message-fetcher.d.ts.map +1 -0
- package/apps/app-lifeops/src/inbox/reflection.d.ts.map +1 -0
- package/apps/app-lifeops/src/inbox/repository.d.ts.map +1 -0
- package/apps/app-lifeops/src/inbox/triage-classifier.d.ts.map +1 -0
- package/apps/app-lifeops/src/inbox/types.d.ts.map +1 -0
- package/apps/app-lifeops/src/lifeops/index.d.ts +25 -0
- package/apps/app-lifeops/src/lifeops/index.d.ts.map +1 -0
- package/apps/app-lifeops/src/lifeops/index.js +24 -0
- package/apps/app-lifeops/src/lifeops/screen-context.d.ts +52 -0
- package/apps/app-lifeops/src/lifeops/screen-context.d.ts.map +1 -0
- package/apps/app-lifeops/src/lifeops/screen-context.js +332 -0
- package/apps/app-lifeops/src/plugin.d.ts +3 -0
- package/apps/app-lifeops/src/plugin.d.ts.map +1 -1
- package/apps/app-lifeops/src/plugin.js +16 -3
- package/apps/app-lifeops/src/providers/inbox-triage.d.ts +3 -0
- package/apps/app-lifeops/src/providers/inbox-triage.d.ts.map +1 -0
- package/apps/app-lifeops/src/providers/inbox-triage.js +89 -0
- package/package.json +6 -4
- package/packages/agent/src/actions/calendar.d.ts +1 -18
- package/packages/agent/src/actions/calendar.d.ts.map +1 -1
- package/packages/agent/src/actions/calendar.js +1 -3143
- package/packages/agent/src/actions/check-balance.d.ts +17 -0
- package/packages/agent/src/actions/check-balance.d.ts.map +1 -0
- package/packages/agent/src/actions/check-balance.js +167 -0
- package/packages/agent/src/actions/connector-resolver.d.ts +75 -0
- package/packages/agent/src/actions/connector-resolver.d.ts.map +1 -0
- package/packages/agent/src/actions/connector-resolver.js +245 -0
- package/packages/agent/src/actions/context-signal-lexicon.d.ts +1 -1
- package/packages/agent/src/actions/context-signal-lexicon.d.ts.map +1 -1
- package/packages/agent/src/actions/context-signal-lexicon.js +6 -0
- package/packages/agent/src/actions/eject-plugin.d.ts +3 -0
- package/packages/agent/src/actions/eject-plugin.d.ts.map +1 -0
- package/packages/agent/src/actions/eject-plugin.js +48 -0
- package/packages/agent/src/actions/execute-trade.d.ts +17 -0
- package/packages/agent/src/actions/execute-trade.d.ts.map +1 -0
- package/packages/agent/src/actions/execute-trade.js +299 -0
- package/packages/agent/src/actions/get-self-status.d.ts +13 -0
- package/packages/agent/src/actions/get-self-status.d.ts.map +1 -0
- package/packages/agent/src/actions/get-self-status.js +66 -0
- package/packages/agent/src/actions/gmail.d.ts +1 -32
- package/packages/agent/src/actions/gmail.d.ts.map +1 -1
- package/packages/agent/src/actions/gmail.js +1 -1734
- package/packages/agent/src/actions/inbox-digest.d.ts +1 -1
- package/packages/agent/src/actions/inbox-digest.d.ts.map +1 -1
- package/packages/agent/src/actions/inbox-digest.js +1 -1
- package/packages/agent/src/actions/inbox-respond.d.ts +1 -1
- package/packages/agent/src/actions/inbox-respond.d.ts.map +1 -1
- package/packages/agent/src/actions/inbox-respond.js +1 -1
- package/packages/agent/src/actions/inbox-triage.d.ts +1 -1
- package/packages/agent/src/actions/inbox-triage.d.ts.map +1 -1
- package/packages/agent/src/actions/inbox-triage.js +1 -1
- package/packages/agent/src/actions/inbox.d.ts +1 -2
- package/packages/agent/src/actions/inbox.d.ts.map +1 -1
- package/packages/agent/src/actions/inbox.js +1 -856
- package/packages/agent/src/actions/index.d.ts +13 -0
- package/packages/agent/src/actions/index.d.ts.map +1 -1
- package/packages/agent/src/actions/index.js +13 -0
- package/packages/agent/src/actions/install-plugin.d.ts +3 -0
- package/packages/agent/src/actions/install-plugin.d.ts.map +1 -0
- package/packages/agent/src/actions/install-plugin.js +65 -0
- package/packages/agent/src/actions/life-goal-extractor.d.ts +1 -68
- package/packages/agent/src/actions/life-goal-extractor.d.ts.map +1 -1
- package/packages/agent/src/actions/life-goal-extractor.js +1 -354
- package/packages/agent/src/actions/life-param-extractor.d.ts +1 -77
- package/packages/agent/src/actions/life-param-extractor.d.ts.map +1 -1
- package/packages/agent/src/actions/life-param-extractor.js +1 -423
- package/packages/agent/src/actions/life-recent-context.d.ts +1 -8
- package/packages/agent/src/actions/life-recent-context.d.ts.map +1 -1
- package/packages/agent/src/actions/life-recent-context.js +1 -84
- package/packages/agent/src/actions/life-update-extractor.d.ts +1 -26
- package/packages/agent/src/actions/life-update-extractor.d.ts.map +1 -1
- package/packages/agent/src/actions/life-update-extractor.js +1 -195
- package/packages/agent/src/actions/life.d.ts +1 -8
- package/packages/agent/src/actions/life.d.ts.map +1 -1
- package/packages/agent/src/actions/life.extractor.d.ts +1 -17
- package/packages/agent/src/actions/life.extractor.d.ts.map +1 -1
- package/packages/agent/src/actions/life.extractor.js +1 -264
- package/packages/agent/src/actions/life.js +1 -3379
- package/packages/agent/src/actions/lifeops-extraction-config.d.ts +1 -15
- package/packages/agent/src/actions/lifeops-extraction-config.d.ts.map +1 -1
- package/packages/agent/src/actions/lifeops-extraction-config.js +1 -25
- package/packages/agent/src/actions/lifeops-google-helpers.d.ts +1 -61
- package/packages/agent/src/actions/lifeops-google-helpers.d.ts.map +1 -1
- package/packages/agent/src/actions/lifeops-google-helpers.js +1 -607
- package/packages/agent/src/actions/list-ejected.d.ts +3 -0
- package/packages/agent/src/actions/list-ejected.d.ts.map +1 -0
- package/packages/agent/src/actions/list-ejected.js +35 -0
- package/packages/agent/src/actions/log-level.d.ts +3 -0
- package/packages/agent/src/actions/log-level.d.ts.map +1 -0
- package/packages/agent/src/actions/log-level.js +125 -0
- package/packages/agent/src/actions/manage-tasks.d.ts.map +1 -1
- package/packages/agent/src/actions/manage-tasks.js +51 -15
- package/packages/agent/src/actions/media.d.ts +21 -0
- package/packages/agent/src/actions/media.d.ts.map +1 -0
- package/packages/agent/src/actions/media.js +384 -0
- package/packages/agent/src/actions/read-messages.d.ts +14 -0
- package/packages/agent/src/actions/read-messages.d.ts.map +1 -0
- package/packages/agent/src/actions/read-messages.js +228 -0
- package/packages/agent/src/actions/reinject-plugin.d.ts +3 -0
- package/packages/agent/src/actions/reinject-plugin.d.ts.map +1 -0
- package/packages/agent/src/actions/reinject-plugin.js +47 -0
- package/packages/agent/src/actions/send-message.d.ts +0 -7
- package/packages/agent/src/actions/send-message.d.ts.map +1 -1
- package/packages/agent/src/actions/send-message.js +170 -49
- package/packages/agent/src/actions/sync-plugin.d.ts +3 -0
- package/packages/agent/src/actions/sync-plugin.d.ts.map +1 -0
- package/packages/agent/src/actions/sync-plugin.js +47 -0
- package/packages/agent/src/actions/timezone-normalization.d.ts +1 -2
- package/packages/agent/src/actions/timezone-normalization.d.ts.map +1 -1
- package/packages/agent/src/actions/timezone-normalization.js +1 -107
- package/packages/agent/src/actions/transfer-token.d.ts +17 -0
- package/packages/agent/src/actions/transfer-token.d.ts.map +1 -0
- package/packages/agent/src/actions/transfer-token.js +470 -0
- package/packages/agent/src/actions/update-owner-profile.d.ts +1 -2
- package/packages/agent/src/actions/update-owner-profile.d.ts.map +1 -1
- package/packages/agent/src/actions/update-owner-profile.js +1 -131
- package/packages/agent/src/actions/wallet-action-shared.d.ts +15 -0
- package/packages/agent/src/actions/wallet-action-shared.d.ts.map +1 -0
- package/packages/agent/src/actions/wallet-action-shared.js +24 -0
- package/packages/agent/src/api/agent-admin-routes.d.ts.map +1 -1
- package/packages/agent/src/api/agent-admin-routes.js +1 -1
- package/packages/agent/src/api/binance-skill-helpers.d.ts.map +1 -1
- package/packages/agent/src/api/binance-skill-helpers.js +8 -3
- package/packages/agent/src/api/chat-routes.d.ts.map +1 -1
- package/packages/agent/src/api/chat-routes.js +20 -5
- package/packages/agent/src/api/coding-agents-auth-sanitize.d.ts +1 -22
- package/packages/agent/src/api/coding-agents-auth-sanitize.d.ts.map +1 -1
- package/packages/agent/src/api/coding-agents-auth-sanitize.js +1 -39
- package/packages/agent/src/api/coding-agents-preflight-normalize.d.ts +1 -28
- package/packages/agent/src/api/coding-agents-preflight-normalize.d.ts.map +1 -1
- package/packages/agent/src/api/coding-agents-preflight-normalize.js +1 -45
- package/packages/agent/src/api/coordinator-types.d.ts +1 -46
- package/packages/agent/src/api/coordinator-types.d.ts.map +1 -1
- package/packages/agent/src/api/coordinator-types.js +1 -1
- package/packages/agent/src/api/coordinator-wiring.d.ts +1 -45
- package/packages/agent/src/api/coordinator-wiring.d.ts.map +1 -1
- package/packages/agent/src/api/coordinator-wiring.js +1 -108
- package/packages/agent/src/api/index.d.ts +1 -1
- package/packages/agent/src/api/index.d.ts.map +1 -1
- package/packages/agent/src/api/index.js +1 -1
- package/packages/agent/src/api/lifeops-browser-packaging.d.ts +1 -15
- package/packages/agent/src/api/lifeops-browser-packaging.d.ts.map +1 -1
- package/packages/agent/src/api/lifeops-browser-packaging.js +1 -305
- package/packages/agent/src/api/lifeops-routes.d.ts +1 -19
- package/packages/agent/src/api/lifeops-routes.d.ts.map +1 -1
- package/packages/agent/src/api/lifeops-routes.js +1 -1173
- package/packages/agent/src/api/server.d.ts.map +1 -1
- package/packages/agent/src/api/server.js +6 -6
- package/packages/agent/src/api/task-agent-message-routing.d.ts +1 -9
- package/packages/agent/src/api/task-agent-message-routing.d.ts.map +1 -1
- package/packages/agent/src/api/task-agent-message-routing.js +1 -62
- package/packages/agent/src/api/website-blocker-routes.d.ts +1 -6
- package/packages/agent/src/api/website-blocker-routes.d.ts.map +1 -1
- package/packages/agent/src/api/website-blocker-routes.js +1 -174
- package/packages/agent/src/config/types.agent-defaults.d.ts +1 -1
- package/packages/agent/src/config/types.agent-defaults.d.ts.map +1 -1
- package/packages/agent/src/evals/coordinator-eval-client.d.ts +1 -38
- package/packages/agent/src/evals/coordinator-eval-client.d.ts.map +1 -1
- package/packages/agent/src/evals/coordinator-eval-client.js +1 -138
- package/packages/agent/src/evals/coordinator-live-runner.d.ts +1 -56
- package/packages/agent/src/evals/coordinator-live-runner.d.ts.map +1 -1
- package/packages/agent/src/evals/coordinator-live-runner.js +1 -546
- package/packages/agent/src/evals/coordinator-preflight.d.ts +1 -31
- package/packages/agent/src/evals/coordinator-preflight.d.ts.map +1 -1
- package/packages/agent/src/evals/coordinator-preflight.js +1 -296
- package/packages/agent/src/evals/coordinator-scenarios.d.ts +1 -23
- package/packages/agent/src/evals/coordinator-scenarios.d.ts.map +1 -1
- package/packages/agent/src/evals/coordinator-scenarios.js +1 -1141
- package/packages/agent/src/lifeops/app-state.d.ts +1 -10
- package/packages/agent/src/lifeops/app-state.d.ts.map +1 -1
- package/packages/agent/src/lifeops/app-state.js +1 -32
- package/packages/agent/src/lifeops/apple-reminders.d.ts +1 -57
- package/packages/agent/src/lifeops/apple-reminders.d.ts.map +1 -1
- package/packages/agent/src/lifeops/apple-reminders.js +1 -325
- package/packages/agent/src/lifeops/defaults.d.ts +1 -23
- package/packages/agent/src/lifeops/defaults.d.ts.map +1 -1
- package/packages/agent/src/lifeops/defaults.js +1 -205
- package/packages/agent/src/lifeops/engine.d.ts +1 -7
- package/packages/agent/src/lifeops/engine.d.ts.map +1 -1
- package/packages/agent/src/lifeops/engine.js +1 -389
- package/packages/agent/src/lifeops/goal-grounding.d.ts +1 -53
- package/packages/agent/src/lifeops/goal-grounding.d.ts.map +1 -1
- package/packages/agent/src/lifeops/goal-grounding.js +1 -147
- package/packages/agent/src/lifeops/goal-semantic-evaluator.d.ts +1 -11
- package/packages/agent/src/lifeops/goal-semantic-evaluator.d.ts.map +1 -1
- package/packages/agent/src/lifeops/goal-semantic-evaluator.js +1 -154
- package/packages/agent/src/lifeops/google-api-error.d.ts +1 -6
- package/packages/agent/src/lifeops/google-api-error.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-api-error.js +1 -35
- package/packages/agent/src/lifeops/google-calendar.d.ts +1 -52
- package/packages/agent/src/lifeops/google-calendar.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-calendar.js +1 -268
- package/packages/agent/src/lifeops/google-connector-gateway.d.ts +1 -18
- package/packages/agent/src/lifeops/google-connector-gateway.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-connector-gateway.js +1 -65
- package/packages/agent/src/lifeops/google-fetch.d.ts +1 -10
- package/packages/agent/src/lifeops/google-fetch.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-fetch.js +1 -85
- package/packages/agent/src/lifeops/google-gmail.d.ts +1 -53
- package/packages/agent/src/lifeops/google-gmail.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-gmail.js +1 -471
- package/packages/agent/src/lifeops/google-managed-client.d.ts +1 -126
- package/packages/agent/src/lifeops/google-managed-client.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-managed-client.js +1 -294
- package/packages/agent/src/lifeops/google-oauth.d.ts +1 -60
- package/packages/agent/src/lifeops/google-oauth.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-oauth.js +1 -494
- package/packages/agent/src/lifeops/google-scopes.d.ts +1 -12
- package/packages/agent/src/lifeops/google-scopes.d.ts.map +1 -1
- package/packages/agent/src/lifeops/google-scopes.js +1 -96
- package/packages/agent/src/lifeops/index.d.ts +1 -2
- package/packages/agent/src/lifeops/index.d.ts.map +1 -1
- package/packages/agent/src/lifeops/index.js +1 -2
- package/packages/agent/src/lifeops/owner-profile.d.ts +1 -14
- package/packages/agent/src/lifeops/owner-profile.d.ts.map +1 -1
- package/packages/agent/src/lifeops/owner-profile.js +1 -194
- package/packages/agent/src/lifeops/repository.d.ts +1 -208
- package/packages/agent/src/lifeops/repository.d.ts.map +1 -1
- package/packages/agent/src/lifeops/repository.js +1 -3187
- package/packages/agent/src/lifeops/runtime.d.ts +1 -13
- package/packages/agent/src/lifeops/runtime.d.ts.map +1 -1
- package/packages/agent/src/lifeops/runtime.js +1 -120
- package/packages/agent/src/lifeops/screen-context.d.ts +1 -51
- package/packages/agent/src/lifeops/screen-context.d.ts.map +1 -1
- package/packages/agent/src/lifeops/screen-context.js +1 -332
- package/packages/agent/src/lifeops/seed-routines.d.ts +1 -19
- package/packages/agent/src/lifeops/seed-routines.d.ts.map +1 -1
- package/packages/agent/src/lifeops/seed-routines.js +1 -111
- package/packages/agent/src/lifeops/service.d.ts +1 -274
- package/packages/agent/src/lifeops/service.d.ts.map +1 -1
- package/packages/agent/src/lifeops/service.js +1 -9260
- package/packages/agent/src/lifeops/sql.d.ts +1 -30
- package/packages/agent/src/lifeops/sql.d.ts.map +1 -1
- package/packages/agent/src/lifeops/sql.js +1 -247
- package/packages/agent/src/lifeops/time.d.ts +1 -16
- package/packages/agent/src/lifeops/time.d.ts.map +1 -1
- package/packages/agent/src/lifeops/time.js +1 -132
- package/packages/agent/src/lifeops/twilio.d.ts +1 -24
- package/packages/agent/src/lifeops/twilio.d.ts.map +1 -1
- package/packages/agent/src/lifeops/twilio.js +1 -157
- package/packages/agent/src/lifeops/x-poster.d.ts +1 -18
- package/packages/agent/src/lifeops/x-poster.d.ts.map +1 -1
- package/packages/agent/src/lifeops/x-poster.js +1 -148
- package/packages/agent/src/providers/inbox-triage.d.ts +1 -2
- package/packages/agent/src/providers/inbox-triage.d.ts.map +1 -1
- package/packages/agent/src/providers/inbox-triage.js +1 -89
- package/packages/agent/src/providers/index.d.ts +4 -1
- package/packages/agent/src/providers/index.d.ts.map +1 -1
- package/packages/agent/src/providers/index.js +4 -1
- package/packages/agent/src/providers/lifeops.d.ts +1 -2
- package/packages/agent/src/providers/lifeops.d.ts.map +1 -1
- package/packages/agent/src/providers/lifeops.js +1 -157
- package/packages/agent/src/providers/local-models.d.ts +118 -0
- package/packages/agent/src/providers/local-models.d.ts.map +1 -0
- package/packages/agent/src/providers/local-models.js +427 -0
- package/packages/agent/src/providers/media-provider.d.ts +192 -0
- package/packages/agent/src/providers/media-provider.d.ts.map +1 -0
- package/packages/agent/src/providers/media-provider.js +1088 -0
- package/packages/agent/src/providers/self-status.d.ts +4 -0
- package/packages/agent/src/providers/self-status.d.ts.map +1 -0
- package/packages/agent/src/providers/self-status.js +12 -0
- package/packages/agent/src/providers/tasks.d.ts.map +1 -1
- package/packages/agent/src/providers/tasks.js +7 -7
- package/packages/agent/src/runtime/core-plugins.js +1 -1
- package/packages/agent/src/runtime/eliza-plugin.d.ts.map +1 -1
- package/packages/agent/src/runtime/eliza-plugin.js +1 -7
- package/packages/agent/src/runtime/eliza.js +2 -2
- package/packages/agent/src/runtime/plugin-collector.js +3 -3
- package/packages/agent/src/runtime/plugin-lifecycle.d.ts.map +1 -1
- package/packages/agent/src/runtime/plugin-lifecycle.js +3 -13
- package/packages/agent/src/runtime/trajectory-internals.d.ts.map +1 -1
- package/packages/agent/src/runtime/trajectory-internals.js +1 -3
- package/packages/agent/src/services/built-in-app-routes/hyperscape.d.ts.map +1 -1
- package/packages/agent/src/services/coding-task-executor.d.ts +3 -3
- package/packages/agent/src/services/coding-task-executor.js +3 -3
- package/packages/shared/src/awareness/index.d.ts +2 -0
- package/packages/shared/src/awareness/index.d.ts.map +1 -0
- package/packages/shared/src/awareness/index.js +1 -0
- package/packages/shared/src/awareness/registry.d.ts +27 -0
- package/packages/shared/src/awareness/registry.d.ts.map +1 -0
- package/packages/shared/src/awareness/registry.js +161 -0
- package/packages/shared/src/i18n/generated/validation-keyword-data.d.ts +24 -0
- package/packages/shared/src/i18n/generated/validation-keyword-data.d.ts.map +1 -1
- package/packages/shared/src/i18n/generated/validation-keyword-data.js +24 -0
- package/packages/shared/src/runtime-env.d.ts.map +1 -1
- package/packages/shared/src/runtime-env.js +5 -1
- package/packages/typescript/src/generated/action-docs.d.ts +135 -0
- package/packages/typescript/src/generated/action-docs.d.ts.map +1 -1
- package/packages/typescript/src/generated/action-docs.js +237 -0
- package/packages/typescript/src/i18n/generated/validation-keyword-data.d.ts +24 -0
- package/packages/typescript/src/i18n/generated/validation-keyword-data.d.ts.map +1 -1
- package/packages/typescript/src/i18n/generated/validation-keyword-data.js +24 -0
- package/packages/typescript/src/index.node.d.ts +2 -2
- package/packages/typescript/src/index.node.d.ts.map +1 -1
- package/packages/typescript/src/index.node.js +4 -3
- package/packages/typescript/src/plugin-lifecycle.d.ts.map +1 -1
- package/packages/typescript/src/plugin-lifecycle.js +42 -3
- package/packages/typescript/src/services/message.d.ts.map +1 -1
- package/packages/typescript/src/services/message.js +32 -0
- package/apps/app-training/src/core/cli.d.ts +0 -11
- package/apps/app-training/src/core/cli.d.ts.map +0 -1
- package/apps/app-training/src/core/cli.js +0 -302
- package/apps/app-training/src/core/context-audit.d.ts +0 -51
- package/apps/app-training/src/core/context-audit.d.ts.map +0 -1
- package/apps/app-training/src/core/context-audit.js +0 -141
- package/apps/app-training/src/core/context-catalog.d.ts +0 -47
- package/apps/app-training/src/core/context-catalog.d.ts.map +0 -1
- package/apps/app-training/src/core/context-catalog.js +0 -259
- package/apps/app-training/src/core/context-types.d.ts +0 -3
- package/apps/app-training/src/core/context-types.d.ts.map +0 -1
- package/apps/app-training/src/core/context-types.js +0 -11
- package/apps/app-training/src/core/dataset-generator.d.ts +0 -135
- package/apps/app-training/src/core/dataset-generator.d.ts.map +0 -1
- package/apps/app-training/src/core/dataset-generator.js +0 -703
- package/apps/app-training/src/core/replay-validator.d.ts +0 -96
- package/apps/app-training/src/core/replay-validator.d.ts.map +0 -1
- package/apps/app-training/src/core/replay-validator.js +0 -265
- package/apps/app-training/src/core/roleplay-executor.d.ts +0 -123
- package/apps/app-training/src/core/roleplay-executor.d.ts.map +0 -1
- package/apps/app-training/src/core/roleplay-executor.js +0 -645
- package/apps/app-training/src/core/roleplay-trajectories.d.ts +0 -54
- package/apps/app-training/src/core/roleplay-trajectories.d.ts.map +0 -1
- package/apps/app-training/src/core/roleplay-trajectories.js +0 -73
- package/apps/app-training/src/core/scenario-blueprints.d.ts +0 -62
- package/apps/app-training/src/core/scenario-blueprints.d.ts.map +0 -1
- package/apps/app-training/src/core/scenario-blueprints.js +0 -790
- package/apps/app-training/src/core/trajectory-task-datasets.d.ts +0 -38
- package/apps/app-training/src/core/trajectory-task-datasets.d.ts.map +0 -1
- package/apps/app-training/src/core/trajectory-task-datasets.js +0 -281
- package/apps/app-training/src/core/vertex-tuning.d.ts +0 -139
- package/apps/app-training/src/core/vertex-tuning.d.ts.map +0 -1
- package/apps/app-training/src/core/vertex-tuning.js +0 -234
- package/packages/agent/src/inbox/channel-deep-links.d.ts.map +0 -1
- package/packages/agent/src/inbox/config.d.ts.map +0 -1
- package/packages/agent/src/inbox/message-fetcher.d.ts.map +0 -1
- package/packages/agent/src/inbox/reflection.d.ts.map +0 -1
- package/packages/agent/src/inbox/repository.d.ts.map +0 -1
- package/packages/agent/src/inbox/triage-classifier.d.ts.map +0 -1
- package/packages/agent/src/inbox/types.d.ts.map +0 -1
- package/packages/agent/src/training/cli.d.ts +0 -2
- package/packages/agent/src/training/cli.d.ts.map +0 -1
- package/packages/agent/src/training/cli.js +0 -2
- package/packages/agent/src/training/context-audit.d.ts +0 -2
- package/packages/agent/src/training/context-audit.d.ts.map +0 -1
- package/packages/agent/src/training/context-audit.js +0 -2
- package/packages/agent/src/training/context-catalog.d.ts +0 -2
- package/packages/agent/src/training/context-catalog.d.ts.map +0 -1
- package/packages/agent/src/training/context-catalog.js +0 -2
- package/packages/agent/src/training/context-types.d.ts +0 -2
- package/packages/agent/src/training/context-types.d.ts.map +0 -1
- package/packages/agent/src/training/context-types.js +0 -2
- package/packages/agent/src/training/dataset-generator.d.ts +0 -2
- package/packages/agent/src/training/dataset-generator.d.ts.map +0 -1
- package/packages/agent/src/training/dataset-generator.js +0 -2
- package/packages/agent/src/training/replay-validator.d.ts +0 -2
- package/packages/agent/src/training/replay-validator.d.ts.map +0 -1
- package/packages/agent/src/training/replay-validator.js +0 -2
- package/packages/agent/src/training/roleplay-executor.d.ts +0 -2
- package/packages/agent/src/training/roleplay-executor.d.ts.map +0 -1
- package/packages/agent/src/training/roleplay-executor.js +0 -2
- package/packages/agent/src/training/roleplay-trajectories.d.ts +0 -2
- package/packages/agent/src/training/roleplay-trajectories.d.ts.map +0 -1
- package/packages/agent/src/training/roleplay-trajectories.js +0 -2
- package/packages/agent/src/training/scenario-blueprints.d.ts +0 -2
- package/packages/agent/src/training/scenario-blueprints.d.ts.map +0 -1
- package/packages/agent/src/training/scenario-blueprints.js +0 -2
- package/packages/agent/src/training/trajectory-task-datasets.d.ts +0 -2
- package/packages/agent/src/training/trajectory-task-datasets.d.ts.map +0 -1
- package/packages/agent/src/training/trajectory-task-datasets.js +0 -2
- package/packages/agent/src/training/vertex-tuning.d.ts +0 -2
- package/packages/agent/src/training/vertex-tuning.d.ts.map +0 -1
- package/packages/agent/src/training/vertex-tuning.js +0 -2
- package/packages/typescript/src/features/orchestrator/actions/coding-task-handlers.d.ts +0 -41
- package/packages/typescript/src/features/orchestrator/actions/coding-task-handlers.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/coding-task-handlers.js +0 -443
- package/packages/typescript/src/features/orchestrator/actions/coding-task-helpers.d.ts +0 -34
- package/packages/typescript/src/features/orchestrator/actions/coding-task-helpers.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/coding-task-helpers.js +0 -171
- package/packages/typescript/src/features/orchestrator/actions/eval-metadata.d.ts +0 -11
- package/packages/typescript/src/features/orchestrator/actions/eval-metadata.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/eval-metadata.js +0 -55
- package/packages/typescript/src/features/orchestrator/actions/finalize-workspace.d.ts +0 -11
- package/packages/typescript/src/features/orchestrator/actions/finalize-workspace.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/finalize-workspace.js +0 -214
- package/packages/typescript/src/features/orchestrator/actions/list-agents.d.ts +0 -13
- package/packages/typescript/src/features/orchestrator/actions/list-agents.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/list-agents.js +0 -174
- package/packages/typescript/src/features/orchestrator/actions/manage-issues.d.ts +0 -11
- package/packages/typescript/src/features/orchestrator/actions/manage-issues.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/manage-issues.js +0 -428
- package/packages/typescript/src/features/orchestrator/actions/provision-workspace.d.ts +0 -11
- package/packages/typescript/src/features/orchestrator/actions/provision-workspace.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/provision-workspace.js +0 -189
- package/packages/typescript/src/features/orchestrator/actions/send-to-agent.d.ts +0 -12
- package/packages/typescript/src/features/orchestrator/actions/send-to-agent.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/send-to-agent.js +0 -265
- package/packages/typescript/src/features/orchestrator/actions/spawn-agent.d.ts +0 -12
- package/packages/typescript/src/features/orchestrator/actions/spawn-agent.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/spawn-agent.js +0 -356
- package/packages/typescript/src/features/orchestrator/actions/start-coding-task.d.ts +0 -22
- package/packages/typescript/src/features/orchestrator/actions/start-coding-task.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/start-coding-task.js +0 -270
- package/packages/typescript/src/features/orchestrator/actions/stop-agent.d.ts +0 -12
- package/packages/typescript/src/features/orchestrator/actions/stop-agent.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/stop-agent.js +0 -192
- package/packages/typescript/src/features/orchestrator/actions/task-control.d.ts +0 -3
- package/packages/typescript/src/features/orchestrator/actions/task-control.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/task-control.js +0 -217
- package/packages/typescript/src/features/orchestrator/actions/task-history.d.ts +0 -3
- package/packages/typescript/src/features/orchestrator/actions/task-history.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/task-history.js +0 -323
- package/packages/typescript/src/features/orchestrator/actions/task-share.d.ts +0 -3
- package/packages/typescript/src/features/orchestrator/actions/task-share.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/task-share.js +0 -168
- package/packages/typescript/src/features/orchestrator/actions/task-thread-target.d.ts +0 -11
- package/packages/typescript/src/features/orchestrator/actions/task-thread-target.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/actions/task-thread-target.js +0 -68
- package/packages/typescript/src/features/orchestrator/api/agent-routes.d.ts +0 -18
- package/packages/typescript/src/features/orchestrator/api/agent-routes.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/api/agent-routes.js +0 -654
- package/packages/typescript/src/features/orchestrator/api/coordinator-routes.d.ts +0 -22
- package/packages/typescript/src/features/orchestrator/api/coordinator-routes.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/api/coordinator-routes.js +0 -403
- package/packages/typescript/src/features/orchestrator/api/hook-routes.d.ts +0 -18
- package/packages/typescript/src/features/orchestrator/api/hook-routes.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/api/hook-routes.js +0 -164
- package/packages/typescript/src/features/orchestrator/api/issue-routes.d.ts +0 -17
- package/packages/typescript/src/features/orchestrator/api/issue-routes.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/api/issue-routes.js +0 -132
- package/packages/typescript/src/features/orchestrator/api/routes.d.ts +0 -37
- package/packages/typescript/src/features/orchestrator/api/routes.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/api/routes.js +0 -96
- package/packages/typescript/src/features/orchestrator/api/workspace-routes.d.ts +0 -17
- package/packages/typescript/src/features/orchestrator/api/workspace-routes.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/api/workspace-routes.js +0 -149
- package/packages/typescript/src/features/orchestrator/base-plugin.d.ts +0 -19
- package/packages/typescript/src/features/orchestrator/base-plugin.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/base-plugin.js +0 -75
- package/packages/typescript/src/features/orchestrator/claude-jsonl-completion-watcher.d.ts +0 -101
- package/packages/typescript/src/features/orchestrator/claude-jsonl-completion-watcher.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/claude-jsonl-completion-watcher.js +0 -310
- package/packages/typescript/src/features/orchestrator/index.d.ts +0 -33
- package/packages/typescript/src/features/orchestrator/index.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/index.js +0 -30
- package/packages/typescript/src/features/orchestrator/patch-agent-orchestrator-plugin.d.ts +0 -15
- package/packages/typescript/src/features/orchestrator/patch-agent-orchestrator-plugin.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/patch-agent-orchestrator-plugin.js +0 -1449
- package/packages/typescript/src/features/orchestrator/providers/action-examples.d.ts +0 -14
- package/packages/typescript/src/features/orchestrator/providers/action-examples.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/providers/action-examples.js +0 -151
- package/packages/typescript/src/features/orchestrator/providers/active-workspace-context.d.ts +0 -13
- package/packages/typescript/src/features/orchestrator/providers/active-workspace-context.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/providers/active-workspace-context.js +0 -142
- package/packages/typescript/src/features/orchestrator/services/agent-credentials.d.ts +0 -6
- package/packages/typescript/src/features/orchestrator/services/agent-credentials.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/agent-credentials.js +0 -91
- package/packages/typescript/src/features/orchestrator/services/agent-metrics.d.ts +0 -30
- package/packages/typescript/src/features/orchestrator/services/agent-metrics.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/agent-metrics.js +0 -54
- package/packages/typescript/src/features/orchestrator/services/agent-selection.d.ts +0 -53
- package/packages/typescript/src/features/orchestrator/services/agent-selection.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/agent-selection.js +0 -70
- package/packages/typescript/src/features/orchestrator/services/ansi-utils.d.ts +0 -61
- package/packages/typescript/src/features/orchestrator/services/ansi-utils.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/ansi-utils.js +0 -252
- package/packages/typescript/src/features/orchestrator/services/config-env.d.ts +0 -13
- package/packages/typescript/src/features/orchestrator/services/config-env.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/config-env.js +0 -37
- package/packages/typescript/src/features/orchestrator/services/coordinator-event-normalizer.d.ts +0 -50
- package/packages/typescript/src/features/orchestrator/services/coordinator-event-normalizer.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/coordinator-event-normalizer.js +0 -184
- package/packages/typescript/src/features/orchestrator/services/debug-capture.d.ts +0 -38
- package/packages/typescript/src/features/orchestrator/services/debug-capture.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/debug-capture.js +0 -113
- package/packages/typescript/src/features/orchestrator/services/pty-auto-response.d.ts +0 -30
- package/packages/typescript/src/features/orchestrator/services/pty-auto-response.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/pty-auto-response.js +0 -146
- package/packages/typescript/src/features/orchestrator/services/pty-init.d.ts +0 -54
- package/packages/typescript/src/features/orchestrator/services/pty-init.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/pty-init.js +0 -315
- package/packages/typescript/src/features/orchestrator/services/pty-service.d.ts +0 -175
- package/packages/typescript/src/features/orchestrator/services/pty-service.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/pty-service.js +0 -1469
- package/packages/typescript/src/features/orchestrator/services/pty-session-io.d.ts +0 -49
- package/packages/typescript/src/features/orchestrator/services/pty-session-io.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/pty-session-io.js +0 -180
- package/packages/typescript/src/features/orchestrator/services/pty-spawn.d.ts +0 -53
- package/packages/typescript/src/features/orchestrator/services/pty-spawn.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/pty-spawn.js +0 -280
- package/packages/typescript/src/features/orchestrator/services/pty-types.d.ts +0 -80
- package/packages/typescript/src/features/orchestrator/services/pty-types.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/pty-types.js +0 -51
- package/packages/typescript/src/features/orchestrator/services/repo-input.d.ts +0 -16
- package/packages/typescript/src/features/orchestrator/services/repo-input.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/repo-input.js +0 -88
- package/packages/typescript/src/features/orchestrator/services/stall-classifier.d.ts +0 -69
- package/packages/typescript/src/features/orchestrator/services/stall-classifier.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/stall-classifier.js +0 -446
- package/packages/typescript/src/features/orchestrator/services/swarm-coordinator-prompts.d.ts +0 -97
- package/packages/typescript/src/features/orchestrator/services/swarm-coordinator-prompts.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/swarm-coordinator-prompts.js +0 -342
- package/packages/typescript/src/features/orchestrator/services/swarm-coordinator.d.ts +0 -421
- package/packages/typescript/src/features/orchestrator/services/swarm-coordinator.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/swarm-coordinator.js +0 -2356
- package/packages/typescript/src/features/orchestrator/services/swarm-decision-loop.d.ts +0 -52
- package/packages/typescript/src/features/orchestrator/services/swarm-decision-loop.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/swarm-decision-loop.js +0 -1538
- package/packages/typescript/src/features/orchestrator/services/swarm-event-triage.d.ts +0 -49
- package/packages/typescript/src/features/orchestrator/services/swarm-event-triage.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/swarm-event-triage.js +0 -171
- package/packages/typescript/src/features/orchestrator/services/swarm-history.d.ts +0 -27
- package/packages/typescript/src/features/orchestrator/services/swarm-history.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/swarm-history.js +0 -148
- package/packages/typescript/src/features/orchestrator/services/swarm-idle-watchdog.d.ts +0 -22
- package/packages/typescript/src/features/orchestrator/services/swarm-idle-watchdog.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/swarm-idle-watchdog.js +0 -265
- package/packages/typescript/src/features/orchestrator/services/task-acceptance.d.ts +0 -8
- package/packages/typescript/src/features/orchestrator/services/task-acceptance.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-acceptance.js +0 -114
- package/packages/typescript/src/features/orchestrator/services/task-agent-auth.d.ts +0 -68
- package/packages/typescript/src/features/orchestrator/services/task-agent-auth.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-agent-auth.js +0 -559
- package/packages/typescript/src/features/orchestrator/services/task-agent-frameworks.d.ts +0 -82
- package/packages/typescript/src/features/orchestrator/services/task-agent-frameworks.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-agent-frameworks.js +0 -738
- package/packages/typescript/src/features/orchestrator/services/task-kind.d.ts +0 -3
- package/packages/typescript/src/features/orchestrator/services/task-kind.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-kind.js +0 -40
- package/packages/typescript/src/features/orchestrator/services/task-policy.d.ts +0 -17
- package/packages/typescript/src/features/orchestrator/services/task-policy.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-policy.js +0 -226
- package/packages/typescript/src/features/orchestrator/services/task-registry.d.ts +0 -550
- package/packages/typescript/src/features/orchestrator/services/task-registry.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-registry.js +0 -2182
- package/packages/typescript/src/features/orchestrator/services/task-share.d.ts +0 -18
- package/packages/typescript/src/features/orchestrator/services/task-share.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-share.js +0 -159
- package/packages/typescript/src/features/orchestrator/services/task-validation.d.ts +0 -69
- package/packages/typescript/src/features/orchestrator/services/task-validation.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-validation.js +0 -587
- package/packages/typescript/src/features/orchestrator/services/task-verifier-runner.d.ts +0 -5
- package/packages/typescript/src/features/orchestrator/services/task-verifier-runner.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/task-verifier-runner.js +0 -372
- package/packages/typescript/src/features/orchestrator/services/trajectory-context.d.ts +0 -73
- package/packages/typescript/src/features/orchestrator/services/trajectory-context.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/trajectory-context.js +0 -64
- package/packages/typescript/src/features/orchestrator/services/trajectory-feedback.d.ts +0 -53
- package/packages/typescript/src/features/orchestrator/services/trajectory-feedback.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/trajectory-feedback.js +0 -260
- package/packages/typescript/src/features/orchestrator/services/workspace-git-ops.d.ts +0 -28
- package/packages/typescript/src/features/orchestrator/services/workspace-git-ops.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/workspace-git-ops.js +0 -105
- package/packages/typescript/src/features/orchestrator/services/workspace-github.d.ts +0 -58
- package/packages/typescript/src/features/orchestrator/services/workspace-github.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/workspace-github.js +0 -139
- package/packages/typescript/src/features/orchestrator/services/workspace-lifecycle.d.ts +0 -18
- package/packages/typescript/src/features/orchestrator/services/workspace-lifecycle.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/workspace-lifecycle.js +0 -86
- package/packages/typescript/src/features/orchestrator/services/workspace-service.d.ts +0 -118
- package/packages/typescript/src/features/orchestrator/services/workspace-service.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/workspace-service.js +0 -533
- package/packages/typescript/src/features/orchestrator/services/workspace-types.d.ts +0 -81
- package/packages/typescript/src/features/orchestrator/services/workspace-types.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/services/workspace-types.js +0 -8
- package/packages/typescript/src/features/orchestrator/task-progress-streamer.d.ts +0 -38
- package/packages/typescript/src/features/orchestrator/task-progress-streamer.d.ts.map +0 -1
- package/packages/typescript/src/features/orchestrator/task-progress-streamer.js +0 -293
- /package/{packages/agent → apps/app-lifeops}/src/inbox/channel-deep-links.d.ts +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/channel-deep-links.js +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/config.d.ts +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/message-fetcher.d.ts +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/message-fetcher.js +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/reflection.d.ts +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/reflection.js +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/repository.d.ts +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/repository.js +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/triage-classifier.d.ts +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/triage-classifier.js +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/types.d.ts +0 -0
- /package/{packages/agent → apps/app-lifeops}/src/inbox/types.js +0 -0
|
@@ -1,1141 +1 @@
|
|
|
1
|
-
|
|
2
|
-
"app_chat",
|
|
3
|
-
"discord",
|
|
4
|
-
"telegram",
|
|
5
|
-
"slack",
|
|
6
|
-
"whatsapp",
|
|
7
|
-
"signal",
|
|
8
|
-
"matrix",
|
|
9
|
-
"wechat",
|
|
10
|
-
];
|
|
11
|
-
function scenario(value) {
|
|
12
|
-
return value;
|
|
13
|
-
}
|
|
14
|
-
export const coordinatorScenarios = [
|
|
15
|
-
scenario({
|
|
16
|
-
id: "B001",
|
|
17
|
-
family: "build_and_edit",
|
|
18
|
-
profile: "smoke",
|
|
19
|
-
title: "build a personal birthday page and keep iterating",
|
|
20
|
-
summary: "The user asks for a small web page, then asks to view it and refine it without restating the whole task.",
|
|
21
|
-
channels: ALL_CHANNELS,
|
|
22
|
-
requiredCapabilities: [
|
|
23
|
-
"create_task",
|
|
24
|
-
"continue_task",
|
|
25
|
-
"artifact_visibility",
|
|
26
|
-
],
|
|
27
|
-
turns: [
|
|
28
|
-
{
|
|
29
|
-
speaker: "user",
|
|
30
|
-
text: "Can you make a little web page for me that shows my birthday, March 14, 1991, and my astrological sign?",
|
|
31
|
-
},
|
|
32
|
-
{ speaker: "user", text: "Can I view that?" },
|
|
33
|
-
{
|
|
34
|
-
speaker: "user",
|
|
35
|
-
text: "Change it so it also shows the weekday I was born on.",
|
|
36
|
-
},
|
|
37
|
-
],
|
|
38
|
-
doneWhen: [
|
|
39
|
-
"A task thread exists for the work.",
|
|
40
|
-
"The agent produces a concrete preview artifact, file path, or URL.",
|
|
41
|
-
"The follow-up request is treated as continuation of the same work.",
|
|
42
|
-
],
|
|
43
|
-
evidence: [
|
|
44
|
-
"task thread",
|
|
45
|
-
"task artifacts",
|
|
46
|
-
"trajectory records",
|
|
47
|
-
"changed files",
|
|
48
|
-
],
|
|
49
|
-
}),
|
|
50
|
-
scenario({
|
|
51
|
-
id: "B002",
|
|
52
|
-
family: "build_and_edit",
|
|
53
|
-
profile: "core",
|
|
54
|
-
title: "build a landing page from a vague aesthetic brief",
|
|
55
|
-
summary: "The user gives an intentionally fuzzy request and the agent must still create a concrete artifact and continue refining it.",
|
|
56
|
-
channels: ALL_CHANNELS,
|
|
57
|
-
requiredCapabilities: [
|
|
58
|
-
"create_task",
|
|
59
|
-
"clarify_or_execute",
|
|
60
|
-
"artifact_visibility",
|
|
61
|
-
],
|
|
62
|
-
turns: [
|
|
63
|
-
{
|
|
64
|
-
speaker: "user",
|
|
65
|
-
text: "Make me a small homepage that feels kind of dreamy and strange.",
|
|
66
|
-
},
|
|
67
|
-
{ speaker: "user", text: "Can you make it less cute and more severe?" },
|
|
68
|
-
{ speaker: "user", text: "Can I see the current version?" },
|
|
69
|
-
],
|
|
70
|
-
doneWhen: [
|
|
71
|
-
"The agent either asks a targeted clarification or starts building immediately.",
|
|
72
|
-
"A previewable result is produced.",
|
|
73
|
-
"The refinement request updates the same thread.",
|
|
74
|
-
],
|
|
75
|
-
evidence: ["task thread", "artifacts", "transcripts", "changed files"],
|
|
76
|
-
}),
|
|
77
|
-
scenario({
|
|
78
|
-
id: "B003",
|
|
79
|
-
family: "build_and_edit",
|
|
80
|
-
profile: "core",
|
|
81
|
-
title: "create a script and then explain how to run it",
|
|
82
|
-
summary: "The user asks for code and then asks how to execute it locally and from another machine.",
|
|
83
|
-
channels: ALL_CHANNELS,
|
|
84
|
-
requiredCapabilities: [
|
|
85
|
-
"create_task",
|
|
86
|
-
"continue_task",
|
|
87
|
-
"share_or_run_guidance",
|
|
88
|
-
],
|
|
89
|
-
turns: [
|
|
90
|
-
{
|
|
91
|
-
speaker: "user",
|
|
92
|
-
text: "Write a script that takes a folder of markdown files and makes one combined HTML page.",
|
|
93
|
-
},
|
|
94
|
-
{ speaker: "user", text: "How do I run it?" },
|
|
95
|
-
{ speaker: "user", text: "Can I run it from a remote computer too?" },
|
|
96
|
-
],
|
|
97
|
-
doneWhen: [
|
|
98
|
-
"A task thread is created and files are written.",
|
|
99
|
-
"The agent gives concrete run instructions tied to the produced artifact.",
|
|
100
|
-
"The remote-view question is handled as the next step of the same task.",
|
|
101
|
-
],
|
|
102
|
-
evidence: [
|
|
103
|
-
"task thread",
|
|
104
|
-
"artifacts",
|
|
105
|
-
"trajectory records",
|
|
106
|
-
"changed files",
|
|
107
|
-
],
|
|
108
|
-
}),
|
|
109
|
-
scenario({
|
|
110
|
-
id: "B004",
|
|
111
|
-
family: "build_and_edit",
|
|
112
|
-
profile: "core",
|
|
113
|
-
title: "make a one-file app and add a final polish pass",
|
|
114
|
-
summary: "The user requests a tiny app, then asks the agent to do a final polish pass without specifying exact edits.",
|
|
115
|
-
channels: ALL_CHANNELS,
|
|
116
|
-
requiredCapabilities: ["create_task", "continue_task", "iterative_editing"],
|
|
117
|
-
turns: [
|
|
118
|
-
{
|
|
119
|
-
speaker: "user",
|
|
120
|
-
text: "Build me a tiny notes page with local storage.",
|
|
121
|
-
},
|
|
122
|
-
{ speaker: "user", text: "Looks close. Do a final polish pass." },
|
|
123
|
-
],
|
|
124
|
-
doneWhen: [
|
|
125
|
-
"The app is created in files.",
|
|
126
|
-
"A second pass produces additional edits on the same task thread.",
|
|
127
|
-
],
|
|
128
|
-
evidence: ["changed files", "task thread decisions", "trajectory records"],
|
|
129
|
-
}),
|
|
130
|
-
scenario({
|
|
131
|
-
id: "B005",
|
|
132
|
-
family: "build_and_edit",
|
|
133
|
-
profile: "full",
|
|
134
|
-
title: "implement a code change in an existing repository",
|
|
135
|
-
summary: "The user asks for a real repo change and the agent must use repository context instead of scratch space.",
|
|
136
|
-
channels: ALL_CHANNELS,
|
|
137
|
-
requiredCapabilities: ["repo_tasking", "create_task", "worktree_artifacts"],
|
|
138
|
-
turns: [
|
|
139
|
-
{
|
|
140
|
-
speaker: "user",
|
|
141
|
-
text: "In the same repo, add a tiny diagnostics page that lists the current task threads.",
|
|
142
|
-
},
|
|
143
|
-
{ speaker: "user", text: "Can you show me where you put it?" },
|
|
144
|
-
],
|
|
145
|
-
doneWhen: [
|
|
146
|
-
"The task uses an existing repo or workspace instead of scratch.",
|
|
147
|
-
"Changed files are present in the repo.",
|
|
148
|
-
"The agent identifies the files or route that was added.",
|
|
149
|
-
],
|
|
150
|
-
evidence: [
|
|
151
|
-
"task thread",
|
|
152
|
-
"changed files",
|
|
153
|
-
"artifacts",
|
|
154
|
-
"trajectory records",
|
|
155
|
-
],
|
|
156
|
-
}),
|
|
157
|
-
scenario({
|
|
158
|
-
id: "C001",
|
|
159
|
-
family: "continuation",
|
|
160
|
-
profile: "smoke",
|
|
161
|
-
title: "continue a task from a vague pointer",
|
|
162
|
-
summary: "The user refers to earlier work with pronouns and expects continuation.",
|
|
163
|
-
channels: ALL_CHANNELS,
|
|
164
|
-
requiredCapabilities: ["continue_task", "thread_lookup", "history_search"],
|
|
165
|
-
turns: [
|
|
166
|
-
{ speaker: "user", text: "Can you make a little calendar view for me?" },
|
|
167
|
-
{
|
|
168
|
-
speaker: "user",
|
|
169
|
-
text: "Actually add that thing where I can jump to today.",
|
|
170
|
-
},
|
|
171
|
-
{ speaker: "user", text: "Now make it work on mobile too." },
|
|
172
|
-
],
|
|
173
|
-
doneWhen: [
|
|
174
|
-
"The same thread is reused for follow-up work.",
|
|
175
|
-
"The agent does not spawn unrelated duplicate tasks for simple continuation.",
|
|
176
|
-
],
|
|
177
|
-
evidence: ["task thread updates", "changed files", "transcripts"],
|
|
178
|
-
}),
|
|
179
|
-
scenario({
|
|
180
|
-
id: "C002",
|
|
181
|
-
family: "continuation",
|
|
182
|
-
profile: "core",
|
|
183
|
-
title: "continue work after a conversational detour",
|
|
184
|
-
summary: "The user briefly asks a side question and then returns to the original task.",
|
|
185
|
-
channels: ALL_CHANNELS,
|
|
186
|
-
requiredCapabilities: [
|
|
187
|
-
"continue_task",
|
|
188
|
-
"conversation_memory",
|
|
189
|
-
"thread_lookup",
|
|
190
|
-
],
|
|
191
|
-
turns: [
|
|
192
|
-
{
|
|
193
|
-
speaker: "user",
|
|
194
|
-
text: "Build me a page that shows my next three reminders.",
|
|
195
|
-
},
|
|
196
|
-
{
|
|
197
|
-
speaker: "user",
|
|
198
|
-
text: "By the way, what model are you using for task work right now?",
|
|
199
|
-
},
|
|
200
|
-
{ speaker: "user", text: "Okay continue with the reminder page." },
|
|
201
|
-
],
|
|
202
|
-
doneWhen: [
|
|
203
|
-
"The side question is answered without losing the main task.",
|
|
204
|
-
"The task resumes on the same thread.",
|
|
205
|
-
],
|
|
206
|
-
evidence: ["task thread", "task events", "trajectory records"],
|
|
207
|
-
}),
|
|
208
|
-
scenario({
|
|
209
|
-
id: "C003",
|
|
210
|
-
family: "continuation",
|
|
211
|
-
profile: "core",
|
|
212
|
-
title: "treat 'make it so' as approval to execute",
|
|
213
|
-
summary: "The user gives a non-specific approval utterance and expects the plan to execute.",
|
|
214
|
-
channels: ALL_CHANNELS,
|
|
215
|
-
requiredCapabilities: ["implicit_approval", "task_execution"],
|
|
216
|
-
turns: [
|
|
217
|
-
{
|
|
218
|
-
speaker: "user",
|
|
219
|
-
text: "Can you sketch the approach for a tiny dashboard that shows active tasks and recent completions?",
|
|
220
|
-
},
|
|
221
|
-
{ speaker: "user", text: "Yeah I'm down." },
|
|
222
|
-
],
|
|
223
|
-
doneWhen: [
|
|
224
|
-
"The approval utterance is interpreted as permission to execute.",
|
|
225
|
-
"A real task thread starts and produces artifacts.",
|
|
226
|
-
],
|
|
227
|
-
evidence: ["task thread", "trajectory records", "artifacts"],
|
|
228
|
-
}),
|
|
229
|
-
scenario({
|
|
230
|
-
id: "C004",
|
|
231
|
-
family: "continuation",
|
|
232
|
-
profile: "full",
|
|
233
|
-
title: "continue after asking to inspect current work",
|
|
234
|
-
summary: "The user pauses to inspect the current result and then asks for one more edit.",
|
|
235
|
-
channels: ALL_CHANNELS,
|
|
236
|
-
requiredCapabilities: [
|
|
237
|
-
"preview_visibility",
|
|
238
|
-
"continue_task",
|
|
239
|
-
"artifact_lookup",
|
|
240
|
-
],
|
|
241
|
-
turns: [
|
|
242
|
-
{
|
|
243
|
-
speaker: "user",
|
|
244
|
-
text: "Build me a compact dashboard for active tasks.",
|
|
245
|
-
},
|
|
246
|
-
{ speaker: "user", text: "Can I see it?" },
|
|
247
|
-
{ speaker: "user", text: "Okay now add a tiny recent-history section." },
|
|
248
|
-
],
|
|
249
|
-
doneWhen: [
|
|
250
|
-
"The preview request returns a real artifact or URL.",
|
|
251
|
-
"The subsequent edit continues the existing task.",
|
|
252
|
-
],
|
|
253
|
-
evidence: ["artifacts", "task thread", "changed files"],
|
|
254
|
-
}),
|
|
255
|
-
scenario({
|
|
256
|
-
id: "P001",
|
|
257
|
-
family: "preview_and_share",
|
|
258
|
-
profile: "smoke",
|
|
259
|
-
title: "answer 'can I see it' with a real artifact",
|
|
260
|
-
summary: "The user asks for visibility into produced work and expects a concrete view path.",
|
|
261
|
-
channels: ALL_CHANNELS,
|
|
262
|
-
requiredCapabilities: ["preview_visibility", "artifact_lookup"],
|
|
263
|
-
turns: [
|
|
264
|
-
{
|
|
265
|
-
speaker: "user",
|
|
266
|
-
text: "Make a tiny webpage that just says hello in a dramatic font.",
|
|
267
|
-
},
|
|
268
|
-
{ speaker: "user", text: "Can I see it?" },
|
|
269
|
-
],
|
|
270
|
-
doneWhen: [
|
|
271
|
-
"The agent returns a concrete artifact, file path, or URL.",
|
|
272
|
-
"The result is attached or discoverable from the task thread.",
|
|
273
|
-
],
|
|
274
|
-
evidence: ["artifacts", "task thread", "transcripts"],
|
|
275
|
-
}),
|
|
276
|
-
scenario({
|
|
277
|
-
id: "P002",
|
|
278
|
-
family: "preview_and_share",
|
|
279
|
-
profile: "core",
|
|
280
|
-
title: "remote view without a hardcoded transport",
|
|
281
|
-
summary: "The user asks to view the result from a remote machine and the agent must discover viable sharing options.",
|
|
282
|
-
channels: ALL_CHANNELS,
|
|
283
|
-
requiredCapabilities: [
|
|
284
|
-
"share_discovery",
|
|
285
|
-
"preview_visibility",
|
|
286
|
-
"environment_detection",
|
|
287
|
-
],
|
|
288
|
-
turns: [
|
|
289
|
-
{
|
|
290
|
-
speaker: "user",
|
|
291
|
-
text: "Build a tiny page for me with today's moon phase.",
|
|
292
|
-
},
|
|
293
|
-
{ speaker: "user", text: "How do I view that from a remote computer?" },
|
|
294
|
-
],
|
|
295
|
-
doneWhen: [
|
|
296
|
-
"The agent inspects available share mechanisms.",
|
|
297
|
-
"The response either provides a real remote path or clearly states which capability is missing.",
|
|
298
|
-
],
|
|
299
|
-
evidence: ["task artifacts", "task events", "trajectory records"],
|
|
300
|
-
}),
|
|
301
|
-
scenario({
|
|
302
|
-
id: "P003",
|
|
303
|
-
family: "preview_and_share",
|
|
304
|
-
profile: "core",
|
|
305
|
-
title: "send a link back over the originating connector",
|
|
306
|
-
summary: "The user wants the output link sent back in the same channel context.",
|
|
307
|
-
channels: ALL_CHANNELS,
|
|
308
|
-
requiredCapabilities: [
|
|
309
|
-
"share_discovery",
|
|
310
|
-
"connector_response",
|
|
311
|
-
"artifact_lookup",
|
|
312
|
-
],
|
|
313
|
-
turns: [
|
|
314
|
-
{ speaker: "user", text: "Build a one-page weather card." },
|
|
315
|
-
{ speaker: "user", text: "Pull it up for me." },
|
|
316
|
-
{
|
|
317
|
-
speaker: "user",
|
|
318
|
-
text: "If I'm on Discord, just send me the link there.",
|
|
319
|
-
},
|
|
320
|
-
],
|
|
321
|
-
doneWhen: [
|
|
322
|
-
"The resulting message is emitted on the originating channel.",
|
|
323
|
-
"If a shareable link exists, it is returned in-channel.",
|
|
324
|
-
],
|
|
325
|
-
evidence: ["connector-sourced response", "artifacts", "trajectory records"],
|
|
326
|
-
}),
|
|
327
|
-
scenario({
|
|
328
|
-
id: "P004",
|
|
329
|
-
family: "preview_and_share",
|
|
330
|
-
profile: "full",
|
|
331
|
-
title: "share a generated static artifact instead of a dev server",
|
|
332
|
-
summary: "The agent should recognize when a file download or static artifact is more appropriate than a live URL.",
|
|
333
|
-
channels: ALL_CHANNELS,
|
|
334
|
-
requiredCapabilities: ["artifact_lookup", "share_discovery"],
|
|
335
|
-
turns: [
|
|
336
|
-
{
|
|
337
|
-
speaker: "user",
|
|
338
|
-
text: "Generate a tiny printable HTML birthday card.",
|
|
339
|
-
},
|
|
340
|
-
{ speaker: "user", text: "What's the easiest way for me to get that?" },
|
|
341
|
-
],
|
|
342
|
-
doneWhen: [
|
|
343
|
-
"The agent returns the most appropriate artifact path, URI, or attachment route.",
|
|
344
|
-
],
|
|
345
|
-
evidence: ["artifacts", "task thread", "changed files"],
|
|
346
|
-
}),
|
|
347
|
-
scenario({
|
|
348
|
-
id: "S001",
|
|
349
|
-
family: "pause_resume_stop",
|
|
350
|
-
profile: "smoke",
|
|
351
|
-
title: "pause a task for review and resume it later",
|
|
352
|
-
summary: "The user wants to pause ongoing work, discuss it, then continue from preserved state.",
|
|
353
|
-
channels: ALL_CHANNELS,
|
|
354
|
-
requiredCapabilities: ["task_control", "pause_task", "resume_task"],
|
|
355
|
-
turns: [
|
|
356
|
-
{ speaker: "user", text: "Build a tiny portfolio page for me." },
|
|
357
|
-
{
|
|
358
|
-
speaker: "user",
|
|
359
|
-
text: "Hold on a second, can you pause that and let's discuss if it's right?",
|
|
360
|
-
},
|
|
361
|
-
{ speaker: "user", text: "Okay, make it so." },
|
|
362
|
-
],
|
|
363
|
-
doneWhen: [
|
|
364
|
-
"The thread enters a paused or waiting-on-user state.",
|
|
365
|
-
"The state is preserved across the discussion turn.",
|
|
366
|
-
"The task resumes instead of starting over.",
|
|
367
|
-
],
|
|
368
|
-
evidence: [
|
|
369
|
-
"task thread status changes",
|
|
370
|
-
"task events",
|
|
371
|
-
"trajectory records",
|
|
372
|
-
],
|
|
373
|
-
}),
|
|
374
|
-
scenario({
|
|
375
|
-
id: "S002",
|
|
376
|
-
family: "pause_resume_stop",
|
|
377
|
-
profile: "smoke",
|
|
378
|
-
title: "stop a running task when the user says stop",
|
|
379
|
-
summary: "The user issues an urgent stop request and expects the task to halt without losing audit history.",
|
|
380
|
-
channels: ALL_CHANNELS,
|
|
381
|
-
requiredCapabilities: ["task_control", "stop_task"],
|
|
382
|
-
turns: [
|
|
383
|
-
{ speaker: "user", text: "Make a small reminder dashboard." },
|
|
384
|
-
{ speaker: "user", text: "Stop, stop, stop doing what you're doing." },
|
|
385
|
-
],
|
|
386
|
-
doneWhen: [
|
|
387
|
-
"The task session is stopped.",
|
|
388
|
-
"The thread remains queryable afterward.",
|
|
389
|
-
],
|
|
390
|
-
evidence: ["task events", "task thread", "transcripts"],
|
|
391
|
-
}),
|
|
392
|
-
scenario({
|
|
393
|
-
id: "S003",
|
|
394
|
-
family: "pause_resume_stop",
|
|
395
|
-
profile: "core",
|
|
396
|
-
title: "pause when the user says the work is wrong",
|
|
397
|
-
summary: "The agent should stop pushing forward and ask for clarification when told the current direction is wrong.",
|
|
398
|
-
channels: ALL_CHANNELS,
|
|
399
|
-
requiredCapabilities: ["task_control", "clarification_after_pause"],
|
|
400
|
-
turns: [
|
|
401
|
-
{
|
|
402
|
-
speaker: "user",
|
|
403
|
-
text: "Build a mini stats panel for my current tasks.",
|
|
404
|
-
},
|
|
405
|
-
{ speaker: "user", text: "Hey wait, that's not right." },
|
|
406
|
-
],
|
|
407
|
-
doneWhen: [
|
|
408
|
-
"The task is paused or held.",
|
|
409
|
-
"The agent asks a clarifying follow-up instead of continuing blindly.",
|
|
410
|
-
],
|
|
411
|
-
evidence: ["task events", "trajectory records", "task thread status"],
|
|
412
|
-
}),
|
|
413
|
-
scenario({
|
|
414
|
-
id: "S004",
|
|
415
|
-
family: "pause_resume_stop",
|
|
416
|
-
profile: "core",
|
|
417
|
-
title: "resume with a specific correction",
|
|
418
|
-
summary: "The user corrects the plan after pausing and the agent resumes the same work item.",
|
|
419
|
-
channels: ALL_CHANNELS,
|
|
420
|
-
requiredCapabilities: ["task_control", "resume_task", "continue_task"],
|
|
421
|
-
turns: [
|
|
422
|
-
{ speaker: "user", text: "Build me a tiny status page." },
|
|
423
|
-
{ speaker: "user", text: "Pause that." },
|
|
424
|
-
{
|
|
425
|
-
speaker: "user",
|
|
426
|
-
text: "Okay continue, but make it text-only and minimal.",
|
|
427
|
-
},
|
|
428
|
-
],
|
|
429
|
-
doneWhen: [
|
|
430
|
-
"The original thread is resumed with new instructions.",
|
|
431
|
-
"Additional file or artifact updates occur after resume.",
|
|
432
|
-
],
|
|
433
|
-
evidence: ["task thread events", "changed files", "trajectory records"],
|
|
434
|
-
}),
|
|
435
|
-
scenario({
|
|
436
|
-
id: "H001",
|
|
437
|
-
family: "history_and_reporting",
|
|
438
|
-
profile: "smoke",
|
|
439
|
-
title: "what are you working on right now",
|
|
440
|
-
summary: "The user asks for active task status without wanting raw logs dumped into context.",
|
|
441
|
-
channels: ALL_CHANNELS,
|
|
442
|
-
requiredCapabilities: ["task_history", "active_status"],
|
|
443
|
-
turns: [{ speaker: "user", text: "What are you working on right now?" }],
|
|
444
|
-
doneWhen: [
|
|
445
|
-
"The answer comes from coordinator state or task history lookup.",
|
|
446
|
-
"The response summarizes active tasks without dumping huge raw transcripts.",
|
|
447
|
-
],
|
|
448
|
-
evidence: ["task history query", "trajectory records"],
|
|
449
|
-
}),
|
|
450
|
-
scenario({
|
|
451
|
-
id: "H002",
|
|
452
|
-
family: "history_and_reporting",
|
|
453
|
-
profile: "smoke",
|
|
454
|
-
title: "what tasks do you have going on",
|
|
455
|
-
summary: "The agent should enumerate ongoing tracked tasks and their states.",
|
|
456
|
-
channels: ALL_CHANNELS,
|
|
457
|
-
requiredCapabilities: ["task_history", "active_status"],
|
|
458
|
-
turns: [{ speaker: "user", text: "What tasks do you have going on?" }],
|
|
459
|
-
doneWhen: ["The response includes currently active or waiting threads."],
|
|
460
|
-
evidence: ["task history query", "task thread summaries"],
|
|
461
|
-
}),
|
|
462
|
-
scenario({
|
|
463
|
-
id: "H003",
|
|
464
|
-
family: "history_and_reporting",
|
|
465
|
-
profile: "core",
|
|
466
|
-
title: "show tasks from yesterday",
|
|
467
|
-
summary: "The user asks for prior work by time window.",
|
|
468
|
-
channels: ALL_CHANNELS,
|
|
469
|
-
requiredCapabilities: ["task_history", "time_window_lookup"],
|
|
470
|
-
turns: [
|
|
471
|
-
{ speaker: "user", text: "Can you show me what tasks we did yesterday?" },
|
|
472
|
-
],
|
|
473
|
-
doneWhen: [
|
|
474
|
-
"The response uses a date-window query over task history.",
|
|
475
|
-
"The result is time-bounded instead of a full-history dump.",
|
|
476
|
-
],
|
|
477
|
-
evidence: ["task history query", "trajectory records"],
|
|
478
|
-
}),
|
|
479
|
-
scenario({
|
|
480
|
-
id: "H004",
|
|
481
|
-
family: "history_and_reporting",
|
|
482
|
-
profile: "core",
|
|
483
|
-
title: "search last week by topic",
|
|
484
|
-
summary: "The user asks for a topical search over the previous week.",
|
|
485
|
-
channels: ALL_CHANNELS,
|
|
486
|
-
requiredCapabilities: [
|
|
487
|
-
"task_history",
|
|
488
|
-
"time_window_lookup",
|
|
489
|
-
"search_lookup",
|
|
490
|
-
],
|
|
491
|
-
turns: [
|
|
492
|
-
{
|
|
493
|
-
speaker: "user",
|
|
494
|
-
text: "In the last week, give me all tasks where we were working on the Discord connector.",
|
|
495
|
-
},
|
|
496
|
-
],
|
|
497
|
-
doneWhen: ["The response combines time window and topic filtering."],
|
|
498
|
-
evidence: ["task thread search", "task history query"],
|
|
499
|
-
}),
|
|
500
|
-
scenario({
|
|
501
|
-
id: "H005",
|
|
502
|
-
family: "history_and_reporting",
|
|
503
|
-
profile: "core",
|
|
504
|
-
title: "count task volume without polluting context",
|
|
505
|
-
summary: "The user asks for counts and expects a concise answer.",
|
|
506
|
-
channels: ALL_CHANNELS,
|
|
507
|
-
requiredCapabilities: ["task_history", "count_lookup"],
|
|
508
|
-
turns: [{ speaker: "user", text: "How many tasks have we done so far?" }],
|
|
509
|
-
doneWhen: [
|
|
510
|
-
"The answer is produced from durable state.",
|
|
511
|
-
"The response is concise and count-oriented.",
|
|
512
|
-
],
|
|
513
|
-
evidence: ["task history query", "db assertions"],
|
|
514
|
-
}),
|
|
515
|
-
scenario({
|
|
516
|
-
id: "H006",
|
|
517
|
-
family: "history_and_reporting",
|
|
518
|
-
profile: "full",
|
|
519
|
-
title: "explain why a task is blocked",
|
|
520
|
-
summary: "The user asks for status plus reason, not just a state label.",
|
|
521
|
-
channels: ALL_CHANNELS,
|
|
522
|
-
requiredCapabilities: ["task_history", "task_detail_lookup"],
|
|
523
|
-
turns: [{ speaker: "user", text: "Why is that task blocked?" }],
|
|
524
|
-
doneWhen: [
|
|
525
|
-
"The response identifies the relevant thread and summarizes the blocking reason.",
|
|
526
|
-
],
|
|
527
|
-
evidence: ["task detail lookup", "pending decisions", "task events"],
|
|
528
|
-
}),
|
|
529
|
-
scenario({
|
|
530
|
-
id: "R001",
|
|
531
|
-
family: "research_and_planning",
|
|
532
|
-
profile: "core",
|
|
533
|
-
title: "deep research with live provider backing",
|
|
534
|
-
summary: "The user asks for research that should run on live providers and produce a report artifact.",
|
|
535
|
-
channels: ALL_CHANNELS,
|
|
536
|
-
requiredCapabilities: [
|
|
537
|
-
"create_task",
|
|
538
|
-
"live_provider_execution",
|
|
539
|
-
"artifact_reporting",
|
|
540
|
-
],
|
|
541
|
-
turns: [
|
|
542
|
-
{
|
|
543
|
-
speaker: "user",
|
|
544
|
-
text: "Research the best current options for local-first connector observability and write me a recommendation.",
|
|
545
|
-
},
|
|
546
|
-
{
|
|
547
|
-
speaker: "user",
|
|
548
|
-
text: "Can you put the findings somewhere I can read them?",
|
|
549
|
-
},
|
|
550
|
-
],
|
|
551
|
-
doneWhen: [
|
|
552
|
-
"A research task runs on a live provider-backed framework.",
|
|
553
|
-
"A report artifact is attached to the thread.",
|
|
554
|
-
],
|
|
555
|
-
evidence: ["task artifacts", "trajectory records", "task thread"],
|
|
556
|
-
}),
|
|
557
|
-
scenario({
|
|
558
|
-
id: "R002",
|
|
559
|
-
family: "research_and_planning",
|
|
560
|
-
profile: "core",
|
|
561
|
-
title: "ask for a plan, then implicitly approve it",
|
|
562
|
-
summary: "The user asks for a plan and then gives a vague approval utterance.",
|
|
563
|
-
channels: ALL_CHANNELS,
|
|
564
|
-
requiredCapabilities: ["planning", "implicit_approval", "task_execution"],
|
|
565
|
-
turns: [
|
|
566
|
-
{
|
|
567
|
-
speaker: "user",
|
|
568
|
-
text: "Plan how you'd test the coordinator against lots of nuanced user requests.",
|
|
569
|
-
},
|
|
570
|
-
{ speaker: "user", text: "Yeah sounds good, do it." },
|
|
571
|
-
],
|
|
572
|
-
doneWhen: [
|
|
573
|
-
"The agent distinguishes planning from execution.",
|
|
574
|
-
"The approval starts execution rather than producing another plan.",
|
|
575
|
-
],
|
|
576
|
-
evidence: ["task thread", "trajectory records", "artifacts"],
|
|
577
|
-
}),
|
|
578
|
-
scenario({
|
|
579
|
-
id: "R003",
|
|
580
|
-
family: "research_and_planning",
|
|
581
|
-
profile: "full",
|
|
582
|
-
title: "parallel subtasks for research and synthesis",
|
|
583
|
-
summary: "The user asks for parallel work and expects coordinated output.",
|
|
584
|
-
channels: ALL_CHANNELS,
|
|
585
|
-
requiredCapabilities: ["multi_agent_coordination", "artifact_reporting"],
|
|
586
|
-
turns: [
|
|
587
|
-
{
|
|
588
|
-
speaker: "user",
|
|
589
|
-
text: "Split this into a few parallel task agents: one researches, one compares tradeoffs, one writes the summary.",
|
|
590
|
-
},
|
|
591
|
-
],
|
|
592
|
-
doneWhen: [
|
|
593
|
-
"Multiple sessions attach to one logical thread or clearly related threads.",
|
|
594
|
-
"A final synthesis artifact exists.",
|
|
595
|
-
],
|
|
596
|
-
evidence: ["multiple sessions", "task thread", "artifacts"],
|
|
597
|
-
}),
|
|
598
|
-
scenario({
|
|
599
|
-
id: "K001",
|
|
600
|
-
family: "connector_behavior",
|
|
601
|
-
profile: "smoke",
|
|
602
|
-
title: "respond appropriately from a Discord-origin message",
|
|
603
|
-
summary: "The same conversational request should run through Discord semantics and still create durable Eliza state.",
|
|
604
|
-
channels: ["discord"],
|
|
605
|
-
requiredCapabilities: ["connector_ingress", "task_execution"],
|
|
606
|
-
turns: [
|
|
607
|
-
{
|
|
608
|
-
speaker: "user",
|
|
609
|
-
text: "Build me a tiny page that says hi from Discord.",
|
|
610
|
-
},
|
|
611
|
-
{ speaker: "user", text: "Can I see it?" },
|
|
612
|
-
],
|
|
613
|
-
doneWhen: [
|
|
614
|
-
"The run is recorded as connector-originated.",
|
|
615
|
-
"A task thread and trajectories exist in Eliza.",
|
|
616
|
-
],
|
|
617
|
-
evidence: ["connector trajectory", "task thread", "artifacts"],
|
|
618
|
-
}),
|
|
619
|
-
scenario({
|
|
620
|
-
id: "K002",
|
|
621
|
-
family: "connector_behavior",
|
|
622
|
-
profile: "core",
|
|
623
|
-
title: "connector follow-up continues the same task",
|
|
624
|
-
summary: "A second turn on the same connector should continue existing work instead of starting over.",
|
|
625
|
-
channels: ["discord", "telegram", "slack"],
|
|
626
|
-
requiredCapabilities: ["connector_ingress", "continue_task"],
|
|
627
|
-
turns: [
|
|
628
|
-
{ speaker: "user", text: "Build a tiny countdown page." },
|
|
629
|
-
{ speaker: "user", text: "Now add a darker version too." },
|
|
630
|
-
],
|
|
631
|
-
doneWhen: ["The same connector session or thread is reused for follow-up."],
|
|
632
|
-
evidence: ["task thread", "connector trajectory", "changed files"],
|
|
633
|
-
}),
|
|
634
|
-
scenario({
|
|
635
|
-
id: "K003",
|
|
636
|
-
family: "connector_behavior",
|
|
637
|
-
profile: "full",
|
|
638
|
-
title: "connector permission failures are surfaced cleanly",
|
|
639
|
-
summary: "A channel with task-agent policy restrictions should return an explicit denial instead of failing silently.",
|
|
640
|
-
channels: ["discord"],
|
|
641
|
-
requiredCapabilities: ["connector_ingress", "policy_enforcement"],
|
|
642
|
-
turns: [
|
|
643
|
-
{
|
|
644
|
-
speaker: "user",
|
|
645
|
-
text: "Start a background task agent to make a little site for me.",
|
|
646
|
-
},
|
|
647
|
-
],
|
|
648
|
-
doneWhen: [
|
|
649
|
-
"If policy blocks the action, the denial is explicit and auditable.",
|
|
650
|
-
],
|
|
651
|
-
evidence: [
|
|
652
|
-
"task policy events",
|
|
653
|
-
"connector response",
|
|
654
|
-
"trajectory records",
|
|
655
|
-
],
|
|
656
|
-
}),
|
|
657
|
-
scenario({
|
|
658
|
-
id: "F001",
|
|
659
|
-
family: "recovery_and_failover",
|
|
660
|
-
profile: "core",
|
|
661
|
-
title: "framework failover after quota exhaustion",
|
|
662
|
-
summary: "The coordinator should continue work when one live framework is temporarily exhausted.",
|
|
663
|
-
channels: ALL_CHANNELS,
|
|
664
|
-
requiredCapabilities: ["framework_failover", "task_recovery"],
|
|
665
|
-
turns: [
|
|
666
|
-
{
|
|
667
|
-
speaker: "user",
|
|
668
|
-
text: "Take a deep pass on this problem and keep going even if one provider fails.",
|
|
669
|
-
},
|
|
670
|
-
],
|
|
671
|
-
doneWhen: [
|
|
672
|
-
"A failover event is recorded when the first framework becomes unavailable.",
|
|
673
|
-
"A replacement session continues the same task.",
|
|
674
|
-
],
|
|
675
|
-
evidence: ["task events", "sessions", "trajectory records"],
|
|
676
|
-
}),
|
|
677
|
-
scenario({
|
|
678
|
-
id: "F002",
|
|
679
|
-
family: "recovery_and_failover",
|
|
680
|
-
profile: "full",
|
|
681
|
-
title: "runtime restart leaves tasks auditable",
|
|
682
|
-
summary: "Interrupted tasks must still be queryable after restart.",
|
|
683
|
-
channels: ALL_CHANNELS,
|
|
684
|
-
requiredCapabilities: ["interrupt_recovery", "task_history"],
|
|
685
|
-
turns: [
|
|
686
|
-
{ speaker: "user", text: "What task was interrupted most recently?" },
|
|
687
|
-
],
|
|
688
|
-
doneWhen: ["Interrupted sessions are visible in history."],
|
|
689
|
-
evidence: ["task history query", "task status", "task events"],
|
|
690
|
-
}),
|
|
691
|
-
scenario({
|
|
692
|
-
id: "T001",
|
|
693
|
-
family: "task_management",
|
|
694
|
-
profile: "core",
|
|
695
|
-
title: "show the current task list and then drill into one item",
|
|
696
|
-
summary: "The user asks for a list and then asks for a specific item to be shown.",
|
|
697
|
-
channels: ALL_CHANNELS,
|
|
698
|
-
requiredCapabilities: ["task_history", "task_detail_lookup"],
|
|
699
|
-
turns: [
|
|
700
|
-
{ speaker: "user", text: "Show me the current task list." },
|
|
701
|
-
{ speaker: "user", text: "Open the most recent one." },
|
|
702
|
-
],
|
|
703
|
-
doneWhen: [
|
|
704
|
-
"The first turn returns a bounded summary.",
|
|
705
|
-
"The second returns thread-level detail without dumping raw terminal noise.",
|
|
706
|
-
],
|
|
707
|
-
evidence: ["task history query", "task detail lookup"],
|
|
708
|
-
}),
|
|
709
|
-
scenario({
|
|
710
|
-
id: "T002",
|
|
711
|
-
family: "task_management",
|
|
712
|
-
profile: "core",
|
|
713
|
-
title: "archive and reopen task history",
|
|
714
|
-
summary: "The user wants to clean up old work but still keep it recoverable.",
|
|
715
|
-
channels: ALL_CHANNELS,
|
|
716
|
-
requiredCapabilities: ["task_control", "archive_task", "reopen_task"],
|
|
717
|
-
turns: [
|
|
718
|
-
{ speaker: "user", text: "Archive that finished task." },
|
|
719
|
-
{ speaker: "user", text: "Actually reopen it." },
|
|
720
|
-
],
|
|
721
|
-
doneWhen: [
|
|
722
|
-
"The task thread is archived.",
|
|
723
|
-
"The same thread can be reopened.",
|
|
724
|
-
],
|
|
725
|
-
evidence: ["task thread status", "task events"],
|
|
726
|
-
}),
|
|
727
|
-
scenario({
|
|
728
|
-
id: "T003",
|
|
729
|
-
family: "task_management",
|
|
730
|
-
profile: "full",
|
|
731
|
-
title: "continue a paused task with specific guidance",
|
|
732
|
-
summary: "A paused task should preserve context and accept new instructions.",
|
|
733
|
-
channels: ALL_CHANNELS,
|
|
734
|
-
requiredCapabilities: ["task_control", "continue_task"],
|
|
735
|
-
turns: [
|
|
736
|
-
{ speaker: "user", text: "Pause that task for now." },
|
|
737
|
-
{
|
|
738
|
-
speaker: "user",
|
|
739
|
-
text: "Okay continue, but prioritize the mobile layout first.",
|
|
740
|
-
},
|
|
741
|
-
],
|
|
742
|
-
doneWhen: ["The same thread resumes with the new directive."],
|
|
743
|
-
evidence: ["task thread events", "changed files"],
|
|
744
|
-
}),
|
|
745
|
-
scenario({
|
|
746
|
-
id: "V001",
|
|
747
|
-
family: "visibility_and_audit",
|
|
748
|
-
profile: "smoke",
|
|
749
|
-
title: "every run emits trajectories and task-thread evidence",
|
|
750
|
-
summary: "The scenario system should be able to prove that chat, coordinator, and PTY evidence all exist.",
|
|
751
|
-
channels: ALL_CHANNELS,
|
|
752
|
-
requiredCapabilities: [
|
|
753
|
-
"trajectory_logging",
|
|
754
|
-
"task_thread_logging",
|
|
755
|
-
"artifact_logging",
|
|
756
|
-
],
|
|
757
|
-
turns: [
|
|
758
|
-
{
|
|
759
|
-
speaker: "user",
|
|
760
|
-
text: "Make a tiny file that says the eval is working.",
|
|
761
|
-
},
|
|
762
|
-
],
|
|
763
|
-
doneWhen: [
|
|
764
|
-
"At least one trajectory exists for the run.",
|
|
765
|
-
"A task thread exists when task work was required.",
|
|
766
|
-
"Artifacts or changed files are recorded.",
|
|
767
|
-
],
|
|
768
|
-
evidence: [
|
|
769
|
-
"trajectory records",
|
|
770
|
-
"task thread",
|
|
771
|
-
"artifacts",
|
|
772
|
-
"changed files",
|
|
773
|
-
],
|
|
774
|
-
}),
|
|
775
|
-
scenario({
|
|
776
|
-
id: "V002",
|
|
777
|
-
family: "visibility_and_audit",
|
|
778
|
-
profile: "core",
|
|
779
|
-
title: "export a run bundle for later inspection",
|
|
780
|
-
summary: "A completed scenario run should export a durable bundle of everything needed for review.",
|
|
781
|
-
channels: ALL_CHANNELS,
|
|
782
|
-
requiredCapabilities: [
|
|
783
|
-
"bundle_export",
|
|
784
|
-
"trajectory_export",
|
|
785
|
-
"report_generation",
|
|
786
|
-
],
|
|
787
|
-
turns: [
|
|
788
|
-
{ speaker: "user", text: "Run the scenario and save all the evidence." },
|
|
789
|
-
],
|
|
790
|
-
doneWhen: [
|
|
791
|
-
"A report bundle is written.",
|
|
792
|
-
"The bundle references trajectories, threads, artifacts, and changed files.",
|
|
793
|
-
],
|
|
794
|
-
evidence: ["bundle manifest", "trajectory export", "task-thread detail"],
|
|
795
|
-
}),
|
|
796
|
-
scenario({
|
|
797
|
-
id: "V003",
|
|
798
|
-
family: "visibility_and_audit",
|
|
799
|
-
profile: "full",
|
|
800
|
-
title: "group runs by scenario and batch identifiers",
|
|
801
|
-
summary: "The evaluator must be able to retrieve trajectories by scenario and batch.",
|
|
802
|
-
channels: ALL_CHANNELS,
|
|
803
|
-
requiredCapabilities: [
|
|
804
|
-
"scenario_tagging",
|
|
805
|
-
"batch_tagging",
|
|
806
|
-
"trajectory_export",
|
|
807
|
-
],
|
|
808
|
-
turns: [{ speaker: "user", text: "Run a tagged evaluation batch." }],
|
|
809
|
-
doneWhen: [
|
|
810
|
-
"Trajectory filters by scenario and batch return the expected runs.",
|
|
811
|
-
],
|
|
812
|
-
evidence: ["trajectory API filters", "report bundle"],
|
|
813
|
-
}),
|
|
814
|
-
scenario({
|
|
815
|
-
id: "B006",
|
|
816
|
-
family: "build_and_edit",
|
|
817
|
-
profile: "core",
|
|
818
|
-
title: "build a tiny CLI and revise its output format",
|
|
819
|
-
summary: "The user asks for a small command-line tool and then asks for a format change without restarting the task.",
|
|
820
|
-
channels: ALL_CHANNELS,
|
|
821
|
-
requiredCapabilities: ["create_task", "continue_task", "iterative_editing"],
|
|
822
|
-
turns: [
|
|
823
|
-
{
|
|
824
|
-
speaker: "user",
|
|
825
|
-
text: "Make me a tiny CLI that prints my birthday countdown in days.",
|
|
826
|
-
},
|
|
827
|
-
{
|
|
828
|
-
speaker: "user",
|
|
829
|
-
text: "Actually make the output JSON instead of plain text.",
|
|
830
|
-
},
|
|
831
|
-
],
|
|
832
|
-
doneWhen: [
|
|
833
|
-
"The CLI is created in files.",
|
|
834
|
-
"The output-format change updates the same task instead of spawning a new one.",
|
|
835
|
-
],
|
|
836
|
-
evidence: ["task thread", "changed files", "trajectory records"],
|
|
837
|
-
}),
|
|
838
|
-
scenario({
|
|
839
|
-
id: "B007",
|
|
840
|
-
family: "build_and_edit",
|
|
841
|
-
profile: "full",
|
|
842
|
-
title: "add a small feature in the same repo from prior context",
|
|
843
|
-
summary: "The user references the current repo implicitly and expects the agent to stay in that project.",
|
|
844
|
-
channels: ALL_CHANNELS,
|
|
845
|
-
requiredCapabilities: [
|
|
846
|
-
"repo_tasking",
|
|
847
|
-
"continue_task",
|
|
848
|
-
"worktree_artifacts",
|
|
849
|
-
],
|
|
850
|
-
turns: [
|
|
851
|
-
{
|
|
852
|
-
speaker: "user",
|
|
853
|
-
text: "In the same repo, add a small page that lists archived tasks too.",
|
|
854
|
-
},
|
|
855
|
-
{ speaker: "user", text: "Show me which files changed." },
|
|
856
|
-
],
|
|
857
|
-
doneWhen: [
|
|
858
|
-
"The repo context is reused correctly.",
|
|
859
|
-
"Changed files in the existing workspace are surfaced.",
|
|
860
|
-
],
|
|
861
|
-
evidence: ["task thread", "changed files", "artifacts"],
|
|
862
|
-
}),
|
|
863
|
-
scenario({
|
|
864
|
-
id: "C005",
|
|
865
|
-
family: "continuation",
|
|
866
|
-
profile: "core",
|
|
867
|
-
title: "treat keep-going language as continuation",
|
|
868
|
-
summary: "The user uses loose continuation language and expects the same task to keep moving.",
|
|
869
|
-
channels: ALL_CHANNELS,
|
|
870
|
-
requiredCapabilities: ["continue_task", "thread_lookup"],
|
|
871
|
-
turns: [
|
|
872
|
-
{ speaker: "user", text: "Make me a tiny habit tracker page." },
|
|
873
|
-
{ speaker: "user", text: "Okay keep going with that." },
|
|
874
|
-
{ speaker: "user", text: "Now make it feel a little more serious." },
|
|
875
|
-
],
|
|
876
|
-
doneWhen: ["All turns stay on one task thread."],
|
|
877
|
-
evidence: ["task thread updates", "changed files"],
|
|
878
|
-
}),
|
|
879
|
-
scenario({
|
|
880
|
-
id: "C006",
|
|
881
|
-
family: "continuation",
|
|
882
|
-
profile: "full",
|
|
883
|
-
title: "interpret same-project follow-up after a completed answer",
|
|
884
|
-
summary: "The user asks a new but related change after the agent already reported completion.",
|
|
885
|
-
channels: ALL_CHANNELS,
|
|
886
|
-
requiredCapabilities: ["continue_task", "thread_lookup", "repo_tasking"],
|
|
887
|
-
turns: [
|
|
888
|
-
{ speaker: "user", text: "Build me a tiny changelog viewer." },
|
|
889
|
-
{
|
|
890
|
-
speaker: "user",
|
|
891
|
-
text: "Cool, in that same project add a filter for only today.",
|
|
892
|
-
},
|
|
893
|
-
],
|
|
894
|
-
doneWhen: [
|
|
895
|
-
"The follow-up is attached to the prior work item or clearly linked task history.",
|
|
896
|
-
],
|
|
897
|
-
evidence: ["task thread", "changed files", "task events"],
|
|
898
|
-
}),
|
|
899
|
-
scenario({
|
|
900
|
-
id: "P005",
|
|
901
|
-
family: "preview_and_share",
|
|
902
|
-
profile: "core",
|
|
903
|
-
title: "surface the direct file path when that is the best view mechanism",
|
|
904
|
-
summary: "The user does not need a server link; the agent should provide the artifact itself.",
|
|
905
|
-
channels: ALL_CHANNELS,
|
|
906
|
-
requiredCapabilities: ["artifact_lookup", "preview_visibility"],
|
|
907
|
-
turns: [
|
|
908
|
-
{ speaker: "user", text: "Make me a little printable checklist page." },
|
|
909
|
-
{ speaker: "user", text: "Where is the actual file?" },
|
|
910
|
-
],
|
|
911
|
-
doneWhen: ["A direct artifact path or attachment route is returned."],
|
|
912
|
-
evidence: ["artifacts", "task thread", "trajectory records"],
|
|
913
|
-
}),
|
|
914
|
-
scenario({
|
|
915
|
-
id: "S005",
|
|
916
|
-
family: "pause_resume_stop",
|
|
917
|
-
profile: "core",
|
|
918
|
-
title: "pause a research task and then convert the pause into a new direction",
|
|
919
|
-
summary: "The user interrupts the current direction and redirects the same task after discussion.",
|
|
920
|
-
channels: ALL_CHANNELS,
|
|
921
|
-
requiredCapabilities: ["task_control", "pause_task", "continue_task"],
|
|
922
|
-
turns: [
|
|
923
|
-
{
|
|
924
|
-
speaker: "user",
|
|
925
|
-
text: "Research options for connector observability.",
|
|
926
|
-
},
|
|
927
|
-
{
|
|
928
|
-
speaker: "user",
|
|
929
|
-
text: "Hold that thought. I care more about logging and trajectories than dashboards.",
|
|
930
|
-
},
|
|
931
|
-
{ speaker: "user", text: "Okay continue with that new emphasis." },
|
|
932
|
-
],
|
|
933
|
-
doneWhen: [
|
|
934
|
-
"The task pauses for redirection.",
|
|
935
|
-
"The resumed work reflects the corrected emphasis.",
|
|
936
|
-
],
|
|
937
|
-
evidence: ["task events", "task thread status", "artifacts"],
|
|
938
|
-
}),
|
|
939
|
-
scenario({
|
|
940
|
-
id: "H007",
|
|
941
|
-
family: "history_and_reporting",
|
|
942
|
-
profile: "full",
|
|
943
|
-
title: "list blocked tasks from the last week",
|
|
944
|
-
summary: "The user wants a filtered operational view, not a raw dump.",
|
|
945
|
-
channels: ALL_CHANNELS,
|
|
946
|
-
requiredCapabilities: [
|
|
947
|
-
"task_history",
|
|
948
|
-
"time_window_lookup",
|
|
949
|
-
"search_lookup",
|
|
950
|
-
],
|
|
951
|
-
turns: [
|
|
952
|
-
{
|
|
953
|
-
speaker: "user",
|
|
954
|
-
text: "Show me every blocked task from the last week.",
|
|
955
|
-
},
|
|
956
|
-
],
|
|
957
|
-
doneWhen: [
|
|
958
|
-
"The answer is bounded to blocked tasks in the requested window.",
|
|
959
|
-
],
|
|
960
|
-
evidence: ["task history query", "task thread summaries"],
|
|
961
|
-
}),
|
|
962
|
-
scenario({
|
|
963
|
-
id: "H008",
|
|
964
|
-
family: "history_and_reporting",
|
|
965
|
-
profile: "full",
|
|
966
|
-
title: "search recent work by topic and completion state",
|
|
967
|
-
summary: "The user asks for recent finished work on a topic.",
|
|
968
|
-
channels: ALL_CHANNELS,
|
|
969
|
-
requiredCapabilities: [
|
|
970
|
-
"task_history",
|
|
971
|
-
"time_window_lookup",
|
|
972
|
-
"search_lookup",
|
|
973
|
-
],
|
|
974
|
-
turns: [
|
|
975
|
-
{
|
|
976
|
-
speaker: "user",
|
|
977
|
-
text: "What finished work did we do recently on calendar stuff?",
|
|
978
|
-
},
|
|
979
|
-
],
|
|
980
|
-
doneWhen: [
|
|
981
|
-
"The response combines topical search with status-aware filtering.",
|
|
982
|
-
],
|
|
983
|
-
evidence: ["task history query", "db assertions"],
|
|
984
|
-
}),
|
|
985
|
-
scenario({
|
|
986
|
-
id: "R004",
|
|
987
|
-
family: "research_and_planning",
|
|
988
|
-
profile: "full",
|
|
989
|
-
title: "research a comparison and deliver it in a structured artifact",
|
|
990
|
-
summary: "The user wants a comparative output that should result in a concrete written artifact.",
|
|
991
|
-
channels: ALL_CHANNELS,
|
|
992
|
-
requiredCapabilities: [
|
|
993
|
-
"create_task",
|
|
994
|
-
"live_provider_execution",
|
|
995
|
-
"artifact_reporting",
|
|
996
|
-
],
|
|
997
|
-
turns: [
|
|
998
|
-
{
|
|
999
|
-
speaker: "user",
|
|
1000
|
-
text: "Compare Codex and Claude for coordinator task work and put it in a small table I can read.",
|
|
1001
|
-
},
|
|
1002
|
-
{ speaker: "user", text: "Can you save that somewhere?" },
|
|
1003
|
-
],
|
|
1004
|
-
doneWhen: [
|
|
1005
|
-
"A research artifact is produced.",
|
|
1006
|
-
"The artifact is attached or discoverable through the thread.",
|
|
1007
|
-
],
|
|
1008
|
-
evidence: ["artifacts", "task thread", "trajectory records"],
|
|
1009
|
-
}),
|
|
1010
|
-
scenario({
|
|
1011
|
-
id: "K004",
|
|
1012
|
-
family: "connector_behavior",
|
|
1013
|
-
profile: "core",
|
|
1014
|
-
title: "WhatsApp-origin request continues across follow-up turns",
|
|
1015
|
-
summary: "The agent should preserve connector-origin context through multiple turns.",
|
|
1016
|
-
channels: ["whatsapp"],
|
|
1017
|
-
requiredCapabilities: ["connector_ingress", "continue_task"],
|
|
1018
|
-
turns: [
|
|
1019
|
-
{ speaker: "user", text: "Make me a tiny quote card." },
|
|
1020
|
-
{ speaker: "user", text: "Now give it a second style too." },
|
|
1021
|
-
{ speaker: "user", text: "Can I see both?" },
|
|
1022
|
-
],
|
|
1023
|
-
doneWhen: ["The WhatsApp-origin thread persists across follow-up turns."],
|
|
1024
|
-
evidence: ["connector trajectory", "task thread", "artifacts"],
|
|
1025
|
-
}),
|
|
1026
|
-
scenario({
|
|
1027
|
-
id: "K005",
|
|
1028
|
-
family: "connector_behavior",
|
|
1029
|
-
profile: "full",
|
|
1030
|
-
title: "Matrix-origin pause and resume flow",
|
|
1031
|
-
summary: "Connector-origin conversations should support interruption controls too.",
|
|
1032
|
-
channels: ["matrix"],
|
|
1033
|
-
requiredCapabilities: ["connector_ingress", "task_control", "resume_task"],
|
|
1034
|
-
turns: [
|
|
1035
|
-
{
|
|
1036
|
-
speaker: "user",
|
|
1037
|
-
text: "Build a tiny status card for my current tasks.",
|
|
1038
|
-
},
|
|
1039
|
-
{ speaker: "user", text: "Pause that." },
|
|
1040
|
-
{ speaker: "user", text: "Okay continue and add a recent history list." },
|
|
1041
|
-
],
|
|
1042
|
-
doneWhen: [
|
|
1043
|
-
"The task pauses and resumes within the same connector-origin flow.",
|
|
1044
|
-
],
|
|
1045
|
-
evidence: ["connector trajectory", "task events", "changed files"],
|
|
1046
|
-
}),
|
|
1047
|
-
scenario({
|
|
1048
|
-
id: "F003",
|
|
1049
|
-
family: "recovery_and_failover",
|
|
1050
|
-
profile: "full",
|
|
1051
|
-
title: "missing provider readiness is surfaced as a concrete failure reason",
|
|
1052
|
-
summary: "If a framework cannot run because auth or installation is missing, the failure should be explicit and auditable.",
|
|
1053
|
-
channels: ALL_CHANNELS,
|
|
1054
|
-
requiredCapabilities: ["framework_failover", "task_recovery"],
|
|
1055
|
-
turns: [
|
|
1056
|
-
{
|
|
1057
|
-
speaker: "user",
|
|
1058
|
-
text: "Use whichever task agent can actually run this and tell me clearly if one is unavailable.",
|
|
1059
|
-
},
|
|
1060
|
-
],
|
|
1061
|
-
doneWhen: [
|
|
1062
|
-
"Any framework readiness issue is surfaced explicitly in task evidence or response text.",
|
|
1063
|
-
],
|
|
1064
|
-
evidence: ["task events", "trajectory records", "task thread"],
|
|
1065
|
-
}),
|
|
1066
|
-
scenario({
|
|
1067
|
-
id: "T004",
|
|
1068
|
-
family: "task_management",
|
|
1069
|
-
profile: "full",
|
|
1070
|
-
title: "stop one task while leaving the rest alone",
|
|
1071
|
-
summary: "The user wants granular task control rather than a global stop.",
|
|
1072
|
-
channels: ALL_CHANNELS,
|
|
1073
|
-
requiredCapabilities: ["task_control", "task_detail_lookup", "stop_task"],
|
|
1074
|
-
turns: [
|
|
1075
|
-
{ speaker: "user", text: "Show me the current task list." },
|
|
1076
|
-
{
|
|
1077
|
-
speaker: "user",
|
|
1078
|
-
text: "Stop the most recent one, but leave the others running.",
|
|
1079
|
-
},
|
|
1080
|
-
],
|
|
1081
|
-
doneWhen: [
|
|
1082
|
-
"One task is interrupted or stopped without wiping the rest of the task list.",
|
|
1083
|
-
],
|
|
1084
|
-
evidence: ["task thread status", "task history query", "task events"],
|
|
1085
|
-
}),
|
|
1086
|
-
scenario({
|
|
1087
|
-
id: "V004",
|
|
1088
|
-
family: "visibility_and_audit",
|
|
1089
|
-
profile: "full",
|
|
1090
|
-
title: "task threads are retrievable by scenario and batch identifiers",
|
|
1091
|
-
summary: "The evaluator must be able to retrieve coordinator task state with the same scenario and batch tags used for trajectories.",
|
|
1092
|
-
channels: ALL_CHANNELS,
|
|
1093
|
-
requiredCapabilities: [
|
|
1094
|
-
"scenario_tagging",
|
|
1095
|
-
"batch_tagging",
|
|
1096
|
-
"task_thread_logging",
|
|
1097
|
-
],
|
|
1098
|
-
turns: [
|
|
1099
|
-
{
|
|
1100
|
-
speaker: "user",
|
|
1101
|
-
text: "Run this as a tagged coordinator evaluation and make sure the task history is grouped with it.",
|
|
1102
|
-
},
|
|
1103
|
-
],
|
|
1104
|
-
doneWhen: [
|
|
1105
|
-
"Task thread queries scoped by scenario and batch return the expected run.",
|
|
1106
|
-
],
|
|
1107
|
-
evidence: [
|
|
1108
|
-
"task thread query filters",
|
|
1109
|
-
"trajectory records",
|
|
1110
|
-
"bundle manifest",
|
|
1111
|
-
],
|
|
1112
|
-
}),
|
|
1113
|
-
];
|
|
1114
|
-
/**
 * Lookup table from scenario id to its scenario entry, built once at module
 * load from `coordinatorScenarios`. If two entries share an id, the later
 * one wins.
 */
export const coordinatorScenarioById = new Map();
for (const scenario of coordinatorScenarios) {
  coordinatorScenarioById.set(scenario.id, scenario);
}
|
|
1115
|
-
/**
 * Returns the coordinator scenarios selected by an evaluation profile.
 *
 * - `"full"` (the default) returns a shallow copy of every scenario.
 * - `"smoke"` returns only the scenarios whose `profile` is `"smoke"`.
 * - Any other value returns the scenarios whose `profile` is `"smoke"` or
 *   `"core"`.
 *
 * @param {string} [profile="full"] - Profile used to select scenarios.
 * @returns {Array<object>} A new array; the module-level list itself is
 *   never handed out.
 */
export function listCoordinatorScenarios(profile = "full") {
  if (profile === "full") {
    return coordinatorScenarios.slice();
  }
  // "smoke" is always included; every non-smoke profile widens to "core".
  const wantedProfiles = ["smoke"];
  if (profile !== "smoke") {
    wantedProfiles.push("core");
  }
  return coordinatorScenarios.filter((scenario) =>
    wantedProfiles.includes(scenario.profile),
  );
}
|
|
1124
|
-
/**
 * Tallies how many entries of `coordinatorScenarios` belong to each family.
 *
 * The ten known families are always present in the result (with 0 when no
 * scenario uses them). A scenario whose family is not in that seed list is
 * counted under its own key instead of producing `NaN`.
 *
 * @returns {Record<string, number>} A fresh object mapping family name to
 *   scenario count.
 */
export function countCoordinatorScenariosByFamily() {
  const counts = {
    build_and_edit: 0,
    continuation: 0,
    preview_and_share: 0,
    pause_resume_stop: 0,
    history_and_reporting: 0,
    research_and_planning: 0,
    connector_behavior: 0,
    recovery_and_failover: 0,
    task_management: 0,
    visibility_and_audit: 0,
  };
  for (const { family } of coordinatorScenarios) {
    // `undefined + 1` is NaN, so start unseen families at 0 before counting.
    counts[family] = (counts[family] ?? 0) + 1;
  }
  return counts;
}
|
|
1
|
+
export * from "@elizaos/app-task-coordinator/evals/coordinator-scenarios";
|