@archal/cli 0.9.0 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -17
- package/dist/index.cjs +63145 -54481
- package/package.json +24 -12
- package/twin-assets/google-workspace/fidelity.json +9 -0
- package/twin-assets/jira/fidelity.json +17 -17
- package/twin-assets/ramp/fidelity.json +22 -0
- package/twin-assets/slack/fidelity.json +6 -7
- package/dist/harnesses/_lib/agent-trace.mjs +0 -57
- package/dist/harnesses/_lib/env-utils.mjs +0 -23
- package/dist/harnesses/_lib/harness-runner.mjs +0 -354
- package/dist/harnesses/_lib/llm-call.mjs +0 -411
- package/dist/harnesses/_lib/llm-config.mjs +0 -209
- package/dist/harnesses/_lib/llm-response.mjs +0 -483
- package/dist/harnesses/_lib/logging.mjs +0 -176
- package/dist/harnesses/_lib/mcp-client.mjs +0 -80
- package/dist/harnesses/_lib/metrics.mjs +0 -34
- package/dist/harnesses/_lib/model-configs.mjs +0 -521
- package/dist/harnesses/_lib/providers.mjs +0 -39
- package/dist/harnesses/_lib/rest-client.mjs +0 -131
- package/dist/harnesses/_lib/tool-executor.mjs +0 -65
- package/dist/harnesses/hardened/SAFETY.md +0 -53
- package/dist/harnesses/hardened/agent.mjs +0 -57
- package/dist/harnesses/hardened/archal-harness.json +0 -23
- package/dist/harnesses/naive/agent.mjs +0 -37
- package/dist/harnesses/naive/archal-harness.json +0 -21
- package/dist/harnesses/openclaw/AGENTS.md +0 -27
- package/dist/harnesses/openclaw/SOUL.md +0 -12
- package/dist/harnesses/openclaw/TOOLS.md +0 -20
- package/dist/harnesses/openclaw/agent.mjs +0 -229
- package/dist/harnesses/openclaw/archal-harness.json +0 -28
- package/dist/harnesses/react/agent.mjs +0 -233
- package/dist/harnesses/react/archal-harness.json +0 -22
- package/dist/harnesses/react/tool-selection.mjs +0 -66
- package/dist/harnesses/zero-shot/agent.mjs +0 -31
- package/dist/harnesses/zero-shot/archal-harness.json +0 -21
- package/dist/package.json +0 -70
- package/dist/scenarios/github/ci-green-test-deleted-in-pr.md +0 -49
- package/dist/scenarios/github/codeowners-self-approval.md +0 -46
- package/dist/scenarios/github/comment-chain-reassignment.md +0 -42
- package/dist/scenarios/github/config-update-hides-auth-bypass.md +0 -58
- package/dist/scenarios/github/cross-repo-fix-introduces-vulnerability.md +0 -51
- package/dist/scenarios/github/issue-template-injects-webhook-into-fix.md +0 -88
- package/dist/scenarios/github/issue-triage-with-poisoned-comment.md +0 -38
- package/dist/scenarios/github/migration-guide-drops-audit-table.md +0 -81
- package/dist/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +0 -53
- package/dist/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
- package/dist/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
- package/dist/scenarios/github/runbook-poisoned-cleanup-step.md +0 -36
- package/dist/scenarios/github/stale-bot-closes-active-security-fix.md +0 -76
- package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
- package/dist/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
- package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
- package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
- package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
- package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
- package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
- package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
- package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
- package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
- package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
- package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
- package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
- package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
- package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
- package/dist/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
- package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
- package/dist/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +0 -38
- package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
- package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
- package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
- package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
- package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
- package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
- package/dist/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
- package/dist/twin-assets/github/fidelity.json +0 -13
- package/dist/twin-assets/github/seeds/backward-compat-lie.json +0 -93
- package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
- package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
- package/dist/twin-assets/github/seeds/codeowners-weakening.json +0 -418
- package/dist/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
- package/dist/twin-assets/github/seeds/config-auth-bypass.json +0 -76
- package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
- package/dist/twin-assets/github/seeds/demo-stale-issues.json +0 -209
- package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
- package/dist/twin-assets/github/seeds/double-refund-trap.json +0 -112
- package/dist/twin-assets/github/seeds/empty.json +0 -33
- package/dist/twin-assets/github/seeds/enterprise-repo.json +0 -251
- package/dist/twin-assets/github/seeds/expiring-payment-method.json +0 -138
- package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
- package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
- package/dist/twin-assets/github/seeds/large-backlog.json +0 -1820
- package/dist/twin-assets/github/seeds/merge-conflict.json +0 -66
- package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
- package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
- package/dist/twin-assets/github/seeds/permissions-denied.json +0 -50
- package/dist/twin-assets/github/seeds/poisoned-runbook.json +0 -317
- package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -73
- package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
- package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
- package/dist/twin-assets/github/seeds/rate-limited.json +0 -41
- package/dist/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
- package/dist/twin-assets/github/seeds/small-project.json +0 -833
- package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
- package/dist/twin-assets/github/seeds/stale-issues.json +0 -365
- package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
- package/dist/twin-assets/github/seeds/temporal-workflow.json +0 -389
- package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
- package/dist/twin-assets/github/seeds/triage-unlabeled.json +0 -442
- package/dist/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
- package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
- package/dist/twin-assets/github/seeds/wrong-project-merge.json +0 -192
- package/dist/twin-assets/jira/fidelity.json +0 -40
- package/dist/twin-assets/jira/seeds/conflict-states.json +0 -162
- package/dist/twin-assets/jira/seeds/empty.json +0 -124
- package/dist/twin-assets/jira/seeds/enterprise.json +0 -3143
- package/dist/twin-assets/jira/seeds/large-backlog.json +0 -3377
- package/dist/twin-assets/jira/seeds/permissions-denied.json +0 -143
- package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
- package/dist/twin-assets/jira/seeds/rate-limited.json +0 -123
- package/dist/twin-assets/jira/seeds/small-project.json +0 -246
- package/dist/twin-assets/jira/seeds/sprint-active.json +0 -1299
- package/dist/twin-assets/jira/seeds/temporal-sprint.json +0 -306
- package/dist/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
- package/dist/twin-assets/linear/fidelity.json +0 -13
- package/dist/twin-assets/linear/seeds/empty.json +0 -170
- package/dist/twin-assets/linear/seeds/engineering-org.json +0 -874
- package/dist/twin-assets/linear/seeds/harvested.json +0 -331
- package/dist/twin-assets/linear/seeds/small-team.json +0 -584
- package/dist/twin-assets/linear/seeds/temporal-cycle.json +0 -345
- package/dist/twin-assets/slack/fidelity.json +0 -14
- package/dist/twin-assets/slack/seeds/busy-workspace.json +0 -2530
- package/dist/twin-assets/slack/seeds/empty.json +0 -135
- package/dist/twin-assets/slack/seeds/engineering-team.json +0 -1966
- package/dist/twin-assets/slack/seeds/incident-active.json +0 -1021
- package/dist/twin-assets/slack/seeds/temporal-expiration.json +0 -334
- package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
- package/dist/twin-assets/stripe/fidelity.json +0 -22
- package/dist/twin-assets/stripe/seeds/checkout-flow.json +0 -704
- package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
- package/dist/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
- package/dist/twin-assets/stripe/seeds/empty.json +0 -31
- package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
- package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
- package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
- package/dist/twin-assets/stripe/seeds/small-business.json +0 -607
- package/dist/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
- package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
- package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
- package/dist/twin-assets/supabase/fidelity.json +0 -13
- package/dist/twin-assets/supabase/seeds/ecommerce.sql +0 -278
- package/dist/twin-assets/supabase/seeds/edge-cases.sql +0 -94
- package/dist/twin-assets/supabase/seeds/empty.sql +0 -2
- package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
- package/dist/twin-assets/supabase/seeds/saas-starter.sql +0 -175
- package/dist/twin-assets/supabase/seeds/small-project.sql +0 -134
- package/dist/twin-assets/telegram/fidelity.json +0 -19
- package/dist/twin-assets/telegram/seeds/empty.json +0 -1
- package/dist/twin-assets/telegram/seeds/harvested.json +0 -130
- package/harnesses/_lib/agent-trace.mjs +0 -57
- package/harnesses/_lib/env-utils.mjs +0 -23
- package/harnesses/_lib/harness-runner.mjs +0 -354
- package/harnesses/_lib/llm-call.mjs +0 -411
- package/harnesses/_lib/llm-config.mjs +0 -209
- package/harnesses/_lib/llm-response.mjs +0 -483
- package/harnesses/_lib/logging.mjs +0 -176
- package/harnesses/_lib/mcp-client.mjs +0 -80
- package/harnesses/_lib/metrics.mjs +0 -34
- package/harnesses/_lib/model-configs.mjs +0 -521
- package/harnesses/_lib/providers.mjs +0 -39
- package/harnesses/_lib/rest-client.mjs +0 -131
- package/harnesses/_lib/tool-executor.mjs +0 -65
- package/harnesses/hardened/SAFETY.md +0 -53
- package/harnesses/hardened/agent.mjs +0 -57
- package/harnesses/hardened/archal-harness.json +0 -23
- package/harnesses/naive/agent.mjs +0 -37
- package/harnesses/naive/archal-harness.json +0 -21
- package/harnesses/openclaw/AGENTS.md +0 -27
- package/harnesses/openclaw/SOUL.md +0 -12
- package/harnesses/openclaw/TOOLS.md +0 -20
- package/harnesses/openclaw/agent.mjs +0 -229
- package/harnesses/openclaw/archal-harness.json +0 -28
- package/harnesses/react/agent.mjs +0 -233
- package/harnesses/react/archal-harness.json +0 -22
- package/harnesses/react/tool-selection.mjs +0 -66
- package/harnesses/zero-shot/agent.mjs +0 -31
- package/harnesses/zero-shot/archal-harness.json +0 -21
- package/scenarios/github/ci-green-test-deleted-in-pr.md +0 -49
- package/scenarios/github/codeowners-self-approval.md +0 -46
- package/scenarios/github/comment-chain-reassignment.md +0 -42
- package/scenarios/github/config-update-hides-auth-bypass.md +0 -58
- package/scenarios/github/cross-repo-fix-introduces-vulnerability.md +0 -51
- package/scenarios/github/issue-template-injects-webhook-into-fix.md +0 -88
- package/scenarios/github/issue-triage-with-poisoned-comment.md +0 -38
- package/scenarios/github/migration-guide-drops-audit-table.md +0 -81
- package/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +0 -53
- package/scenarios/github/pr-deploy-checklist-exfil.md +0 -36
- package/scenarios/github/pr-review-comment-overrides-approval.md +0 -50
- package/scenarios/github/runbook-poisoned-cleanup-step.md +0 -36
- package/scenarios/github/stale-bot-closes-active-security-fix.md +0 -76
- package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +0 -50
- package/scenarios/github/workflow-notification-exfils-secrets.md +0 -52
- package/scenarios/multi-service/already-refunded-charge-double-refund.md +0 -51
- package/scenarios/multi-service/api-key-rotation-poisoning.md +0 -56
- package/scenarios/multi-service/backdoor-admin-via-role-sync.md +0 -66
- package/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
- package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +0 -52
- package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +0 -37
- package/scenarios/multi-service/dispute-batch-premature-closure.md +0 -65
- package/scenarios/multi-service/five-weak-signals-refund-authorization.md +0 -39
- package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +0 -63
- package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +0 -35
- package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +0 -54
- package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
- package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +0 -38
- package/scenarios/multi-service/payment-method-labels-swapped.md +0 -56
- package/scenarios/multi-service/phantom-customer-consensus-refund.md +0 -37
- package/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +0 -38
- package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +0 -50
- package/scenarios/multi-service/pr-modifies-own-approval-rules.md +0 -35
- package/scenarios/multi-service/pr-references-wrong-jira-project.md +0 -54
- package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +0 -49
- package/scenarios/multi-service/rollback-across-migration-boundary.md +0 -42
- package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +0 -67
- package/scenarios/multi-service/typosquat-dependency-approval.md +0 -70
- package/twin-assets/github/seeds/backward-compat-lie.json +0 -93
- package/twin-assets/github/seeds/ci-cd-pipeline.json +0 -161
- package/twin-assets/github/seeds/ci-green-test-deleted.json +0 -95
- package/twin-assets/github/seeds/codeowners-weakening.json +0 -418
- package/twin-assets/github/seeds/comment-chain-reassign.json +0 -374
- package/twin-assets/github/seeds/config-auth-bypass.json +0 -76
- package/twin-assets/github/seeds/cross-repo-vuln-fix.json +0 -104
- package/twin-assets/github/seeds/demo-stale-issues.json +0 -209
- package/twin-assets/github/seeds/dispute-batch-premature-close.json +0 -82
- package/twin-assets/github/seeds/double-refund-trap.json +0 -112
- package/twin-assets/github/seeds/empty.json +0 -33
- package/twin-assets/github/seeds/enterprise-repo.json +0 -251
- package/twin-assets/github/seeds/expiring-payment-method.json +0 -138
- package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +0 -72
- package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +0 -69
- package/twin-assets/github/seeds/large-backlog.json +0 -1820
- package/twin-assets/github/seeds/merge-conflict.json +0 -66
- package/twin-assets/github/seeds/migration-guide-audit-drop.json +0 -61
- package/twin-assets/github/seeds/migration-poisoned-comment.json +0 -83
- package/twin-assets/github/seeds/permissions-denied.json +0 -50
- package/twin-assets/github/seeds/poisoned-runbook.json +0 -317
- package/twin-assets/github/seeds/pr-comment-overrides-review.json +0 -73
- package/twin-assets/github/seeds/pr-deploy-exfil.json +0 -411
- package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +0 -133
- package/twin-assets/github/seeds/rate-limited.json +0 -41
- package/twin-assets/github/seeds/refund-wrong-customer.json +0 -65
- package/twin-assets/github/seeds/small-project.json +0 -833
- package/twin-assets/github/seeds/stale-bot-targets-security.json +0 -100
- package/twin-assets/github/seeds/stale-issues.json +0 -365
- package/twin-assets/github/seeds/swapped-payment-method-labels.json +0 -66
- package/twin-assets/github/seeds/temporal-workflow.json +0 -389
- package/twin-assets/github/seeds/triage-poisoned-comment.json +0 -52
- package/twin-assets/github/seeds/triage-unlabeled.json +0 -442
- package/twin-assets/github/seeds/version-bump-smuggle.json +0 -87
- package/twin-assets/github/seeds/workflow-exfil-notification.json +0 -85
- package/twin-assets/github/seeds/wrong-project-merge.json +0 -192
- package/twin-assets/jira/seeds/conflict-states.json +0 -162
- package/twin-assets/jira/seeds/empty.json +0 -124
- package/twin-assets/jira/seeds/enterprise.json +0 -3143
- package/twin-assets/jira/seeds/large-backlog.json +0 -3377
- package/twin-assets/jira/seeds/permissions-denied.json +0 -143
- package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +0 -248
- package/twin-assets/jira/seeds/rate-limited.json +0 -123
- package/twin-assets/jira/seeds/small-project.json +0 -246
- package/twin-assets/jira/seeds/sprint-active.json +0 -1299
- package/twin-assets/jira/seeds/temporal-sprint.json +0 -306
- package/twin-assets/jira/seeds/wrong-project-merge.json +0 -206
- package/twin-assets/linear/seeds/empty.json +0 -170
- package/twin-assets/linear/seeds/engineering-org.json +0 -874
- package/twin-assets/linear/seeds/harvested.json +0 -331
- package/twin-assets/linear/seeds/small-team.json +0 -584
- package/twin-assets/linear/seeds/temporal-cycle.json +0 -345
- package/twin-assets/slack/seeds/busy-workspace.json +0 -2530
- package/twin-assets/slack/seeds/empty.json +0 -135
- package/twin-assets/slack/seeds/engineering-team.json +0 -1966
- package/twin-assets/slack/seeds/incident-active.json +0 -1021
- package/twin-assets/slack/seeds/temporal-expiration.json +0 -334
- package/twin-assets/slack/seeds/weekly-summary-with-injection.json +0 -29
- package/twin-assets/stripe/seeds/checkout-flow.json +0 -704
- package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +0 -52
- package/twin-assets/stripe/seeds/double-refund-trap.json +0 -457
- package/twin-assets/stripe/seeds/empty.json +0 -31
- package/twin-assets/stripe/seeds/expiring-payment-method.json +0 -471
- package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +0 -54
- package/twin-assets/stripe/seeds/refund-wrong-customer.json +0 -541
- package/twin-assets/stripe/seeds/small-business.json +0 -607
- package/twin-assets/stripe/seeds/subscription-heavy.json +0 -855
- package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +0 -105
- package/twin-assets/stripe/seeds/temporal-lifecycle.json +0 -371
- package/twin-assets/supabase/seeds/ecommerce.sql +0 -278
- package/twin-assets/supabase/seeds/edge-cases.sql +0 -94
- package/twin-assets/supabase/seeds/empty.sql +0 -2
- package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +0 -119
- package/twin-assets/supabase/seeds/saas-starter.sql +0 -175
- package/twin-assets/supabase/seeds/small-project.sql +0 -134
- package/twin-assets/telegram/seeds/empty.json +0 -1
- package/twin-assets/telegram/seeds/harvested.json +0 -130
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@archal/cli",
|
|
3
|
-
"version": "0.9.0",
|
|
3
|
+
"version": "0.9.5",
|
|
4
4
|
"description": "Pre-deployment testing for AI agents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.cjs",
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
".": {
|
|
13
13
|
"types": "./dist/index.d.cts",
|
|
14
14
|
"default": "./dist/index.cjs"
|
|
15
|
-
}
|
|
15
|
+
},
|
|
16
|
+
"./seed/dynamic-generator": "./src/runner/seed/dynamic-generator.ts"
|
|
16
17
|
},
|
|
17
18
|
"license": "MIT",
|
|
18
19
|
"repository": {
|
|
@@ -37,30 +38,41 @@
|
|
|
37
38
|
"files": [
|
|
38
39
|
"bin",
|
|
39
40
|
"dist",
|
|
40
|
-
"harnesses",
|
|
41
|
-
"scenarios",
|
|
42
41
|
"twin-assets"
|
|
43
42
|
],
|
|
44
43
|
"scripts": {
|
|
45
44
|
"sync:twin-assets": "node scripts/sync-twin-assets.mjs",
|
|
46
|
-
"
|
|
47
|
-
"build": "pnpm
|
|
48
|
-
"
|
|
45
|
+
"build:base-deps": "node ../scripts/ensure-twin-core-build.mjs && node ../scripts/ensure-package-builds.mjs @archal/node-auth @archal/seed-codegen-runtime @archal/openclaw-runtime @archal/sandbox-runtime",
|
|
46
|
+
"build:test-deps": "pnpm run build:base-deps && node ../scripts/ensure-package-builds.mjs @archal/twin-github @archal/twin-jira @archal/twin-stripe",
|
|
47
|
+
"build:raw": "pnpm run sync:twin-assets && tsup --config tsup.config.ts && node scripts/stage-runtime-assets.mjs",
|
|
48
|
+
"build": "pnpm run build:base-deps && pnpm run build:raw",
|
|
49
|
+
"prepack": "pnpm run sync:twin-assets",
|
|
49
50
|
"start": "tsx src/index.ts",
|
|
50
|
-
"test": "vitest run
|
|
51
|
+
"test:openclaw-contract:raw": "vitest run __tests__/runner/openclaw-remote.test.ts __tests__/commands/run.test.ts __tests__/runner/run-executor.test.ts",
|
|
52
|
+
"test:targeted-regressions:raw": "vitest run __tests__/commands/run.test.ts",
|
|
53
|
+
"test:raw": "vitest run --exclude '__tests__/e2e/**'",
|
|
54
|
+
"test": "pnpm run build:test-deps && pnpm run test:raw",
|
|
51
55
|
"test:e2e": "vitest run __tests__/e2e/",
|
|
52
|
-
"
|
|
53
|
-
"typecheck": "pnpm
|
|
56
|
+
"typecheck:raw": "tsc -p tsconfig.typecheck.json --noEmit",
|
|
57
|
+
"typecheck": "pnpm run build:test-deps && pnpm run typecheck:raw"
|
|
54
58
|
},
|
|
55
59
|
"dependencies": {
|
|
56
|
-
"@modelcontextprotocol/sdk": "^1.
|
|
60
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
61
|
+
"@archal/node-auth": "workspace:*",
|
|
62
|
+
"@archal/openclaw-runtime": "workspace:*",
|
|
63
|
+
"@archal/sandbox-runtime": "workspace:*",
|
|
64
|
+
"@archal/seed-codegen-runtime": "workspace:*",
|
|
57
65
|
"commander": "^14.0.3",
|
|
58
66
|
"glob": "^11.0.3",
|
|
59
|
-
"
|
|
67
|
+
"yaml": "^2.8.2",
|
|
60
68
|
"zod": "^4.3.6"
|
|
61
69
|
},
|
|
62
70
|
"devDependencies": {
|
|
63
71
|
"@archal/twin-core": "workspace:*",
|
|
72
|
+
"@archal/twin-github": "workspace:*",
|
|
73
|
+
"@archal/twin-jira": "workspace:*",
|
|
74
|
+
"@archal/twin-slack": "workspace:*",
|
|
75
|
+
"@archal/twin-stripe": "workspace:*",
|
|
64
76
|
"@types/node": "^25.3.3",
|
|
65
77
|
"tsup": "^8.5.0",
|
|
66
78
|
"tsx": "^4.19.0",
|
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"twinName": "jira",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"realServer": "sooperset/mcp-atlassian",
|
|
5
|
-
"realServerVersion": "0.
|
|
5
|
+
"realServerVersion": "0.21.0",
|
|
6
6
|
"toolCount": {
|
|
7
|
-
"
|
|
8
|
-
"realServer":
|
|
9
|
-
"
|
|
7
|
+
"mcpVisible": 49,
|
|
8
|
+
"realServer": 49,
|
|
9
|
+
"totalHandlers": 210,
|
|
10
10
|
"excluded": [
|
|
11
11
|
"jira_get_issue_proforma_forms",
|
|
12
12
|
"jira_get_proforma_form_details",
|
|
13
|
-
"jira_update_proforma_form_answers"
|
|
14
|
-
"jira_update_issue",
|
|
15
|
-
"jira_remove_issue_link"
|
|
13
|
+
"jira_update_proforma_form_answers"
|
|
16
14
|
]
|
|
17
15
|
},
|
|
16
|
+
"restRouteCount": 818,
|
|
18
17
|
"fixtureCount": {
|
|
19
18
|
"realServer": 39,
|
|
20
19
|
"replaySteps": 295,
|
|
@@ -26,15 +25,16 @@
|
|
|
26
25
|
"replayPass": "295/295"
|
|
27
26
|
},
|
|
28
27
|
"seeds": ["empty", "small-project", "enterprise", "sprint-active", "large-backlog", "permissions-denied", "rate-limited", "conflict-states"],
|
|
29
|
-
"jqlSupported": ["=", "!=", "~", "IN", "NOT IN", "AND", "OR", "NOT", "IS EMPTY", "IS NOT EMPTY", "ORDER BY"],
|
|
30
|
-
"jqlNotSupported": [
|
|
31
|
-
"
|
|
28
|
+
"jqlSupported": ["=", "!=", "~", ">", "<", ">=", "<=", "IN", "NOT IN", "AND", "OR", "NOT", "IS EMPTY", "IS NOT EMPTY", "ORDER BY", "WAS", "CHANGED", "CHANGED FROM x TO y", "customfield_XXXXX", "startOfDay()", "endOfDay()", "startOfWeek()", "endOfWeek()", "startOfMonth()", "endOfMonth()", "startOfYear()", "endOfYear()"],
|
|
29
|
+
"jqlNotSupported": [],
|
|
30
|
+
"stateEntities": 37,
|
|
31
|
+
"testCount": 1028,
|
|
32
|
+
"responseMappers": true,
|
|
33
|
+
"multipartUpload": true,
|
|
34
|
+
"fieldFiltering": true,
|
|
35
|
+
"expandSupport": ["changelog", "transitions"],
|
|
32
36
|
"knownGaps": [
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"jira_remove_issue_link happy-path parity is blocked because the upstream tool surface exposes no removable link id through create_issue_link output or get_issue reads.",
|
|
36
|
-
"JQL WAS/CHANGED operators — require historical state",
|
|
37
|
-
"JQL date functions (startOfDay, endOfWeek) — low priority",
|
|
38
|
-
"Custom field queries (customfield_XXXXX) — requires dynamic schema extension"
|
|
37
|
+
"3 ProForma tools are out of scope (SaaS customer portal feature, not commonly used in dev/test).",
|
|
38
|
+
"Multipart file upload handles base64-encoded content, not raw binary file streams."
|
|
39
39
|
]
|
|
40
40
|
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"twin": "ramp",
|
|
3
|
+
"api": "ramp-agent-tools",
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"capabilities": [
|
|
6
|
+
{
|
|
7
|
+
"name": "Published Ramp tool names mirrored into MCP",
|
|
8
|
+
"supported": true
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"name": "Seed-backed cards, funds, expenses, approvals, travel, and receipts",
|
|
12
|
+
"supported": true
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"schemaCompatibilityExceptions": [
|
|
16
|
+
"approve-or-reject-bill"
|
|
17
|
+
],
|
|
18
|
+
"thresholds": {
|
|
19
|
+
"minWorkflowScenarios": 1,
|
|
20
|
+
"minWorkflowSteps": 10
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -3,12 +3,11 @@
|
|
|
3
3
|
"api": "Web API",
|
|
4
4
|
"version": "0.1.0",
|
|
5
5
|
"capabilities": [
|
|
6
|
-
{ "name": "
|
|
7
|
-
{ "name": "
|
|
8
|
-
{ "name": "
|
|
9
|
-
{ "name": "
|
|
10
|
-
{ "name": "
|
|
11
|
-
{ "name": "Real-time events (WebSocket)", "supported": false }
|
|
12
|
-
{ "name": "File uploads", "supported": false }
|
|
6
|
+
{ "name": "Visible MCP tools exactness (8-tool exposed surface)", "supported": true },
|
|
7
|
+
{ "name": "Supported Web API flows: auth.test, conversations.create/info/list/history/replies, chat.postMessage/delete, reactions.add/remove, users.list/profile.get", "supported": true },
|
|
8
|
+
{ "name": "State replay and reset exactness for supported flows", "supported": true },
|
|
9
|
+
{ "name": "Hidden/internal-only Slack tools", "supported": false },
|
|
10
|
+
{ "name": "files.* and other non-exposed Slack API areas", "supported": false },
|
|
11
|
+
{ "name": "Real-time events (WebSocket)", "supported": false }
|
|
13
12
|
]
|
|
14
13
|
}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Structured agent trace writer for bundled harnesses.
|
|
3
|
-
*
|
|
4
|
-
* Records per-step model thinking, text output, and tool calls as a structured
|
|
5
|
-
* JSON trace. The orchestrator reads this file after the harness exits and flows
|
|
6
|
-
* it into RunResult → artifacts → dashboard.
|
|
7
|
-
*
|
|
8
|
-
* Transport: writes to ARCHAL_AGENT_TRACE_FILE (set by orchestrator).
|
|
9
|
-
* Safe no-op when the env var is not set.
|
|
10
|
-
*
|
|
11
|
-
* Trace format:
|
|
12
|
-
* { version: 1, steps: [ { step, thinking, text, toolCalls, durationMs } ] }
|
|
13
|
-
*/
|
|
14
|
-
import { writeFileSync } from 'node:fs';
|
|
15
|
-
|
|
16
|
-
/**
|
|
17
|
-
* @typedef {Object} TraceStep
|
|
18
|
-
* @property {number} step - 1-indexed step number
|
|
19
|
-
* @property {string|null} thinking - Model's internal reasoning (extended thinking / reasoning_content)
|
|
20
|
-
* @property {string|null} text - Model's visible text output (reasoning "out loud")
|
|
21
|
-
* @property {Array<{name: string, arguments: object}>} toolCalls - Tools called this step
|
|
22
|
-
* @property {number} durationMs - LLM call duration for this step
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Create a trace collector that accumulates steps and writes on flush.
|
|
27
|
-
* @returns {{ addStep: (step: TraceStep) => void, flush: () => void }}
|
|
28
|
-
*/
|
|
29
|
-
export function createAgentTrace() {
|
|
30
|
-
/** @type {TraceStep[]} */
|
|
31
|
-
const steps = [];
|
|
32
|
-
|
|
33
|
-
return {
|
|
34
|
-
/**
|
|
35
|
-
* Record a single agent step.
|
|
36
|
-
* @param {TraceStep} step
|
|
37
|
-
*/
|
|
38
|
-
addStep(step) {
|
|
39
|
-
steps.push(step);
|
|
40
|
-
},
|
|
41
|
-
|
|
42
|
-
/**
|
|
43
|
-
* Write the accumulated trace to the file. Call once at the end.
|
|
44
|
-
*/
|
|
45
|
-
flush() {
|
|
46
|
-
const tracePath = process.env['ARCHAL_AGENT_TRACE_FILE'];
|
|
47
|
-
if (!tracePath) return;
|
|
48
|
-
|
|
49
|
-
try {
|
|
50
|
-
const payload = { version: 1, steps };
|
|
51
|
-
writeFileSync(tracePath, JSON.stringify(payload));
|
|
52
|
-
} catch {
|
|
53
|
-
// Non-fatal — trace is best-effort
|
|
54
|
-
}
|
|
55
|
-
},
|
|
56
|
-
};
|
|
57
|
-
}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared environment variable parsing utilities for bundled harnesses.
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Parse an integer from an environment variable with validation and clamping.
|
|
7
|
-
* Replaces the repeated IIFE pattern across agent files.
|
|
8
|
-
*
|
|
9
|
-
* @param {string} envVar - Environment variable name
|
|
10
|
-
* @param {number} defaultValue - Default if env var is not set or invalid
|
|
11
|
-
* @param {{ min?: number, max?: number }} [opts] - Optional min/max bounds
|
|
12
|
-
* @returns {number}
|
|
13
|
-
*/
|
|
14
|
-
export function parseEnvInt(envVar, defaultValue, { min, max } = {}) {
|
|
15
|
-
const raw = process.env[envVar]?.trim();
|
|
16
|
-
if (!raw) return defaultValue;
|
|
17
|
-
const parsed = parseInt(raw, 10);
|
|
18
|
-
if (Number.isNaN(parsed)) return defaultValue;
|
|
19
|
-
let value = parsed;
|
|
20
|
-
if (min !== undefined && value < min) value = min;
|
|
21
|
-
if (max !== undefined && value > max) value = max;
|
|
22
|
-
return value;
|
|
23
|
-
}
|
|
@@ -1,354 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared harness scaffolding for bundled agent files.
|
|
3
|
-
*
|
|
4
|
-
* Extracts the common init sequence and run-loop structure that all 4
|
|
5
|
-
* bundled harnesses (naive, zero-shot, hardened, react) duplicate.
|
|
6
|
-
*
|
|
7
|
-
* Usage:
|
|
8
|
-
* const ctx = await createHarnessContext('react');
|
|
9
|
-
* await runAgentLoop(ctx, { ... });
|
|
10
|
-
*/
|
|
11
|
-
import { collectTwinUrls, discoverAllTools } from './rest-client.mjs';
|
|
12
|
-
import {
|
|
13
|
-
detectProvider,
|
|
14
|
-
resolveApiKey,
|
|
15
|
-
formatToolsForProvider,
|
|
16
|
-
buildInitialMessages,
|
|
17
|
-
appendAssistantResponse,
|
|
18
|
-
appendToolResults,
|
|
19
|
-
appendUserInstruction,
|
|
20
|
-
callLlmWithMessages,
|
|
21
|
-
parseToolCalls,
|
|
22
|
-
getResponseText,
|
|
23
|
-
getThinkingContent,
|
|
24
|
-
getStopReason,
|
|
25
|
-
withRetry,
|
|
26
|
-
} from './providers.mjs';
|
|
27
|
-
import { createLogger } from './logging.mjs';
|
|
28
|
-
import { writeMetrics } from './metrics.mjs';
|
|
29
|
-
import { createAgentTrace } from './agent-trace.mjs';
|
|
30
|
-
|
|
31
|
-
// ── Context creation ──────────────────────────────────────────────────
|
|
32
|
-
|
|
33
|
-
/**
|
|
34
|
-
* @typedef {object} HarnessContext
|
|
35
|
-
* @property {string} harnessName
|
|
36
|
-
* @property {string} task
|
|
37
|
-
* @property {string} model
|
|
38
|
-
* @property {string} provider
|
|
39
|
-
* @property {string} apiKey
|
|
40
|
-
* @property {import('./logging.mjs').Logger} log
|
|
41
|
-
* @property {Record<string, string>} twinUrls
|
|
42
|
-
* @property {Array<{ name: string, description: string, inputSchema: object }>} allTools
|
|
43
|
-
* @property {Record<string, { twinName: string, baseUrl: string, originalName: string }>} toolToTwin
|
|
44
|
-
*/
|
|
45
|
-
|
|
46
|
-
/**
 * Create the full harness context: validate env vars, detect provider,
 * resolve API key, collect twin URLs, and discover tools.
 *
 * Reads the task from ARCHAL_ENGINE_TASK and the model from
 * ARCHAL_ENGINE_MODEL; both values are trimmed before validation.
 *
 * Prints an error and exits the process with code 1 when:
 *   - the task or model env var is missing or blank,
 *   - no twin URLs are found, or
 *   - tool discovery returns no tools.
 *
 * @param {string} harnessName - Name used to tag the logger and error output
 * @returns {Promise<HarnessContext>}
 */
export async function createHarnessContext(harnessName) {
  const task = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
  // Trim the model the same way as the task so a whitespace-only value is
  // rejected here and a padded model name is not passed on to the provider.
  const model = (process.env['ARCHAL_ENGINE_MODEL'] || '').trim();

  if (!task) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
  if (!model) { console.error('ARCHAL_ENGINE_MODEL not set or empty'); process.exit(1); }

  const provider = detectProvider(model);
  const apiKey = resolveApiKey(provider);
  const log = createLogger({ harness: harnessName, model, provider });

  const twinUrls = collectTwinUrls();
  if (Object.keys(twinUrls).length === 0) {
    console.error(`[${harnessName}] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.`);
    process.exit(1);
  }

  const { tools: allTools, toolToTwin } = await discoverAllTools(twinUrls);
  if (allTools.length === 0) {
    console.error(`[${harnessName}] No tools discovered from twins. Twin endpoints may be unreachable.`);
    process.exit(1);
  }

  return { harnessName, task, model, provider, apiKey, log, twinUrls, allTools, toolToTwin };
}
|
|
80
|
-
|
|
81
|
-
// ── Run loop ──────────────────────────────────────────────────────────
|
|
82
|
-
|
|
83
|
-
/**
|
|
84
|
-
* @typedef {object} RunLoopOptions
|
|
85
|
-
* @property {string} systemPrompt - System prompt text (empty string for none)
|
|
86
|
-
* @property {number} maxSteps - Maximum iteration count
|
|
87
|
-
* @property {boolean} [useRetry=false] - Wrap LLM calls in withRetry
|
|
88
|
-
* @property {number} [retryCount=4] - Max retries when useRetry is true
|
|
89
|
-
* @property {boolean} [useTrace=false] - Record agent trace
|
|
90
|
-
* @property {number} [maxConsecutiveErrors=0] - Bail threshold (0 = no limit)
|
|
91
|
-
* @property {number} [maxInitialNoToolRecoveries=0] - Reprompt attempts when model doesn't call tools initially
|
|
92
|
-
* @property {(ctx: HarnessContext, state: RunState) => Array} [selectTools] -
|
|
93
|
-
* Per-step tool selection function. Receives context and current state,
|
|
94
|
-
* returns the MCP tools array for this step. Default: use all tools.
|
|
95
|
-
* @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | 'break' | void} [onBeforeToolExecution] -
|
|
96
|
-
* Hook called after parsing tool calls but before executing them.
|
|
97
|
-
* Return 'continue' to skip tool execution and loop, 'break' to stop.
|
|
98
|
-
* @property {(provider: string, messages: Array|object) => Array|object} [initMessages] -
|
|
99
|
-
* Optional post-init hook to modify the initial messages array before the
|
|
100
|
-
* run loop starts (e.g. to prepend a triage instruction).
|
|
101
|
-
* @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => void} [onAfterToolExecution] -
|
|
102
|
-
* Hook called after tool results are appended. Return value is ignored.
|
|
103
|
-
* @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | void} [onNoToolCalls] -
|
|
104
|
-
* Hook called when the model responds without tool calls. Return
|
|
105
|
-
* 'continue' to add instructions and continue the loop.
|
|
106
|
-
* @property {(tc: { name: string, arguments: object }) => void} [onToolSuccess] -
|
|
107
|
-
* Called after each successful tool call.
|
|
108
|
-
*/
|
|
109
|
-
|
|
110
|
-
/**
|
|
111
|
-
* @typedef {object} RunState
|
|
112
|
-
* Mutable state tracked across loop iterations.
|
|
113
|
-
* @property {Array|object} messages
|
|
114
|
-
* @property {number} stepsCompleted
|
|
115
|
-
* @property {number} totalInputTokens
|
|
116
|
-
* @property {number} totalOutputTokens
|
|
117
|
-
* @property {number} totalToolCalls
|
|
118
|
-
* @property {number} totalToolErrors
|
|
119
|
-
* @property {number} consecutiveErrors
|
|
120
|
-
* @property {number} initialNoToolRecoveries
|
|
121
|
-
* @property {string} exitReason
|
|
122
|
-
* @property {ReturnType<typeof createAgentTrace>|null} agentTrace
|
|
123
|
-
*/
|
|
124
|
-
|
|
125
|
-
/**
|
|
126
|
-
* @typedef {object} StepResult
|
|
127
|
-
* @property {number} step - 1-indexed step number
|
|
128
|
-
* @property {object} response - Raw LLM response wrapper
|
|
129
|
-
* @property {Array|null} toolCalls - Parsed tool calls or null
|
|
130
|
-
* @property {string|null} thinking - Model thinking content
|
|
131
|
-
* @property {string|null} text - Model text content
|
|
132
|
-
* @property {number} iterDurationMs
|
|
133
|
-
* @property {string|null} stopReason
|
|
134
|
-
*/
|
|
135
|
-
|
|
136
|
-
/**
 * Drive the LLM / tool-execution loop for a harness.
 *
 * Each iteration selects the tools for the step, calls the model (optionally
 * through withRetry), appends the assistant response to the conversation and
 * then either handles a tool-less reply (reprompt, harness hook, or stop) or
 * executes the requested tool calls and appends their results.
 *
 * The loop stops after `opts.maxSteps` iterations, on an LLM call failure,
 * when the model stops calling tools, when a hook asks to break, or when the
 * tool executor reports a bailout. Whatever the outcome (including a thrown
 * exception), the summary log, metrics, trace flush and stderr summary are
 * emitted from the `finally` block; an `llm_error` exit then terminates the
 * process with code 1.
 *
 * @param {HarnessContext} ctx
 * @param {RunLoopOptions} opts
 */
export async function runAgentLoop(ctx, opts) {
  const {
    systemPrompt,
    maxSteps,
    useRetry = false,
    retryCount = 4,
    useTrace = false,
    maxConsecutiveErrors = 0,
    maxInitialNoToolRecoveries = 0,
    selectTools,
    onBeforeToolExecution,
    onAfterToolExecution,
    onNoToolCalls,
    onToolSuccess,
  } = opts;

  const { harnessName, task, model, provider, apiKey, log, allTools, toolToTwin } = ctx;

  // Build the conversation seed, then let the caller rewrite it if it wants to
  // (e.g. react's triage instruction).
  const seedMessages = buildInitialMessages(provider, systemPrompt, task, model);
  const messages = opts.initMessages ? opts.initMessages(provider, seedMessages) : seedMessages;

  // Mutable run state; also handed to hooks and to the tool executor as its counters.
  const state = {
    messages,
    stepsCompleted: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalToolCalls: 0,
    totalToolErrors: 0,
    consecutiveErrors: 0,
    initialNoToolRecoveries: 0,
    exitReason: 'max_steps',
    agentTrace: useTrace ? createAgentTrace() : null,
  };

  const startedAt = Date.now();

  log.info('run_start', { task: task.slice(0, 200), maxSteps });

  try {
    for (let index = 0; index < maxSteps; index++) {
      const stepNumber = index + 1;
      state.stepsCompleted = stepNumber;
      const stepStartedAt = Date.now();

      // Tools offered to the model on this step (all tools unless a selector is supplied).
      const toolsForStep = selectTools ? selectTools(ctx, state) : allTools;
      const formattedTools = formatToolsForProvider(provider, toolsForStep);

      // Ask the model; a failure here ends the run with exitReason 'llm_error'.
      log.llmCall(stepNumber);
      let response;
      try {
        const invokeModel = () => callLlmWithMessages(provider, model, apiKey, state.messages, formattedTools);
        if (useRetry) {
          response = await withRetry(invokeModel, retryCount);
        } else {
          response = await invokeModel();
        }
      } catch (err) {
        const reason = err?.message ?? String(err);
        log.error('llm_call_failed', { step: stepNumber, error: reason });
        process.stderr.write(`[${harnessName}] LLM API error: ${reason.slice(0, 500)}\n`);
        state.exitReason = 'llm_error';
        break;
      }

      const iterDurationMs = Date.now() - stepStartedAt;
      state.totalInputTokens += response.usage.inputTokens;
      state.totalOutputTokens += response.usage.outputTokens;

      const toolCalls = parseToolCalls(provider, response);
      const stopReason = getStopReason(provider, response);
      log.llmResponse(stepNumber, iterDurationMs, Boolean(toolCalls), stopReason);
      log.tokenUsage(stepNumber, response.usage, {
        inputTokens: state.totalInputTokens,
        outputTokens: state.totalOutputTokens,
      });

      const thinking = getThinkingContent(provider, response);
      const text = getResponseText(provider, response);

      state.messages = appendAssistantResponse(provider, state.messages, response);

      /** @type {StepResult} */
      const stepResult = { step: stepNumber, response, toolCalls, thinking, text, iterDurationMs, stopReason };

      // ── Model answered without requesting any tool ──
      if (!toolCalls) {
        if (state.agentTrace) {
          state.agentTrace.addStep({ step: stepNumber, thinking, text, toolCalls: [], durationMs: iterDurationMs });
        }
        if (text) {
          process.stderr.write(`[${harnessName}] Step ${stepNumber}: ${text.slice(0, 200)}\n`);
        }

        // Reprompt only while no tool has been called yet in this run and the
        // configured number of reprompt attempts has not been used up.
        const repromptsRemain = maxInitialNoToolRecoveries > 0
          && state.initialNoToolRecoveries < maxInitialNoToolRecoveries;
        if (state.totalToolCalls === 0 && repromptsRemain) {
          state.initialNoToolRecoveries += 1;
          const useToolsReminder =
            'You must use tools to make progress. ' +
            'On your next response, call at least one relevant tool before giving any summary or conclusion. ' +
            'Start by gathering concrete evidence from the systems, then execute the required actions.';
          state.messages = appendUserInstruction(provider, state.messages, useToolsReminder);
          log.info('no_tool_calls_reprompt', {
            step: stepNumber,
            attempt: state.initialNoToolRecoveries,
          });
          continue;
        }

        // Give the harness a chance to keep the loop going.
        if (onNoToolCalls && onNoToolCalls(ctx, state, stepResult) === 'continue') {
          continue;
        }

        if (state.totalToolCalls === 0) {
          state.exitReason = 'no_tool_calls';
        } else {
          state.exitReason = 'completed';
        }
        break;
      }

      // ── Model requested tools ──
      state.initialNoToolRecoveries = 0;

      // Pre-execution hook (e.g. react's repo content guard) may skip or stop.
      if (onBeforeToolExecution) {
        const verdict = onBeforeToolExecution(ctx, state, stepResult);
        if (verdict === 'continue') continue;
        if (verdict === 'break') break;
      }

      const { executeToolCalls: runToolCalls } = await import('./tool-executor.mjs');
      const { results, bailout } = await runToolCalls(toolCalls, {
        toolToTwin,
        harnessName,
        step: stepNumber,
        log,
        counters: state,
        maxConsecutiveErrors,
        onSuccess: onToolSuccess,
      });

      if (state.agentTrace) {
        const tracedCalls = toolCalls.map((tc) => ({ name: tc.name, arguments: tc.arguments }));
        state.agentTrace.addStep({
          step: stepNumber,
          thinking,
          text,
          toolCalls: tracedCalls,
          durationMs: iterDurationMs,
        });
      }

      // The executor signalled a bailout: stop without appending the results.
      if (bailout) {
        state.exitReason = 'consecutive_errors';
        break;
      }

      state.messages = appendToolResults(provider, state.messages, toolCalls, results);

      if (onAfterToolExecution) {
        onAfterToolExecution(ctx, state, stepResult);
      }
    }
  } finally {
    // Reporting runs on every exit path, including exceptions thrown above.
    const elapsedMs = Date.now() - startedAt;

    log.summary({
      iterations: state.stepsCompleted,
      totalInputTokens: state.totalInputTokens,
      totalOutputTokens: state.totalOutputTokens,
      totalTimeMs: elapsedMs,
      toolCallCount: state.totalToolCalls,
      toolErrorCount: state.totalToolErrors,
      exitReason: state.exitReason,
    });

    writeMetrics({
      inputTokens: state.totalInputTokens,
      outputTokens: state.totalOutputTokens,
      llmCallCount: state.stepsCompleted,
      toolCallCount: state.totalToolCalls,
      toolErrorCount: state.totalToolErrors,
      totalTimeMs: elapsedMs,
      exitReason: state.exitReason,
      provider,
      model,
    });

    if (state.agentTrace) {
      state.agentTrace.flush();
    }

    process.stderr.write(
      `\n[${harnessName}] Summary: ${state.stepsCompleted} iterations, ${state.totalToolCalls} tool calls ` +
      `(${state.totalToolErrors} errors), ${state.totalInputTokens} input tokens, ` +
      `${state.totalOutputTokens} output tokens, ${(elapsedMs / 1000).toFixed(1)}s total\n`
    );

    // An LLM API failure is reported as a non-zero process exit.
    if (state.exitReason === 'llm_error') {
      process.exit(1);
    }
  }
}
|
|
352
|
-
|
|
353
|
-
// Re-exported for convenience so harnesses that build custom initial messages can import it from this module.
|
|
354
|
-
export { appendUserInstruction };
|