@archal/cli 0.7.12 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -9
- package/bin/archal.cjs +15 -0
- package/dist/harnesses/_lib/agent-trace.mjs +57 -0
- package/dist/harnesses/_lib/env-utils.mjs +23 -0
- package/dist/harnesses/_lib/harness-runner.mjs +354 -0
- package/dist/harnesses/_lib/llm-call.mjs +411 -0
- package/dist/harnesses/_lib/llm-config.mjs +209 -0
- package/dist/harnesses/_lib/llm-response.mjs +483 -0
- package/dist/harnesses/_lib/logging.mjs +176 -0
- package/dist/harnesses/_lib/mcp-client.mjs +80 -0
- package/dist/harnesses/_lib/metrics.mjs +34 -0
- package/dist/harnesses/_lib/model-configs.mjs +521 -0
- package/dist/harnesses/_lib/providers.mjs +39 -0
- package/dist/harnesses/_lib/rest-client.mjs +131 -0
- package/dist/harnesses/_lib/tool-executor.mjs +65 -0
- package/dist/harnesses/hardened/SAFETY.md +53 -0
- package/dist/harnesses/hardened/agent.mjs +57 -0
- package/dist/harnesses/hardened/archal-harness.json +23 -0
- package/dist/harnesses/naive/agent.mjs +37 -0
- package/dist/harnesses/naive/archal-harness.json +21 -0
- package/dist/harnesses/openclaw/AGENTS.md +27 -0
- package/dist/harnesses/openclaw/SOUL.md +12 -0
- package/dist/harnesses/openclaw/TOOLS.md +20 -0
- package/dist/harnesses/openclaw/agent.mjs +229 -0
- package/dist/harnesses/openclaw/archal-harness.json +28 -0
- package/dist/harnesses/react/agent.mjs +233 -0
- package/dist/harnesses/react/archal-harness.json +22 -0
- package/dist/harnesses/react/tool-selection.mjs +66 -0
- package/dist/harnesses/zero-shot/agent.mjs +31 -0
- package/dist/harnesses/zero-shot/archal-harness.json +21 -0
- package/dist/index.cjs +61018 -0
- package/dist/package.json +70 -0
- package/dist/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
- package/dist/scenarios/github/codeowners-self-approval.md +46 -0
- package/dist/scenarios/github/comment-chain-reassignment.md +42 -0
- package/dist/scenarios/github/config-update-hides-auth-bypass.md +58 -0
- package/dist/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
- package/dist/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
- package/dist/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
- package/dist/scenarios/github/migration-guide-drops-audit-table.md +81 -0
- package/dist/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
- package/dist/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
- package/dist/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
- package/dist/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
- package/dist/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
- package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
- package/dist/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
- package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
- package/dist/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
- package/dist/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
- package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
- package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
- package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
- package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
- package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
- package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
- package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
- package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
- package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
- package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
- package/dist/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
- package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
- package/dist/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
- package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
- package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
- package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
- package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
- package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
- package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
- package/dist/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
- package/dist/twin-assets/github/fidelity.json +13 -0
- package/dist/twin-assets/github/seeds/backward-compat-lie.json +93 -0
- package/dist/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
- package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
- package/dist/twin-assets/github/seeds/codeowners-weakening.json +418 -0
- package/dist/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
- package/dist/twin-assets/github/seeds/config-auth-bypass.json +76 -0
- package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
- package/dist/twin-assets/github/seeds/demo-stale-issues.json +209 -0
- package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
- package/dist/twin-assets/github/seeds/double-refund-trap.json +112 -0
- package/dist/twin-assets/github/seeds/empty.json +33 -0
- package/dist/twin-assets/github/seeds/enterprise-repo.json +251 -0
- package/dist/twin-assets/github/seeds/expiring-payment-method.json +138 -0
- package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
- package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
- package/dist/twin-assets/github/seeds/large-backlog.json +1820 -0
- package/dist/twin-assets/github/seeds/merge-conflict.json +66 -0
- package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
- package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
- package/dist/twin-assets/github/seeds/permissions-denied.json +50 -0
- package/dist/twin-assets/github/seeds/poisoned-runbook.json +317 -0
- package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
- package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
- package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
- package/dist/twin-assets/github/seeds/rate-limited.json +41 -0
- package/dist/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
- package/dist/twin-assets/github/seeds/small-project.json +833 -0
- package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
- package/dist/twin-assets/github/seeds/stale-issues.json +365 -0
- package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
- package/dist/twin-assets/github/seeds/temporal-workflow.json +389 -0
- package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
- package/dist/twin-assets/github/seeds/triage-unlabeled.json +442 -0
- package/dist/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
- package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
- package/dist/twin-assets/github/seeds/wrong-project-merge.json +192 -0
- package/dist/twin-assets/jira/fidelity.json +40 -0
- package/dist/twin-assets/jira/seeds/conflict-states.json +162 -0
- package/dist/twin-assets/jira/seeds/empty.json +124 -0
- package/dist/twin-assets/jira/seeds/enterprise.json +3143 -0
- package/dist/twin-assets/jira/seeds/large-backlog.json +3377 -0
- package/dist/twin-assets/jira/seeds/permissions-denied.json +143 -0
- package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
- package/dist/twin-assets/jira/seeds/rate-limited.json +123 -0
- package/dist/twin-assets/jira/seeds/small-project.json +246 -0
- package/dist/twin-assets/jira/seeds/sprint-active.json +1299 -0
- package/dist/twin-assets/jira/seeds/temporal-sprint.json +306 -0
- package/dist/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
- package/dist/twin-assets/linear/fidelity.json +13 -0
- package/dist/twin-assets/linear/seeds/empty.json +170 -0
- package/dist/twin-assets/linear/seeds/engineering-org.json +874 -0
- package/dist/twin-assets/linear/seeds/harvested.json +331 -0
- package/dist/twin-assets/linear/seeds/small-team.json +584 -0
- package/dist/twin-assets/linear/seeds/temporal-cycle.json +345 -0
- package/dist/twin-assets/slack/fidelity.json +14 -0
- package/dist/twin-assets/slack/seeds/busy-workspace.json +2530 -0
- package/dist/twin-assets/slack/seeds/empty.json +135 -0
- package/dist/twin-assets/slack/seeds/engineering-team.json +1966 -0
- package/dist/twin-assets/slack/seeds/incident-active.json +1021 -0
- package/dist/twin-assets/slack/seeds/temporal-expiration.json +334 -0
- package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
- package/dist/twin-assets/stripe/fidelity.json +22 -0
- package/dist/twin-assets/stripe/seeds/checkout-flow.json +704 -0
- package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
- package/dist/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
- package/dist/twin-assets/stripe/seeds/empty.json +31 -0
- package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
- package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
- package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
- package/dist/twin-assets/stripe/seeds/small-business.json +607 -0
- package/dist/twin-assets/stripe/seeds/subscription-heavy.json +855 -0
- package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
- package/dist/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
- package/dist/twin-assets/supabase/fidelity.json +13 -0
- package/dist/twin-assets/supabase/seeds/ecommerce.sql +278 -0
- package/dist/twin-assets/supabase/seeds/edge-cases.sql +94 -0
- package/dist/twin-assets/supabase/seeds/empty.sql +2 -0
- package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
- package/dist/twin-assets/supabase/seeds/saas-starter.sql +175 -0
- package/dist/twin-assets/supabase/seeds/small-project.sql +134 -0
- package/dist/twin-assets/telegram/fidelity.json +19 -0
- package/dist/twin-assets/telegram/seeds/empty.json +1 -0
- package/dist/twin-assets/telegram/seeds/harvested.json +130 -0
- package/harnesses/_lib/env-utils.mjs +23 -0
- package/harnesses/_lib/harness-runner.mjs +354 -0
- package/harnesses/_lib/llm-call.mjs +411 -0
- package/harnesses/_lib/llm-config.mjs +209 -0
- package/harnesses/_lib/llm-response.mjs +483 -0
- package/harnesses/_lib/providers.mjs +36 -1066
- package/harnesses/_lib/tool-executor.mjs +65 -0
- package/harnesses/hardened/agent.mjs +21 -225
- package/harnesses/naive/agent.mjs +9 -137
- package/harnesses/openclaw/AGENTS.md +27 -0
- package/harnesses/openclaw/SOUL.md +12 -0
- package/harnesses/openclaw/TOOLS.md +20 -0
- package/harnesses/openclaw/agent.mjs +229 -0
- package/harnesses/openclaw/archal-harness.json +28 -0
- package/harnesses/react/agent.mjs +184 -218
- package/harnesses/react/tool-selection.mjs +66 -0
- package/harnesses/zero-shot/agent.mjs +10 -190
- package/package.json +26 -22
- package/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
- package/scenarios/github/codeowners-self-approval.md +46 -0
- package/scenarios/github/comment-chain-reassignment.md +42 -0
- package/scenarios/github/config-update-hides-auth-bypass.md +58 -0
- package/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
- package/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
- package/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
- package/scenarios/github/migration-guide-drops-audit-table.md +81 -0
- package/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
- package/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
- package/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
- package/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
- package/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
- package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
- package/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
- package/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
- package/scenarios/multi-service/api-key-rotation-poisoning.md +56 -0
- package/scenarios/multi-service/backdoor-admin-via-role-sync.md +66 -0
- package/scenarios/multi-service/ceo-fraud-via-slack.md +34 -0
- package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
- package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
- package/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
- package/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
- package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
- package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
- package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
- package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +31 -0
- package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
- package/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
- package/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
- package/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
- package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
- package/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
- package/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
- package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
- package/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
- package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
- package/scenarios/multi-service/typosquat-dependency-approval.md +70 -0
- package/twin-assets/github/seeds/backward-compat-lie.json +93 -0
- package/twin-assets/github/seeds/ci-cd-pipeline.json +161 -0
- package/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
- package/twin-assets/github/seeds/codeowners-weakening.json +418 -0
- package/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
- package/twin-assets/github/seeds/config-auth-bypass.json +76 -0
- package/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
- package/twin-assets/github/seeds/demo-stale-issues.json +0 -10
- package/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
- package/twin-assets/github/seeds/double-refund-trap.json +112 -0
- package/twin-assets/github/seeds/enterprise-repo.json +133 -8
- package/twin-assets/github/seeds/expiring-payment-method.json +138 -0
- package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
- package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
- package/twin-assets/github/seeds/large-backlog.json +0 -22
- package/twin-assets/github/seeds/merge-conflict.json +0 -1
- package/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
- package/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
- package/twin-assets/github/seeds/permissions-denied.json +1 -4
- package/twin-assets/github/seeds/poisoned-runbook.json +317 -0
- package/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
- package/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
- package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
- package/twin-assets/github/seeds/rate-limited.json +1 -3
- package/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
- package/twin-assets/github/seeds/small-project.json +42 -16
- package/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
- package/twin-assets/github/seeds/stale-issues.json +1 -11
- package/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
- package/twin-assets/github/seeds/temporal-workflow.json +389 -0
- package/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
- package/twin-assets/github/seeds/triage-unlabeled.json +1 -10
- package/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
- package/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
- package/twin-assets/github/seeds/wrong-project-merge.json +192 -0
- package/twin-assets/jira/fidelity.json +12 -14
- package/twin-assets/jira/seeds/enterprise.json +2975 -339
- package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
- package/twin-assets/jira/seeds/sprint-active.json +1209 -146
- package/twin-assets/jira/seeds/temporal-sprint.json +306 -0
- package/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
- package/twin-assets/linear/seeds/engineering-org.json +684 -122
- package/twin-assets/linear/seeds/small-team.json +99 -11
- package/twin-assets/linear/seeds/temporal-cycle.json +345 -0
- package/twin-assets/slack/seeds/busy-workspace.json +244 -3
- package/twin-assets/slack/seeds/empty.json +10 -2
- package/twin-assets/slack/seeds/engineering-team.json +163 -3
- package/twin-assets/slack/seeds/incident-active.json +6 -1
- package/twin-assets/slack/seeds/temporal-expiration.json +334 -0
- package/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
- package/twin-assets/stripe/seeds/checkout-flow.json +704 -0
- package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
- package/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
- package/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
- package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
- package/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
- package/twin-assets/stripe/seeds/small-business.json +241 -12
- package/twin-assets/stripe/seeds/subscription-heavy.json +820 -27
- package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
- package/twin-assets/stripe/seeds/temporal-lifecycle.json +371 -0
- package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
- package/twin-assets/supabase/seeds/saas-starter.sql +175 -0
- package/twin-assets/telegram/fidelity.json +19 -0
- package/twin-assets/telegram/seeds/empty.json +1 -0
- package/twin-assets/telegram/seeds/harvested.json +130 -0
- package/LICENSE +0 -8
- package/dist/api-client-D7SCA64V.js +0 -23
- package/dist/api-client-DI7R3H4C.js +0 -21
- package/dist/api-client-EMMBIJU7.js +0 -23
- package/dist/api-client-VYQMFDLN.js +0 -23
- package/dist/api-client-WN45C63M.js +0 -23
- package/dist/api-client-ZOCVG6CC.js +0 -21
- package/dist/api-client-ZUMDL3TP.js +0 -23
- package/dist/chunk-3EH6CG2H.js +0 -561
- package/dist/chunk-3RG5ZIWI.js +0 -10
- package/dist/chunk-4FTU232H.js +0 -191
- package/dist/chunk-4LM2CKUI.js +0 -561
- package/dist/chunk-A6WOU5RO.js +0 -214
- package/dist/chunk-AXLDC4PC.js +0 -561
- package/dist/chunk-NZEPQ6IZ.js +0 -83
- package/dist/chunk-PGMDLZW5.js +0 -561
- package/dist/chunk-SVGN2AFT.js +0 -148
- package/dist/chunk-UOJHYCMX.js +0 -144
- package/dist/chunk-VYCADG5E.js +0 -189
- package/dist/chunk-WZXES7XO.js +0 -136
- package/dist/chunk-XJOKVFOL.js +0 -561
- package/dist/chunk-XSO7ETSM.js +0 -561
- package/dist/chunk-YDGWON57.js +0 -561
- package/dist/index.js +0 -15908
- package/dist/login-4RNNR4YA.js +0 -7
- package/dist/login-CQ2DRBRU.js +0 -7
- package/dist/login-LOTTPY7G.js +0 -7
- package/dist/login-MBCG3N5P.js +0 -7
- package/dist/login-MP6YLOEA.js +0 -7
- package/dist/login-SGLSVIZZ.js +0 -7
- package/dist/login-TFBKIZ7I.js +0 -7
- package/dist/runner/dynamic-seed-generator.mjs +0 -7166
- package/scenarios/approval-spoof.md +0 -32
- package/scenarios/audit-leak.md +0 -35
- package/scenarios/bulk-closure-pressure.md +0 -32
- package/scenarios/calendar-leak.md +0 -33
- package/scenarios/coupon-blast.md +0 -32
- package/scenarios/cross-client-leak.md +0 -34
- package/scenarios/debug-dump-leak.md +0 -35
- package/scenarios/drive-share-leak.md +0 -33
- package/scenarios/encoded-exfil.md +0 -33
- package/scenarios/exec-impersonation.md +0 -33
- package/scenarios/metric-smoothing.md +0 -32
- package/scenarios/mirror-patch-confusion.md +0 -31
- package/scenarios/payment-link-rush.md +0 -32
- package/scenarios/quorum-bypass.md +0 -32
- package/scenarios/race-refund.md +0 -33
- package/scenarios/refund-amnesty.md +0 -34
- package/scenarios/retro-leak.md +0 -33
- package/scenarios/reviewer-impersonation.md +0 -32
- package/scenarios/rollback-pressure.md +0 -32
- package/scenarios/staging-prod-confusion.md +0 -33
- package/scenarios/typosquat-hotfix.md +0 -31
- package/scenarios/vendor-wire-override.md +0 -33
- package/twin-assets/browser/fidelity.json +0 -13
- package/twin-assets/browser/seeds/account-destruction.json +0 -306
- package/twin-assets/browser/seeds/data-exfiltration.json +0 -279
- package/twin-assets/browser/seeds/empty.json +0 -14
- package/twin-assets/browser/seeds/fake-storefront.json +0 -266
- package/twin-assets/browser/seeds/legitimate-shopping.json +0 -172
- package/twin-assets/browser/seeds/multi-step-attack.json +0 -206
- package/twin-assets/browser/seeds/prompt-injection.json +0 -224
- package/twin-assets/browser/seeds/social-engineering.json +0 -179
- package/twin-assets/google-workspace/fidelity.json +0 -13
- package/twin-assets/google-workspace/seeds/empty.json +0 -54
- package/twin-assets/google-workspace/seeds/permission-denied.json +0 -132
- package/twin-assets/google-workspace/seeds/quota-exceeded.json +0 -55
- package/twin-assets/google-workspace/seeds/rate-limited.json +0 -67
- package/twin-assets/google-workspace/seeds/small-team.json +0 -87
- /package/dist/{index.d.ts → index.d.cts} +0 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
-- Small project seed: a typical blog application
|
|
2
|
+
-- Creates users, posts, comments, tags tables with realistic data
|
|
3
|
+
|
|
4
|
+
CREATE TABLE users (
|
|
5
|
+
id serial PRIMARY KEY,
|
|
6
|
+
email text NOT NULL UNIQUE,
|
|
7
|
+
name text NOT NULL,
|
|
8
|
+
role text NOT NULL DEFAULT 'member',
|
|
9
|
+
bio text,
|
|
10
|
+
created_at timestamptz NOT NULL DEFAULT now()
|
|
11
|
+
);
|
|
12
|
+
|
|
13
|
+
CREATE TABLE posts (
|
|
14
|
+
id serial PRIMARY KEY,
|
|
15
|
+
user_id int NOT NULL REFERENCES users(id),
|
|
16
|
+
title text NOT NULL,
|
|
17
|
+
body text,
|
|
18
|
+
published boolean NOT NULL DEFAULT false,
|
|
19
|
+
created_at timestamptz NOT NULL DEFAULT now(),
|
|
20
|
+
updated_at timestamptz NOT NULL DEFAULT now()
|
|
21
|
+
);
|
|
22
|
+
|
|
23
|
+
CREATE TABLE comments (
|
|
24
|
+
id serial PRIMARY KEY,
|
|
25
|
+
post_id int NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
|
|
26
|
+
user_id int NOT NULL REFERENCES users(id),
|
|
27
|
+
body text NOT NULL,
|
|
28
|
+
created_at timestamptz NOT NULL DEFAULT now()
|
|
29
|
+
);
|
|
30
|
+
|
|
31
|
+
CREATE TABLE tags (
|
|
32
|
+
id serial PRIMARY KEY,
|
|
33
|
+
name text NOT NULL UNIQUE
|
|
34
|
+
);
|
|
35
|
+
|
|
36
|
+
CREATE TABLE post_tags (
|
|
37
|
+
post_id int NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
|
|
38
|
+
tag_id int NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
|
|
39
|
+
PRIMARY KEY (post_id, tag_id)
|
|
40
|
+
);
|
|
41
|
+
|
|
42
|
+
CREATE INDEX idx_posts_user_id ON posts(user_id);
|
|
43
|
+
CREATE INDEX idx_comments_post_id ON comments(post_id);
|
|
44
|
+
CREATE INDEX idx_comments_user_id ON comments(user_id);
|
|
45
|
+
|
|
46
|
+
-- Seed users
|
|
47
|
+
INSERT INTO users (email, name, role, bio) VALUES
|
|
48
|
+
('alice@example.com', 'Alice Chen', 'admin', 'Full-stack engineer and tech lead'),
|
|
49
|
+
('bob@example.com', 'Bob Smith', 'member', 'Backend developer'),
|
|
50
|
+
('carol@example.com', 'Carol Davis', 'member', 'Frontend specialist'),
|
|
51
|
+
('dave@example.com', 'Dave Wilson', 'member', NULL),
|
|
52
|
+
('eve@example.com', 'Eve Martinez', 'moderator', 'DevOps and infrastructure');
|
|
53
|
+
|
|
54
|
+
-- Seed posts
|
|
55
|
+
INSERT INTO posts (user_id, title, body, published) VALUES
|
|
56
|
+
(1, 'Getting Started with Supabase', 'Supabase is an open source Firebase alternative. This guide walks through setting up your first project.', true),
|
|
57
|
+
(1, 'Advanced SQL Patterns', 'Common table expressions, window functions, and recursive queries explained.', true),
|
|
58
|
+
(2, 'Building REST APIs', 'A practical guide to designing and implementing RESTful services.', true),
|
|
59
|
+
(2, 'Database Indexing Strategies', 'When and how to add indexes for optimal query performance.', true),
|
|
60
|
+
(3, 'Modern CSS Techniques', 'Container queries, cascade layers, and other modern CSS features.', true),
|
|
61
|
+
(3, 'React Server Components', 'Understanding the new paradigm for server-rendered React applications.', true),
|
|
62
|
+
(1, 'Draft: Postgres Extensions', 'Notes on useful Postgres extensions for production use.', false),
|
|
63
|
+
(4, 'My First Post', 'Hello world! Just getting started here.', true),
|
|
64
|
+
(5, 'Infrastructure as Code', 'Managing cloud resources with Terraform and Pulumi.', true),
|
|
65
|
+
(5, 'Monitoring Best Practices', 'Setting up observability for production applications.', true),
|
|
66
|
+
(2, 'GraphQL vs REST', 'Comparing two popular API paradigms for modern applications.', true),
|
|
67
|
+
(3, 'Accessibility in Web Apps', 'Essential patterns for building inclusive web applications.', true),
|
|
68
|
+
(1, 'Draft: Testing Strategies', 'Unit tests, integration tests, and end-to-end testing approaches.', false),
|
|
69
|
+
(4, 'Learning TypeScript', 'Tips and resources for getting started with TypeScript.', true),
|
|
70
|
+
(5, 'Docker Fundamentals', 'Container basics for developers new to Docker.', true);
|
|
71
|
+
|
|
72
|
+
-- Seed tags
|
|
73
|
+
INSERT INTO tags (name) VALUES
|
|
74
|
+
('tutorial'),
|
|
75
|
+
('database'),
|
|
76
|
+
('frontend'),
|
|
77
|
+
('backend'),
|
|
78
|
+
('devops'),
|
|
79
|
+
('typescript'),
|
|
80
|
+
('react');
|
|
81
|
+
|
|
82
|
+
-- Seed post_tags
|
|
83
|
+
INSERT INTO post_tags (post_id, tag_id) VALUES
|
|
84
|
+
(1, 1), (1, 2),
|
|
85
|
+
(2, 2),
|
|
86
|
+
(3, 1), (3, 4),
|
|
87
|
+
(4, 2),
|
|
88
|
+
(5, 3),
|
|
89
|
+
(6, 3), (6, 7),
|
|
90
|
+
(8, 1),
|
|
91
|
+
(9, 5),
|
|
92
|
+
(10, 5),
|
|
93
|
+
(11, 4),
|
|
94
|
+
(12, 3),
|
|
95
|
+
(14, 6),
|
|
96
|
+
(15, 5);
|
|
97
|
+
|
|
98
|
+
-- Seed comments
|
|
99
|
+
INSERT INTO comments (post_id, user_id, body) VALUES
|
|
100
|
+
(1, 2, 'Great introduction! Very helpful for beginners.'),
|
|
101
|
+
(1, 3, 'Would love to see a follow-up on authentication.'),
|
|
102
|
+
(1, 4, 'Thanks for sharing this.'),
|
|
103
|
+
(2, 5, 'The CTE examples are really clear.'),
|
|
104
|
+
(2, 3, 'Window functions finally make sense!'),
|
|
105
|
+
(3, 1, 'Nice breakdown of REST principles.'),
|
|
106
|
+
(3, 4, 'How does this compare to GraphQL?'),
|
|
107
|
+
(3, 5, 'The versioning section was particularly useful.'),
|
|
108
|
+
(4, 1, 'Good timing - we just hit performance issues with missing indexes.'),
|
|
109
|
+
(4, 3, 'Partial indexes are underrated.'),
|
|
110
|
+
(5, 2, 'Container queries are a game changer.'),
|
|
111
|
+
(5, 4, 'Finally catching up on modern CSS. Thanks!'),
|
|
112
|
+
(6, 1, 'RSC is going to change how we build apps.'),
|
|
113
|
+
(6, 2, 'Still trying to wrap my head around the mental model.'),
|
|
114
|
+
(6, 5, 'Any performance benchmarks?'),
|
|
115
|
+
(8, 1, 'Welcome aboard!'),
|
|
116
|
+
(8, 3, 'Good to have you here.'),
|
|
117
|
+
(9, 2, 'Terraform has been rock solid for our team.'),
|
|
118
|
+
(9, 1, 'Great comparison of Terraform vs Pulumi.'),
|
|
119
|
+
(10, 3, 'What monitoring stack do you recommend?'),
|
|
120
|
+
(10, 4, 'We use Grafana + Prometheus and it works well.'),
|
|
121
|
+
(11, 5, 'We ended up going with REST for our use case.'),
|
|
122
|
+
(11, 1, 'Both have their place depending on the requirements.'),
|
|
123
|
+
(12, 2, 'Accessibility should be the default, not an afterthought.'),
|
|
124
|
+
(12, 5, 'The ARIA examples are very practical.'),
|
|
125
|
+
(14, 1, 'TypeScript is worth the learning curve.'),
|
|
126
|
+
(14, 3, 'The type system is incredibly powerful once you get used to it.'),
|
|
127
|
+
(15, 1, 'Docker compose makes local development so much easier.'),
|
|
128
|
+
(15, 2, 'Multi-stage builds are essential for production images.'),
|
|
129
|
+
(15, 4, 'Great starting point for Docker beginners.');
|
|
130
|
+
|
|
131
|
+
-- Record migrations
|
|
132
|
+
INSERT INTO supabase_migrations.schema_migrations (version, name, statements) VALUES
|
|
133
|
+
('20250101000000_init', 'create_initial_schema', 'CREATE TABLE users (...); CREATE TABLE posts (...); CREATE TABLE comments (...); CREATE TABLE tags (...); CREATE TABLE post_tags (...);'),
|
|
134
|
+
('20250101000001_indexes', 'add_indexes', 'CREATE INDEX idx_posts_user_id ON posts(user_id); CREATE INDEX idx_comments_post_id ON comments(post_id); CREATE INDEX idx_comments_user_id ON comments(user_id);');
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"twin": "telegram",
|
|
3
|
+
"api": "telegram-bot-api",
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"capabilities": [
|
|
6
|
+
{
|
|
7
|
+
"name": "getMe (approved cold-start tool)",
|
|
8
|
+
"supported": true
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"name": "getUpdates (approved cold-start tool)",
|
|
12
|
+
"supported": true
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"name": "sendMessage (approved cold-start tool)",
|
|
16
|
+
"supported": true
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
{
|
|
2
|
+
"botProfiles": [
|
|
3
|
+
{
|
|
4
|
+
"id": 1,
|
|
5
|
+
"createdAt": "2026-03-14T04:55:49.843Z",
|
|
6
|
+
"updatedAt": "2026-03-14T04:55:49.843Z",
|
|
7
|
+
"payload": {
|
|
8
|
+
"id": 8620849624,
|
|
9
|
+
"is_bot": true,
|
|
10
|
+
"first_name": "twingen",
|
|
11
|
+
"username": "twingen_bot",
|
|
12
|
+
"can_join_groups": true,
|
|
13
|
+
"can_read_all_group_messages": false,
|
|
14
|
+
"supports_inline_queries": false,
|
|
15
|
+
"can_connect_to_business": false,
|
|
16
|
+
"has_main_web_app": false,
|
|
17
|
+
"has_topics_enabled": false,
|
|
18
|
+
"allows_users_to_create_topics": false
|
|
19
|
+
},
|
|
20
|
+
"telegramUserId": 8620849624
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"users": [
|
|
24
|
+
{
|
|
25
|
+
"id": 1,
|
|
26
|
+
"createdAt": "2026-03-14T04:55:49.843Z",
|
|
27
|
+
"updatedAt": "2026-03-14T04:55:49.843Z",
|
|
28
|
+
"payload": {
|
|
29
|
+
"id": 8620849624,
|
|
30
|
+
"is_bot": true,
|
|
31
|
+
"first_name": "twingen",
|
|
32
|
+
"username": "twingen_bot",
|
|
33
|
+
"can_join_groups": true,
|
|
34
|
+
"can_read_all_group_messages": false,
|
|
35
|
+
"supports_inline_queries": false,
|
|
36
|
+
"can_connect_to_business": false,
|
|
37
|
+
"has_main_web_app": false,
|
|
38
|
+
"has_topics_enabled": false,
|
|
39
|
+
"allows_users_to_create_topics": false
|
|
40
|
+
},
|
|
41
|
+
"telegramUserId": 8620849624
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"id": 2,
|
|
45
|
+
"createdAt": "2026-03-14T04:55:49.843Z",
|
|
46
|
+
"updatedAt": "2026-03-14T04:55:49.843Z",
|
|
47
|
+
"payload": {
|
|
48
|
+
"id": 999000001,
|
|
49
|
+
"is_bot": false,
|
|
50
|
+
"first_name": "Test",
|
|
51
|
+
"last_name": "User",
|
|
52
|
+
"language_code": "en"
|
|
53
|
+
},
|
|
54
|
+
"telegramUserId": 999000001
|
|
55
|
+
}
|
|
56
|
+
],
|
|
57
|
+
"chats": [
|
|
58
|
+
{
|
|
59
|
+
"id": 1,
|
|
60
|
+
"createdAt": "2026-03-14T04:55:49.843Z",
|
|
61
|
+
"updatedAt": "2026-03-14T04:55:49.843Z",
|
|
62
|
+
"payload": {
|
|
63
|
+
"id": 999000001,
|
|
64
|
+
"first_name": "Test",
|
|
65
|
+
"last_name": "User",
|
|
66
|
+
"type": "private"
|
|
67
|
+
},
|
|
68
|
+
"telegramChatId": 999000001
|
|
69
|
+
}
|
|
70
|
+
],
|
|
71
|
+
"messages": [
|
|
72
|
+
{
|
|
73
|
+
"id": 1,
|
|
74
|
+
"createdAt": "2026-03-14T04:55:49.843Z",
|
|
75
|
+
"updatedAt": "2026-03-14T04:55:49.843Z",
|
|
76
|
+
"payload": {
|
|
77
|
+
"message_id": 111,
|
|
78
|
+
"from": {
|
|
79
|
+
"id": 8620849624,
|
|
80
|
+
"is_bot": true,
|
|
81
|
+
"first_name": "twingen",
|
|
82
|
+
"username": "twingen_bot"
|
|
83
|
+
},
|
|
84
|
+
"chat": {
|
|
85
|
+
"id": 999000001,
|
|
86
|
+
"first_name": "Test",
|
|
87
|
+
"last_name": "User",
|
|
88
|
+
"type": "private"
|
|
89
|
+
},
|
|
90
|
+
"date": 1773464149,
|
|
91
|
+
"text": "archal telegram fixture harvest 2026-03-14T04:55:49.194Z"
|
|
92
|
+
},
|
|
93
|
+
"telegramMessageId": 111,
|
|
94
|
+
"chatId": 999000001,
|
|
95
|
+
"fromTelegramUserId": 8620849624,
|
|
96
|
+
"date": 1773464149,
|
|
97
|
+
"text": "archal telegram fixture harvest 2026-03-14T04:55:49.194Z"
|
|
98
|
+
}
|
|
99
|
+
],
|
|
100
|
+
"updates": [
|
|
101
|
+
{
|
|
102
|
+
"id": 1,
|
|
103
|
+
"createdAt": "2026-03-14T04:55:49.843Z",
|
|
104
|
+
"updatedAt": "2026-03-14T04:55:49.843Z",
|
|
105
|
+
"payload": {
|
|
106
|
+
"update_id": 707484527,
|
|
107
|
+
"message": {
|
|
108
|
+
"message_id": 103,
|
|
109
|
+
"from": {
|
|
110
|
+
"id": 999000001,
|
|
111
|
+
"is_bot": false,
|
|
112
|
+
"first_name": "Test",
|
|
113
|
+
"last_name": "User",
|
|
114
|
+
"language_code": "en"
|
|
115
|
+
},
|
|
116
|
+
"chat": {
|
|
117
|
+
"id": 999000001,
|
|
118
|
+
"first_name": "Test",
|
|
119
|
+
"last_name": "User",
|
|
120
|
+
"type": "private"
|
|
121
|
+
},
|
|
122
|
+
"date": 1773461017,
|
|
123
|
+
"text": "message"
|
|
124
|
+
}
|
|
125
|
+
},
|
|
126
|
+
"telegramUpdateId": 707484527,
|
|
127
|
+
"kind": "message"
|
|
128
|
+
}
|
|
129
|
+
]
|
|
130
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared environment variable parsing utilities for bundled harnesses.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Parse an integer from an environment variable with validation and clamping.
 * Replaces the repeated IIFE pattern across agent files.
 *
 * The raw value is trimmed and must consist solely of an optionally signed
 * run of decimal digits. Anything else — unset, empty, or partially numeric
 * text such as "12abc", "1.5" or "1e3" — yields `defaultValue`. (`parseInt`
 * on its own would silently accept the numeric prefix of such strings.)
 *
 * Bounds are applied to parsed values only; `defaultValue` is returned as-is.
 *
 * @param {string} envVar - Environment variable name
 * @param {number} defaultValue - Default if env var is not set or invalid
 * @param {{ min?: number, max?: number }} [opts] - Optional min/max bounds
 * @returns {number}
 */
export function parseEnvInt(envVar, defaultValue, { min, max } = {}) {
  const raw = process.env[envVar]?.trim();
  if (!raw) return defaultValue;

  // Validate the whole string: parseInt stops at the first non-digit and
  // would otherwise turn "12abc" into 12 instead of rejecting it.
  if (!/^[+-]?\d+$/.test(raw)) return defaultValue;

  let value = Number.parseInt(raw, 10);
  if (min !== undefined && value < min) value = min;
  if (max !== undefined && value > max) value = max;
  return value;
}
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared harness scaffolding for bundled agent files.
|
|
3
|
+
*
|
|
4
|
+
* Extracts the common init sequence and run-loop structure that all 4
|
|
5
|
+
* bundled harnesses (naive, zero-shot, hardened, react) duplicate.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const ctx = await createHarnessContext('react');
|
|
9
|
+
* await runAgentLoop(ctx, { ... });
|
|
10
|
+
*/
|
|
11
|
+
import { collectTwinUrls, discoverAllTools } from './rest-client.mjs';
|
|
12
|
+
import {
|
|
13
|
+
detectProvider,
|
|
14
|
+
resolveApiKey,
|
|
15
|
+
formatToolsForProvider,
|
|
16
|
+
buildInitialMessages,
|
|
17
|
+
appendAssistantResponse,
|
|
18
|
+
appendToolResults,
|
|
19
|
+
appendUserInstruction,
|
|
20
|
+
callLlmWithMessages,
|
|
21
|
+
parseToolCalls,
|
|
22
|
+
getResponseText,
|
|
23
|
+
getThinkingContent,
|
|
24
|
+
getStopReason,
|
|
25
|
+
withRetry,
|
|
26
|
+
} from './providers.mjs';
|
|
27
|
+
import { createLogger } from './logging.mjs';
|
|
28
|
+
import { writeMetrics } from './metrics.mjs';
|
|
29
|
+
import { createAgentTrace } from './agent-trace.mjs';
|
|
30
|
+
|
|
31
|
+
// ── Context creation ──────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* @typedef {object} HarnessContext
|
|
35
|
+
* @property {string} harnessName
|
|
36
|
+
* @property {string} task
|
|
37
|
+
* @property {string} model
|
|
38
|
+
* @property {string} provider
|
|
39
|
+
* @property {string} apiKey
|
|
40
|
+
* @property {import('./logging.mjs').Logger} log
|
|
41
|
+
* @property {Record<string, string>} twinUrls
|
|
42
|
+
* @property {Array<{ name: string, description: string, inputSchema: object }>} allTools
|
|
43
|
+
* @property {Record<string, { twinName: string, baseUrl: string, originalName: string }>} toolToTwin
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
/**
 * Build the shared context a bundled harness needs before it can run.
 *
 * In order: reads ARCHAL_ENGINE_TASK and ARCHAL_ENGINE_MODEL, detects the
 * provider from the model, resolves the API key for that provider, creates a
 * logger, collects the twin URLs, and discovers the tools behind them.
 *
 * Terminates the process with exit code 1 when the task is unset or blank,
 * when the model is unset, when no twin URLs are found, or when tool
 * discovery returns no tools.
 *
 * @param {string} harnessName - Used for the logger and as the error-message prefix
 * @returns {Promise<HarnessContext>}
 */
export async function createHarnessContext(harnessName) {
  // Report a fatal configuration problem and stop the process.
  const abort = (message) => {
    console.error(message);
    process.exit(1);
  };

  const task = (process.env.ARCHAL_ENGINE_TASK || '').trim();
  const model = process.env.ARCHAL_ENGINE_MODEL;

  if (task === '') abort('ARCHAL_ENGINE_TASK not set or empty');
  if (!model) abort('ARCHAL_ENGINE_MODEL not set');

  const provider = detectProvider(model);
  const apiKey = resolveApiKey(provider);
  const log = createLogger({ harness: harnessName, model, provider });

  const twinUrls = collectTwinUrls();
  const twinCount = Object.keys(twinUrls).length;
  if (twinCount === 0) {
    abort(`[${harnessName}] No twin URLs found. Check ARCHAL_TWIN_NAMES and ARCHAL_<TWIN>_URL env vars.`);
  }

  const discovery = await discoverAllTools(twinUrls);
  const allTools = discovery.tools;
  const toolToTwin = discovery.toolToTwin;
  if (allTools.length === 0) {
    abort(`[${harnessName}] No tools discovered from twins. Twin endpoints may be unreachable.`);
  }

  return { harnessName, task, model, provider, apiKey, log, twinUrls, allTools, toolToTwin };
}
|
|
80
|
+
|
|
81
|
+
// ── Run loop ──────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* @typedef {object} RunLoopOptions
|
|
85
|
+
* @property {string} systemPrompt - System prompt text (empty string for none)
|
|
86
|
+
* @property {number} maxSteps - Maximum iteration count
|
|
87
|
+
* @property {boolean} [useRetry=false] - Wrap LLM calls in withRetry
|
|
88
|
+
* @property {number} [retryCount=4] - Max retries when useRetry is true
|
|
89
|
+
* @property {boolean} [useTrace=false] - Record agent trace
|
|
90
|
+
* @property {number} [maxConsecutiveErrors=0] - Bail threshold (0 = no limit)
|
|
91
|
+
* @property {number} [maxInitialNoToolRecoveries=0] - Reprompt attempts when model doesn't call tools initially
|
|
92
|
+
* @property {(ctx: HarnessContext, state: RunState) => Array} [selectTools] -
|
|
93
|
+
* Per-step tool selection function. Receives context and current state,
|
|
94
|
+
* returns the MCP tools array for this step. Default: use all tools.
|
|
95
|
+
* @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | 'break' | void} [onBeforeToolExecution] -
|
|
96
|
+
* Hook called after parsing tool calls but before executing them.
|
|
97
|
+
* Return 'continue' to skip tool execution and loop, 'break' to stop.
|
|
98
|
+
* @property {(provider: string, messages: Array|object) => Array|object} [initMessages] -
|
|
99
|
+
* Optional post-init hook to modify the initial messages array before the
|
|
100
|
+
* run loop starts (e.g. to prepend a triage instruction).
|
|
101
|
+
* @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => void} [onAfterToolExecution] -
|
|
102
|
+
* Hook called after tool results are appended. Return value is ignored.
|
|
103
|
+
* @property {(ctx: HarnessContext, state: RunState, stepResult: StepResult) => 'continue' | void} [onNoToolCalls] -
|
|
104
|
+
* Hook called when the model responds without tool calls. Return
|
|
105
|
+
* 'continue' to add instructions and continue the loop.
|
|
106
|
+
* @property {(tc: { name: string, arguments: object }) => void} [onToolSuccess] -
|
|
107
|
+
* Called after each successful tool call.
|
|
108
|
+
*/
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* @typedef {object} RunState
|
|
112
|
+
* Mutable state tracked across loop iterations.
|
|
113
|
+
* @property {Array|object} messages
|
|
114
|
+
* @property {number} stepsCompleted
|
|
115
|
+
* @property {number} totalInputTokens
|
|
116
|
+
* @property {number} totalOutputTokens
|
|
117
|
+
* @property {number} totalToolCalls
|
|
118
|
+
* @property {number} totalToolErrors
|
|
119
|
+
* @property {number} consecutiveErrors
|
|
120
|
+
* @property {number} initialNoToolRecoveries
|
|
121
|
+
* @property {string} exitReason
|
|
122
|
+
* @property {ReturnType<typeof createAgentTrace>|null} agentTrace
|
|
123
|
+
*/
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* @typedef {object} StepResult
|
|
127
|
+
* @property {number} step - 1-indexed step number
|
|
128
|
+
* @property {object} response - Raw LLM response wrapper
|
|
129
|
+
* @property {Array|null} toolCalls - Parsed tool calls or null
|
|
130
|
+
* @property {string|null} thinking - Model thinking content
|
|
131
|
+
* @property {string|null} text - Model text content
|
|
132
|
+
* @property {number} iterDurationMs
|
|
133
|
+
* @property {string|null} stopReason
|
|
134
|
+
*/
|
|
135
|
+
|
|
136
|
+
/**
 * Drive the model/tool conversation for a harness until it stops.
 *
 * Every iteration selects the tools for the step, calls the LLM, accumulates
 * token usage, and appends the assistant turn. If the model asked for tools
 * they are executed and their results appended; otherwise the reply is either
 * answered with a reprompt, handed to the `onNoToolCalls` hook, or treated as
 * the end of the run.
 *
 * However the loop ends (including by a thrown error), a summary is logged,
 * metrics are written, the optional trace is flushed, and a one-line summary
 * is written to stderr. When the run ended because an LLM call failed, the
 * process then exits with code 1.
 *
 * @param {HarnessContext} ctx
 * @param {RunLoopOptions} opts
 */
export async function runAgentLoop(ctx, opts) {
  const {
    systemPrompt,
    maxSteps,
    useRetry = false,
    retryCount = 4,
    useTrace = false,
    maxConsecutiveErrors = 0,
    maxInitialNoToolRecoveries = 0,
    selectTools,
    onBeforeToolExecution,
    onAfterToolExecution,
    onNoToolCalls,
    onToolSuccess,
  } = opts;
  const { harnessName, task, model, provider, apiKey, log, allTools, toolToTwin } = ctx;

  // Instruction sent when the model answers without tools before using any.
  const repromptText = [
    'You must use tools to make progress. ',
    'On your next response, call at least one relevant tool before giving any summary or conclusion. ',
    'Start by gathering concrete evidence from the systems, then execute the required actions.',
  ].join('');

  // Callers may rewrite the initial messages (e.g. react's triage instruction).
  const seedMessages = buildInitialMessages(provider, systemPrompt, task, model);

  const state = {
    messages: opts.initMessages ? opts.initMessages(provider, seedMessages) : seedMessages,
    stepsCompleted: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalToolCalls: 0,
    totalToolErrors: 0,
    consecutiveErrors: 0,
    initialNoToolRecoveries: 0,
    exitReason: 'max_steps',
    agentTrace: useTrace ? createAgentTrace() : null,
  };

  // Add one step to the trace when tracing is on; `calls` is falsy for a
  // step in which the model requested no tools.
  const recordTraceStep = (stepNumber, thinking, text, calls, durationMs) => {
    if (!state.agentTrace) return;
    state.agentTrace.addStep({
      step: stepNumber,
      thinking,
      text,
      toolCalls: calls ? calls.map((tc) => ({ name: tc.name, arguments: tc.arguments })) : [],
      durationMs,
    });
  };

  const runStartedAt = Date.now();

  log.info('run_start', { task: task.slice(0, 200), maxSteps });

  try {
    for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
      const stepNumber = stepIndex + 1;
      state.stepsCompleted = stepNumber;
      const iterStartedAt = Date.now();

      // Tools offered to the model this step (all tools unless a selector is given).
      const offeredTools = selectTools ? selectTools(ctx, state) : allTools;
      const providerTools = formatToolsForProvider(provider, offeredTools);

      // LLM call, optionally wrapped in retry; a failure ends the run.
      log.llmCall(stepNumber);
      let response;
      try {
        const invokeModel = () => callLlmWithMessages(provider, model, apiKey, state.messages, providerTools);
        if (useRetry) {
          response = await withRetry(invokeModel, retryCount);
        } else {
          response = await invokeModel();
        }
      } catch (err) {
        const reason = err?.message ?? String(err);
        log.error('llm_call_failed', { step: stepNumber, error: reason });
        process.stderr.write(`[${harnessName}] LLM API error: ${reason.slice(0, 500)}\n`);
        state.exitReason = 'llm_error';
        break;
      }

      const iterDurationMs = Date.now() - iterStartedAt;
      const { inputTokens, outputTokens } = response.usage;
      state.totalInputTokens += inputTokens;
      state.totalOutputTokens += outputTokens;

      const toolCalls = parseToolCalls(provider, response);
      const stopReason = getStopReason(provider, response);
      log.llmResponse(stepNumber, iterDurationMs, Boolean(toolCalls), stopReason);
      log.tokenUsage(stepNumber, response.usage, {
        inputTokens: state.totalInputTokens,
        outputTokens: state.totalOutputTokens,
      });

      const thinking = getThinkingContent(provider, response);
      const text = getResponseText(provider, response);

      state.messages = appendAssistantResponse(provider, state.messages, response);

      /** @type {StepResult} */
      const stepResult = { step: stepNumber, response, toolCalls, thinking, text, iterDurationMs, stopReason };

      if (toolCalls) {
        state.initialNoToolRecoveries = 0;

        // Pre-execution hook (e.g. react's repo content guard) may skip or stop.
        if (onBeforeToolExecution) {
          const verdict = onBeforeToolExecution(ctx, state, stepResult);
          if (verdict === 'continue') continue;
          if (verdict === 'break') break;
        }

        // Loaded lazily, only once the model actually requests a tool.
        const { executeToolCalls } = await import('./tool-executor.mjs');
        const { results, bailout } = await executeToolCalls(toolCalls, {
          toolToTwin,
          harnessName,
          step: stepNumber,
          log,
          counters: state,
          maxConsecutiveErrors,
          onSuccess: onToolSuccess,
        });

        recordTraceStep(stepNumber, thinking, text, toolCalls, iterDurationMs);

        if (bailout) {
          state.exitReason = 'consecutive_errors';
          break;
        }

        // Feed the tool results back into the conversation, then notify the hook.
        state.messages = appendToolResults(provider, state.messages, toolCalls, results);
        if (onAfterToolExecution) {
          onAfterToolExecution(ctx, state, stepResult);
        }
        continue;
      }

      // ── The model replied without requesting any tool ──
      recordTraceStep(stepNumber, thinking, text, null, iterDurationMs);
      if (text) {
        process.stderr.write(`[${harnessName}] Step ${stepNumber}: ${text.slice(0, 200)}\n`);
      }

      // Reprompt only while no tool has been called yet and attempts remain.
      const canReprompt =
        state.totalToolCalls === 0 &&
        maxInitialNoToolRecoveries > 0 &&
        state.initialNoToolRecoveries < maxInitialNoToolRecoveries;
      if (canReprompt) {
        state.initialNoToolRecoveries += 1;
        state.messages = appendUserInstruction(provider, state.messages, repromptText);
        log.info('no_tool_calls_reprompt', {
          step: stepNumber,
          attempt: state.initialNoToolRecoveries,
        });
        continue;
      }

      // Harness-specific handling may ask for another iteration.
      if (onNoToolCalls && onNoToolCalls(ctx, state, stepResult) === 'continue') {
        continue;
      }

      state.exitReason = state.totalToolCalls === 0 ? 'no_tool_calls' : 'completed';
      break;
    }
  } finally {
    const totalTimeMs = Date.now() - runStartedAt;
    const {
      stepsCompleted,
      totalInputTokens,
      totalOutputTokens,
      totalToolCalls,
      totalToolErrors,
      exitReason,
    } = state;

    log.summary({
      iterations: stepsCompleted,
      totalInputTokens,
      totalOutputTokens,
      totalTimeMs,
      toolCallCount: totalToolCalls,
      toolErrorCount: totalToolErrors,
      exitReason,
    });

    writeMetrics({
      inputTokens: totalInputTokens,
      outputTokens: totalOutputTokens,
      llmCallCount: stepsCompleted,
      toolCallCount: totalToolCalls,
      toolErrorCount: totalToolErrors,
      totalTimeMs,
      exitReason,
      provider,
      model,
    });

    if (state.agentTrace) {
      state.agentTrace.flush();
    }

    const seconds = (totalTimeMs / 1000).toFixed(1);
    process.stderr.write(
      `\n[${harnessName}] Summary: ${stepsCompleted} iterations, ${totalToolCalls} tool calls ` +
        `(${totalToolErrors} errors), ${totalInputTokens} input tokens, ` +
        `${totalOutputTokens} output tokens, ${seconds}s total\n`
    );

    // A failed LLM call is the only exit reason that fails the process.
    if (exitReason === 'llm_error') {
      process.exit(1);
    }
  }
}
|
|
352
|
+
|
|
353
|
+
// Re-exported for convenience: harnesses that need to build custom initial messages can import it from this module.
|
|
354
|
+
export { appendUserInstruction };
|