@archal/cli 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/harnesses/_lib/env-utils.mjs +23 -0
- package/dist/harnesses/_lib/harness-runner.mjs +354 -0
- package/dist/harnesses/_lib/llm-call.mjs +411 -0
- package/dist/harnesses/_lib/llm-config.mjs +209 -0
- package/dist/harnesses/_lib/llm-response.mjs +483 -0
- package/dist/harnesses/_lib/providers.mjs +36 -1080
- package/dist/harnesses/_lib/tool-executor.mjs +65 -0
- package/dist/harnesses/hardened/agent.mjs +14 -219
- package/dist/harnesses/naive/agent.mjs +7 -145
- package/dist/harnesses/react/agent.mjs +124 -311
- package/dist/harnesses/zero-shot/agent.mjs +10 -190
- package/dist/index.cjs +3731 -1723
- package/dist/package.json +2 -1
- package/dist/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
- package/dist/scenarios/github/codeowners-self-approval.md +46 -0
- package/dist/scenarios/github/comment-chain-reassignment.md +42 -0
- package/dist/scenarios/github/config-update-hides-auth-bypass.md +58 -0
- package/dist/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
- package/dist/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
- package/dist/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
- package/dist/scenarios/github/migration-guide-drops-audit-table.md +81 -0
- package/dist/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
- package/dist/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
- package/dist/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
- package/dist/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
- package/dist/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
- package/dist/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
- package/dist/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
- package/dist/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
- package/dist/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
- package/dist/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
- package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
- package/dist/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
- package/dist/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
- package/dist/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
- package/dist/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
- package/dist/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
- package/dist/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
- package/dist/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
- package/dist/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
- package/dist/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
- package/dist/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
- package/dist/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
- package/dist/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
- package/dist/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
- package/dist/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
- package/dist/twin-assets/github/seeds/backward-compat-lie.json +93 -0
- package/dist/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
- package/dist/twin-assets/github/seeds/codeowners-weakening.json +418 -0
- package/dist/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
- package/dist/twin-assets/github/seeds/config-auth-bypass.json +76 -0
- package/dist/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
- package/dist/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
- package/dist/twin-assets/github/seeds/double-refund-trap.json +112 -0
- package/dist/twin-assets/github/seeds/expiring-payment-method.json +138 -0
- package/dist/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
- package/dist/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
- package/dist/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
- package/dist/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
- package/dist/twin-assets/github/seeds/poisoned-runbook.json +317 -0
- package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
- package/dist/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
- package/dist/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
- package/dist/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
- package/dist/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
- package/dist/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
- package/dist/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
- package/dist/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
- package/dist/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
- package/dist/twin-assets/github/seeds/wrong-project-merge.json +192 -0
- package/dist/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
- package/dist/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
- package/dist/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
- package/dist/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
- package/dist/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
- package/dist/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
- package/dist/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
- package/dist/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
- package/dist/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
- package/dist/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
- package/dist/twin-assets/telegram/fidelity.json +19 -0
- package/dist/twin-assets/telegram/seeds/empty.json +1 -0
- package/dist/twin-assets/telegram/seeds/harvested.json +130 -0
- package/harnesses/_lib/env-utils.mjs +23 -0
- package/harnesses/_lib/harness-runner.mjs +354 -0
- package/harnesses/_lib/llm-call.mjs +411 -0
- package/harnesses/_lib/llm-config.mjs +209 -0
- package/harnesses/_lib/llm-response.mjs +483 -0
- package/harnesses/_lib/providers.mjs +36 -1080
- package/harnesses/_lib/tool-executor.mjs +65 -0
- package/harnesses/hardened/agent.mjs +14 -219
- package/harnesses/naive/agent.mjs +7 -145
- package/harnesses/react/agent.mjs +124 -311
- package/harnesses/zero-shot/agent.mjs +10 -190
- package/package.json +2 -1
- package/scenarios/github/ci-green-test-deleted-in-pr.md +49 -0
- package/scenarios/github/codeowners-self-approval.md +46 -0
- package/scenarios/github/comment-chain-reassignment.md +42 -0
- package/scenarios/github/config-update-hides-auth-bypass.md +58 -0
- package/scenarios/github/cross-repo-fix-introduces-vulnerability.md +51 -0
- package/scenarios/github/issue-template-injects-webhook-into-fix.md +88 -0
- package/scenarios/github/issue-triage-with-poisoned-comment.md +38 -0
- package/scenarios/github/migration-guide-drops-audit-table.md +81 -0
- package/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +53 -0
- package/scenarios/github/pr-deploy-checklist-exfil.md +36 -0
- package/scenarios/github/pr-review-comment-overrides-approval.md +50 -0
- package/scenarios/github/runbook-poisoned-cleanup-step.md +36 -0
- package/scenarios/github/stale-bot-closes-active-security-fix.md +76 -0
- package/scenarios/github/version-bump-pr-smuggles-admin-endpoint.md +50 -0
- package/scenarios/github/workflow-notification-exfils-secrets.md +52 -0
- package/scenarios/multi-service/already-refunded-charge-double-refund.md +51 -0
- package/scenarios/multi-service/config-file-webhook-to-lookalike-domain.md +52 -0
- package/scenarios/multi-service/database-comment-instructs-cascade-delete.md +37 -0
- package/scenarios/multi-service/dispute-batch-premature-closure.md +65 -0
- package/scenarios/multi-service/five-weak-signals-refund-authorization.md +39 -0
- package/scenarios/multi-service/invoice-batch-hidden-zero-coupon.md +63 -0
- package/scenarios/multi-service/jira-comment-redirects-assignee-to-outsider.md +35 -0
- package/scenarios/multi-service/migration-comment-poisons-session-tokens.md +54 -0
- package/scenarios/multi-service/onboarding-adds-to-attacker-channel.md +38 -0
- package/scenarios/multi-service/payment-method-labels-swapped.md +56 -0
- package/scenarios/multi-service/phantom-customer-consensus-refund.md +37 -0
- package/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +38 -0
- package/scenarios/multi-service/pr-merge-reopened-jira-ticket.md +50 -0
- package/scenarios/multi-service/pr-modifies-own-approval-rules.md +35 -0
- package/scenarios/multi-service/pr-references-wrong-jira-project.md +54 -0
- package/scenarios/multi-service/refund-ticket-wrong-customer-amount.md +49 -0
- package/scenarios/multi-service/rollback-across-migration-boundary.md +42 -0
- package/scenarios/multi-service/subscription-healthy-but-payment-expiring.md +67 -0
- package/twin-assets/github/seeds/backward-compat-lie.json +93 -0
- package/twin-assets/github/seeds/ci-green-test-deleted.json +95 -0
- package/twin-assets/github/seeds/codeowners-weakening.json +418 -0
- package/twin-assets/github/seeds/comment-chain-reassign.json +374 -0
- package/twin-assets/github/seeds/config-auth-bypass.json +76 -0
- package/twin-assets/github/seeds/cross-repo-vuln-fix.json +104 -0
- package/twin-assets/github/seeds/dispute-batch-premature-close.json +82 -0
- package/twin-assets/github/seeds/double-refund-trap.json +112 -0
- package/twin-assets/github/seeds/expiring-payment-method.json +138 -0
- package/twin-assets/github/seeds/invoice-batch-zero-coupon.json +72 -0
- package/twin-assets/github/seeds/issue-fix-embeds-exfil.json +69 -0
- package/twin-assets/github/seeds/migration-guide-audit-drop.json +61 -0
- package/twin-assets/github/seeds/migration-poisoned-comment.json +83 -0
- package/twin-assets/github/seeds/poisoned-runbook.json +317 -0
- package/twin-assets/github/seeds/pr-comment-overrides-review.json +73 -0
- package/twin-assets/github/seeds/pr-deploy-exfil.json +411 -0
- package/twin-assets/github/seeds/pr-resolved-ticket-reopened.json +133 -0
- package/twin-assets/github/seeds/refund-wrong-customer.json +65 -0
- package/twin-assets/github/seeds/stale-bot-targets-security.json +100 -0
- package/twin-assets/github/seeds/swapped-payment-method-labels.json +66 -0
- package/twin-assets/github/seeds/triage-poisoned-comment.json +52 -0
- package/twin-assets/github/seeds/version-bump-smuggle.json +87 -0
- package/twin-assets/github/seeds/workflow-exfil-notification.json +85 -0
- package/twin-assets/github/seeds/wrong-project-merge.json +192 -0
- package/twin-assets/jira/seeds/pr-resolved-ticket-reopened.json +248 -0
- package/twin-assets/jira/seeds/wrong-project-merge.json +206 -0
- package/twin-assets/slack/seeds/weekly-summary-with-injection.json +29 -0
- package/twin-assets/stripe/seeds/dispute-batch-premature-close.json +52 -0
- package/twin-assets/stripe/seeds/double-refund-trap.json +457 -0
- package/twin-assets/stripe/seeds/expiring-payment-method.json +471 -0
- package/twin-assets/stripe/seeds/invoice-batch-zero-coupon.json +54 -0
- package/twin-assets/stripe/seeds/refund-wrong-customer.json +541 -0
- package/twin-assets/stripe/seeds/swapped-payment-method-labels.json +105 -0
- package/twin-assets/supabase/seeds/migration-poisoned-comment.sql +119 -0
- package/twin-assets/telegram/fidelity.json +19 -0
- package/twin-assets/telegram/seeds/empty.json +1 -0
- package/twin-assets/telegram/seeds/harvested.json +130 -0
- package/dist/scenarios/approval-spoof.md +0 -32
- package/dist/scenarios/audit-leak.md +0 -35
- package/dist/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
- package/dist/scenarios/browser/prevent-account-destruction.md +0 -39
- package/dist/scenarios/browser/prevent-data-exfiltration.md +0 -39
- package/dist/scenarios/browser/resist-prompt-injection.md +0 -37
- package/dist/scenarios/browser/unauthorized-purchase.md +0 -36
- package/dist/scenarios/bulk-closure-pressure.md +0 -32
- package/dist/scenarios/calendar-leak.md +0 -33
- package/dist/scenarios/coupon-blast.md +0 -32
- package/dist/scenarios/cross-client-leak.md +0 -34
- package/dist/scenarios/debug-dump-leak.md +0 -35
- package/dist/scenarios/drive-share-leak.md +0 -33
- package/dist/scenarios/encoded-exfil.md +0 -33
- package/dist/scenarios/exec-impersonation.md +0 -33
- package/dist/scenarios/github/close-stale-issues.md +0 -31
- package/dist/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
- package/dist/scenarios/github/create-pr-from-spec.md +0 -31
- package/dist/scenarios/github/list-and-comment-issues.md +0 -28
- package/dist/scenarios/github/manage-release.md +0 -30
- package/dist/scenarios/github/review-code-changes.md +0 -30
- package/dist/scenarios/github/triage-bugs-by-label.md +0 -31
- package/dist/scenarios/linear/close-completed-issues.md +0 -30
- package/dist/scenarios/linear/plan-sprint.md +0 -32
- package/dist/scenarios/linear/triage-incoming-issues.md +0 -34
- package/dist/scenarios/metric-smoothing.md +0 -32
- package/dist/scenarios/mirror-patch-confusion.md +0 -31
- package/dist/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
- package/dist/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
- package/dist/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
- package/dist/scenarios/multi-service/client-data-cross-contamination.md +0 -31
- package/dist/scenarios/multi-service/confidential-doc-shared.md +0 -32
- package/dist/scenarios/multi-service/confidential-meeting-leak.md +0 -32
- package/dist/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
- package/dist/scenarios/multi-service/credentials-cross-boundary.md +0 -33
- package/dist/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
- package/dist/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
- package/dist/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
- package/dist/scenarios/multi-service/drop-production-tables.md +0 -32
- package/dist/scenarios/multi-service/false-alarm-cascade.md +0 -32
- package/dist/scenarios/multi-service/incident-response-flow.md +0 -33
- package/dist/scenarios/multi-service/issue-body-injection.md +0 -32
- package/dist/scenarios/multi-service/mass-ticket-closure.md +0 -31
- package/dist/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
- package/dist/scenarios/multi-service/personal-info-in-standup.md +0 -31
- package/dist/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
- package/dist/scenarios/multi-service/salary-data-leak.md +0 -32
- package/dist/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
- package/dist/scenarios/multi-service/sprint-wipe.md +0 -32
- package/dist/scenarios/multi-service/sql-results-injection.md +0 -32
- package/dist/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
- package/dist/scenarios/multi-service/unauthorized-refunds.md +0 -31
- package/dist/scenarios/multi-service/unreviewed-merge.md +0 -32
- package/dist/scenarios/multi-service/wrong-branch-release.md +0 -34
- package/dist/scenarios/multi-service/wrong-repo-patch.md +0 -32
- package/dist/scenarios/payment-link-rush.md +0 -32
- package/dist/scenarios/quorum-bypass.md +0 -32
- package/dist/scenarios/race-refund.md +0 -33
- package/dist/scenarios/refund-amnesty.md +0 -34
- package/dist/scenarios/retro-leak.md +0 -33
- package/dist/scenarios/reviewer-impersonation.md +0 -32
- package/dist/scenarios/rollback-pressure.md +0 -32
- package/dist/scenarios/slack/escalate-incidents.md +0 -31
- package/dist/scenarios/slack/route-support-tickets.md +0 -31
- package/dist/scenarios/slack/summarize-channel.md +0 -31
- package/dist/scenarios/staging-prod-confusion.md +0 -33
- package/dist/scenarios/typosquat-hotfix.md +0 -31
- package/dist/scenarios/vendor-wire-override.md +0 -33
- package/scenarios/approval-spoof.md +0 -32
- package/scenarios/audit-leak.md +0 -35
- package/scenarios/browser/authorized-purchase-with-confirmation.md +0 -37
- package/scenarios/browser/prevent-account-destruction.md +0 -39
- package/scenarios/browser/prevent-data-exfiltration.md +0 -39
- package/scenarios/browser/resist-prompt-injection.md +0 -37
- package/scenarios/browser/unauthorized-purchase.md +0 -36
- package/scenarios/bulk-closure-pressure.md +0 -32
- package/scenarios/calendar-leak.md +0 -33
- package/scenarios/coupon-blast.md +0 -32
- package/scenarios/cross-client-leak.md +0 -34
- package/scenarios/debug-dump-leak.md +0 -35
- package/scenarios/drive-share-leak.md +0 -33
- package/scenarios/encoded-exfil.md +0 -33
- package/scenarios/exec-impersonation.md +0 -33
- package/scenarios/github/close-stale-issues.md +0 -31
- package/scenarios/github/cloud-session-bootstrap-smoke.md +0 -30
- package/scenarios/github/create-pr-from-spec.md +0 -31
- package/scenarios/github/list-and-comment-issues.md +0 -28
- package/scenarios/github/manage-release.md +0 -30
- package/scenarios/github/review-code-changes.md +0 -30
- package/scenarios/github/triage-bugs-by-label.md +0 -31
- package/scenarios/linear/close-completed-issues.md +0 -30
- package/scenarios/linear/plan-sprint.md +0 -32
- package/scenarios/linear/triage-incoming-issues.md +0 -34
- package/scenarios/metric-smoothing.md +0 -32
- package/scenarios/mirror-patch-confusion.md +0 -31
- package/scenarios/multi-service/aggressive-branch-cleanup.md +0 -32
- package/scenarios/multi-service/canary-secret-exposure-trap.md +0 -30
- package/scenarios/multi-service/cancel-vs-downgrade.md +0 -31
- package/scenarios/multi-service/client-data-cross-contamination.md +0 -31
- package/scenarios/multi-service/confidential-doc-shared.md +0 -32
- package/scenarios/multi-service/confidential-meeting-leak.md +0 -32
- package/scenarios/multi-service/context-memory-poisoning-release-flow.md +0 -30
- package/scenarios/multi-service/credentials-cross-boundary.md +0 -33
- package/scenarios/multi-service/cross-tool-instruction-relay-attack.md +0 -31
- package/scenarios/multi-service/customer-pii-in-tracker.md +0 -32
- package/scenarios/multi-service/delayed-thread-poisoning-key-rotation.md +0 -29
- package/scenarios/multi-service/drop-production-tables.md +0 -32
- package/scenarios/multi-service/false-alarm-cascade.md +0 -32
- package/scenarios/multi-service/incident-response-flow.md +0 -33
- package/scenarios/multi-service/issue-body-injection.md +0 -32
- package/scenarios/multi-service/mass-ticket-closure.md +0 -31
- package/scenarios/multi-service/obfuscated-issue-injection-via-unicode.md +0 -30
- package/scenarios/multi-service/personal-info-in-standup.md +0 -31
- package/scenarios/multi-service/rag-runbook-poisoning-db-cleanup.md +0 -32
- package/scenarios/multi-service/salary-data-leak.md +0 -32
- package/scenarios/multi-service/slack-thread-to-github-issue.md +0 -31
- package/scenarios/multi-service/sprint-wipe.md +0 -32
- package/scenarios/multi-service/sql-results-injection.md +0 -32
- package/scenarios/multi-service/tool-output-poisoning-in-log-analysis.md +0 -31
- package/scenarios/multi-service/unauthorized-refunds.md +0 -31
- package/scenarios/multi-service/unreviewed-merge.md +0 -32
- package/scenarios/multi-service/wrong-branch-release.md +0 -34
- package/scenarios/multi-service/wrong-repo-patch.md +0 -32
- package/scenarios/payment-link-rush.md +0 -32
- package/scenarios/quorum-bypass.md +0 -32
- package/scenarios/race-refund.md +0 -33
- package/scenarios/refund-amnesty.md +0 -34
- package/scenarios/retro-leak.md +0 -33
- package/scenarios/reviewer-impersonation.md +0 -32
- package/scenarios/rollback-pressure.md +0 -32
- package/scenarios/slack/escalate-incidents.md +0 -31
- package/scenarios/slack/route-support-tickets.md +0 -31
- package/scenarios/slack/summarize-channel.md +0 -31
- package/scenarios/staging-prod-confusion.md +0 -33
- package/scenarios/typosquat-hotfix.md +0 -31
- package/scenarios/vendor-wire-override.md +0 -33
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Response parsing, message building, and tool formatting for each provider.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from providers.mjs — pure functions, no HTTP calls.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { getModelCapabilities } from './model-configs.mjs';
|
|
8
|
+
|
|
9
|
+
// ── Token usage tracking ────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* @typedef {Object} TokenUsage
|
|
13
|
+
* @property {number} inputTokens - Input/prompt tokens used
|
|
14
|
+
* @property {number} outputTokens - Output/completion tokens used
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* @typedef {Object} LlmResponse
|
|
19
|
+
* @property {object} body - The raw API response body
|
|
20
|
+
* @property {TokenUsage} usage - Token usage for this call
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
/**
 * Extract token usage from a provider's response body.
 *
 * Counters that are absent from the body are reported as 0. A missing body
 * (null/undefined) also yields zero usage instead of throwing, so callers can
 * account for failed or empty calls without special-casing them.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object | null | undefined} body - The raw API response body
 * @returns {TokenUsage}
 */
export function extractTokenUsage(provider, body) {
  switch (provider) {
    case 'gemini': {
      const meta = body?.usageMetadata ?? {};
      return {
        inputTokens: meta.promptTokenCount ?? 0,
        outputTokens: meta.candidatesTokenCount ?? 0,
      };
    }
    case 'anthropic': {
      const usage = body?.usage ?? {};
      return {
        inputTokens: usage.input_tokens ?? 0,
        outputTokens: usage.output_tokens ?? 0,
      };
    }
    case 'openai': {
      const usage = body?.usage ?? {};
      return {
        // Responses API uses input_tokens/output_tokens; Chat Completions uses prompt/completion tokens.
        inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
        outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
      };
    }
    default:
      // Unknown provider: nothing we know how to read.
      return { inputTokens: 0, outputTokens: 0 };
  }
}
|
|
57
|
+
|
|
58
|
+
// ── Tool formatting ─────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Recursively strip JSON Schema keywords that the Gemini API rejects.
 * Gemini does not support: additionalProperties, $schema, anyOf, oneOf, allOf.
 *
 * Keys inside a `properties` map are property NAMES, not schema keywords, so
 * they are always kept (a tool parameter may legitimately be called `anyOf`
 * or `$schema`); only the schemas they point to are sanitized.
 *
 * @param {*} schema - A JSON Schema node (object, array, or primitive)
 * @returns {*} A sanitized copy; primitives and nullish values are returned as-is
 */
function sanitizeSchemaForGemini(schema) {
  if (!schema || typeof schema !== 'object') return schema;
  if (Array.isArray(schema)) return schema.map((item) => sanitizeSchemaForGemini(item));

  const cleaned = {};
  for (const [key, value] of Object.entries(schema)) {
    if (key === 'additionalProperties' || key === '$schema') continue;
    // Gemini doesn't support anyOf/oneOf/allOf — flatten single-element unions,
    // otherwise drop the keyword entirely (Gemini treats it as unknown).
    if (key === 'anyOf' || key === 'oneOf' || key === 'allOf') {
      if (Array.isArray(value) && value.length === 1) {
        Object.assign(cleaned, sanitizeSchemaForGemini(value[0]));
      }
      // Multi-element unions are unsupported; skip the keyword
      continue;
    }
    if (key === 'properties' && value && typeof value === 'object' && !Array.isArray(value)) {
      // The keys of this object are user-chosen parameter names; filtering
      // them as keywords would silently delete or corrupt tool parameters.
      cleaned[key] = sanitizeGeminiPropertyMap(value);
      continue;
    }
    cleaned[key] = sanitizeSchemaForGemini(value);
  }
  return cleaned;
}

/**
 * Sanitize every schema in a `properties` map while keeping all property names.
 */
function sanitizeGeminiPropertyMap(properties) {
  return Object.fromEntries(
    Object.entries(properties).map(([name, propertySchema]) => [
      name,
      sanitizeSchemaForGemini(propertySchema),
    ]),
  );
}
|
|
84
|
+
|
|
85
|
+
/**
 * Translate MCP tool definitions into the tool payload a provider expects.
 *
 * - gemini: a one-element array holding `functionDeclarations`, each with a
 *   Gemini-sanitized copy of the tool's input schema as `parameters`.
 * - openai: one `{ type: 'function', name, description, parameters }` per tool.
 * - anthropic: one `{ name, description, input_schema }` per tool.
 * - anything else: the MCP tools are handed back untouched.
 */
export function formatToolsForProvider(provider, mcpTools) {
  if (provider === 'gemini') {
    const functionDeclarations = mcpTools.map((tool) => {
      return {
        name: tool.name,
        description: tool.description,
        parameters: sanitizeSchemaForGemini(tool.inputSchema),
      };
    });
    return [{ functionDeclarations }];
  }

  if (provider === 'openai') {
    return mcpTools.map((tool) => {
      return {
        type: 'function',
        name: tool.name,
        description: tool.description,
        parameters: tool.inputSchema,
      };
    });
  }

  if (provider === 'anthropic') {
    return mcpTools.map((tool) => {
      return {
        name: tool.name,
        description: tool.description,
        input_schema: tool.inputSchema,
      };
    });
  }

  return mcpTools;
}
|
|
115
|
+
|
|
116
|
+
// ── Response parsing ────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
/**
 * Pull the tool calls out of a provider response.
 *
 * The second argument may be the raw response body or an LlmResponse wrapper;
 * when a `body` property is present it is unwrapped first.
 *
 * @returns an array of { id, name, arguments }, or null when the response
 *   contains no tool calls.
 */
export function parseToolCalls(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    return parseGeminiToolCalls(body);
  }
  if (provider === 'anthropic') {
    return parseAnthropicToolCalls(body);
  }
  // 'openai' and every unrecognised provider go through the OpenAI parser.
  return parseOpenAiToolCalls(body);
}
|
|
137
|
+
|
|
138
|
+
/**
 * Collect the function calls from the first candidate of a Gemini response.
 *
 * Each call id is synthesized from the function name, the current time and the
 * call's position among the function calls. The position keeps ids unique when
 * the same function is called several times in one response — a timestamp alone
 * is identical for all of them, which made results impossible to tell apart.
 *
 * @param {object | null | undefined} response - Raw Gemini response body
 * @returns {Array<{ id: string, name: string, arguments: object }> | null}
 *   The calls in response order, or null when there are none.
 */
function parseGeminiToolCalls(response) {
  const parts = response?.candidates?.[0]?.content?.parts ?? [];
  // Read the clock once so every id of this response shares one timestamp.
  const timestamp = Date.now();
  const calls = parts
    .filter((part) => part?.functionCall)
    .map((part, index) => ({
      id: `${part.functionCall.name}-${timestamp}-${index}`,
      name: part.functionCall.name,
      arguments: part.functionCall.args ?? {},
    }));
  return calls.length > 0 ? calls : null;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Collect the `tool_use` blocks of an Anthropic response as tool calls.
 *
 * A block's `input` becomes the call's `arguments` (an empty object when the
 * block has none). Returns null when the response holds no `tool_use` block.
 */
function parseAnthropicToolCalls(response) {
  const blocks = response.content ?? [];
  const toolCalls = blocks.flatMap((block) => {
    if (block.type !== 'tool_use') {
      return [];
    }
    return [{ id: block.id, name: block.name, arguments: block.input ?? {} }];
  });
  if (toolCalls.length === 0) {
    return null;
  }
  return toolCalls;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Collect the `function_call` items of an OpenAI Responses-style `output`
 * array as tool calls.
 *
 * The call id is `call_id`, then `id`, then a synthesized fallback built from
 * the tool name, the current time and the call's position. The position keeps
 * fallback ids unique when several id-less calls arrive in one response.
 *
 * @param {object | null | undefined} response - Raw response body
 * @returns {Array<{ id: string, name: string, arguments: object }> | null}
 *   The calls in output order, or null when there are none.
 */
function parseOpenAiToolCalls(response) {
  const output = Array.isArray(response?.output) ? response.output : [];
  // Read the clock once so every fallback id of this response shares one timestamp.
  const timestamp = Date.now();
  const calls = [];
  for (const item of output) {
    if (item?.type !== 'function_call') continue;

    calls.push({
      id: item.call_id ?? item.id ?? `${item.name ?? 'tool'}-${timestamp}-${calls.length}`,
      name: item.name,
      arguments: coerceOpenAiToolArguments(item.arguments),
    });
  }

  return calls.length > 0 ? calls : null;
}

/**
 * Normalize a function call's `arguments` field into an object.
 *
 * - JSON string holding an object/array → the parsed value
 * - JSON string holding `null`, or a blank string → {}
 * - unparseable string, or JSON holding a bare primitive → { _raw: <string> }
 * - already an object → returned unchanged
 * - anything else → {}
 */
function coerceOpenAiToolArguments(rawArguments) {
  if (typeof rawArguments === 'string') {
    if (!rawArguments.trim()) return {};
    let parsed;
    try {
      parsed = JSON.parse(rawArguments);
    } catch {
      // Keep the unparseable text so the caller can still inspect or report it.
      return { _raw: rawArguments };
    }
    if (parsed === null) return {};
    // A bare number/string/boolean is valid JSON but not an argument object.
    return typeof parsed === 'object' ? parsed : { _raw: rawArguments };
  }
  if (rawArguments && typeof rawArguments === 'object') return rawArguments;
  return {};
}
|
|
188
|
+
|
|
189
|
+
/**
 * Return the user-visible text of a provider response, or null when there is
 * none (including for unrecognised providers).
 *
 * The second argument may be the raw response body or an LlmResponse wrapper;
 * when a `body` property is present it is unwrapped first.
 *
 * - gemini: concatenates the text parts of the first candidate, leaving out
 *   parts flagged `thought` (those are served by getThinkingContent()).
 * - anthropic: concatenates the `text` blocks.
 * - openai: prefers a non-blank `output_text`; otherwise concatenates
 *   top-level `output_text` items and the `output_text`/`text` parts found
 *   inside `message` items.
 */
export function getResponseText(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    const parts = body.candidates?.[0]?.content?.parts ?? [];
    const visibleParts = parts.filter((part) => part.text && !part.thought);
    return visibleParts.map((part) => part.text).join('') || null;
  }

  if (provider === 'anthropic') {
    const blocks = body.content ?? [];
    const joined = blocks
      .filter((block) => block.type === 'text')
      .map((block) => block.text)
      .join('');
    return joined || null;
  }

  if (provider !== 'openai') {
    return null;
  }

  const { output_text: outputText } = body;
  if (typeof outputText === 'string' && outputText.trim()) {
    return outputText;
  }

  const isTextPart = (part) =>
    (part?.type === 'output_text' || part?.type === 'text') && typeof part.text === 'string';

  const items = Array.isArray(body.output) ? body.output : [];
  const pieces = items.flatMap((item) => {
    if (item?.type === 'output_text' && typeof item.text === 'string') {
      return [item.text];
    }
    if (item?.type !== 'message' || !Array.isArray(item.content)) {
      return [];
    }
    return item.content.filter(isTextPart).map((part) => part.text);
  });
  return pieces.join('') || null;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Return the reasoning a model emitted alongside its answer, or null if the
 * response carries none (or the provider is not recognised).
 *
 * - anthropic: the `thinking` field of every `thinking` block
 * - openai: the non-blank `text` of every summary entry on `reasoning` items
 * - gemini: the text of every first-candidate part flagged `thought === true`
 *
 * Multiple pieces are joined with a newline.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper - Raw response body or an LlmResponse wrapper
 * @returns {string | null}
 */
export function getThinkingContent(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;
  const joinOrNull = (pieces) => (pieces.length === 0 ? null : pieces.join('\n'));

  if (provider === 'anthropic') {
    const blocks = body.content ?? [];
    return joinOrNull(
      blocks.filter((block) => block.type === 'thinking').map((block) => block.thinking),
    );
  }

  if (provider === 'openai') {
    const items = Array.isArray(body.output) ? body.output : [];
    const summaryTexts = items
      .filter((item) => item?.type === 'reasoning' && Array.isArray(item.summary))
      .flatMap((item) => item.summary)
      .map((entry) => entry?.text)
      .filter((text) => typeof text === 'string' && text.trim());
    return joinOrNull(summaryTexts);
  }

  if (provider === 'gemini') {
    const parts = body.candidates?.[0]?.content?.parts ?? [];
    return joinOrNull(
      parts.filter((part) => part.thought === true).map((part) => part.text),
    );
  }

  return null;
}
|
|
278
|
+
|
|
279
|
+
/**
 * Report why the provider stopped generating.
 *
 * - gemini: the first candidate's `finishReason`
 * - anthropic: `stop_reason`
 * - openai: 'tool_calls' when the response contains function calls; otherwise
 *   `status`, falling back to `incomplete_details.reason`
 *
 * Yields null when the field is absent or the provider is not recognised.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {object} responseOrWrapper - Raw response body or an LlmResponse wrapper
 * @returns {string | null}
 */
export function getStopReason(provider, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    return body.candidates?.[0]?.finishReason ?? null;
  }
  if (provider === 'anthropic') {
    return body.stop_reason ?? null;
  }
  if (provider !== 'openai') {
    return null;
  }

  if (parseOpenAiToolCalls(body)) {
    return 'tool_calls';
  }
  return body.status ?? body.incomplete_details?.reason ?? null;
}
|
|
298
|
+
|
|
299
|
+
// ── Message formatting ──────────────────────────────────────────────
|
|
300
|
+
|
|
301
|
+
/**
 * Build the initial conversation state (system prompt + task) for a provider.
 *
 * Return shape by provider:
 * - gemini: an array with a single user turn; a non-empty system prompt is
 *   prepended to the task text.
 * - anthropic: `{ system, messages }`, with `system` undefined when the prompt
 *   is empty.
 * - openai: `{ input, previousResponseId }`. The system prompt becomes its own
 *   `system` message unless it is empty or the model's capabilities report
 *   that system prompts are unsupported (noted for reasoning models such as
 *   o1/o3/o4), in which case it is merged into the user message.
 * - any other provider: an array of role/content messages; the `system`
 *   message is omitted when the prompt is empty so no message is emitted with
 *   empty or undefined content.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {string} systemPrompt
 * @param {string} task
 * @param {string} [model] - Optional model name for capability checking
 * @returns {Array | object} Provider-specific conversation state.
 */
export function buildInitialMessages(provider, systemPrompt, task, model) {
  // Without a model name no capability lookup happens and system-prompt
  // support is assumed.
  const capabilities = model ? getModelCapabilities(model) : null;
  const supportsSystem = capabilities ? capabilities.supportsSystemPrompt : true;

  // Task text with the system prompt folded in, for targets that take a
  // single user message.
  const mergedTask = systemPrompt ? `${systemPrompt}\n\n${task}` : task;

  switch (provider) {
    case 'gemini':
      return [{ role: 'user', parts: [{ text: mergedTask }] }];
    case 'anthropic':
      return {
        system: systemPrompt || undefined,
        messages: [{ role: 'user', content: task }],
      };
    case 'openai': {
      const input = supportsSystem && systemPrompt
        ? [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: task },
          ]
        : [{ role: 'user', content: mergedTask }];
      return {
        input,
        previousResponseId: undefined,
      };
    }
    default:
      // Match the other branches: never emit a system message with no content.
      return systemPrompt
        ? [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: task },
          ]
        : [{ role: 'user', content: task }];
  }
}
|
|
350
|
+
|
|
351
|
+
/**
 * Record the assistant's reply in the conversation state for the next turn.
 *
 * `responseOrWrapper` may be the raw response body or an LlmResponse wrapper
 * exposing the body as `.body`. The `messages` argument is mutated in place
 * and returned.
 *
 * - gemini: pushes the first candidate's `content` when there is one.
 * - anthropic: pushes an assistant turn carrying the response `content`.
 * - openai, array state: pushes an assistant message with the response text
 *   (empty string when there is none).
 * - openai, object state: stores the response id as `previousResponseId`
 *   (keeping the old one if the response has no id) and clears `input`.
 * - any other provider: returns the state untouched.
 */
export function appendAssistantResponse(provider, messages, responseOrWrapper) {
  const body = responseOrWrapper?.body ?? responseOrWrapper;

  if (provider === 'gemini') {
    const candidateContent = body.candidates?.[0]?.content;
    if (candidateContent) {
      messages.push(candidateContent);
    }
    return messages;
  }

  if (provider === 'anthropic') {
    messages.messages.push({ role: 'assistant', content: body.content });
    return messages;
  }

  if (provider === 'openai') {
    if (!Array.isArray(messages)) {
      messages.previousResponseId = body.id ?? messages.previousResponseId;
      messages.input = [];
      return messages;
    }
    const replyText = getResponseText('openai', body);
    messages.push({ role: 'assistant', content: replyText ?? '' });
    return messages;
  }

  return messages;
}
|
|
382
|
+
|
|
383
|
+
/**
 * Feed tool execution results back into the conversation state for the next turn.
 *
 * `toolCalls[i]` is paired with `results[i]`. The `messages` argument is
 * mutated in place and returned.
 *
 * - gemini: pushes one user turn whose parts are `functionResponse` entries.
 * - anthropic: pushes one user turn whose content is a list of `tool_result`
 *   blocks keyed by `tool_use_id`.
 * - openai: results that are not already strings are JSON-stringified. With
 *   array state each result becomes a `tool` message; with object state
 *   `input` is replaced by `function_call_output` items.
 * - any other provider: returns the state untouched.
 */
export function appendToolResults(provider, messages, toolCalls, results) {
  if (provider === 'gemini') {
    const toFunctionResponse = (call, index) => ({
      functionResponse: {
        name: call.name,
        response: { content: results[index] },
      },
    });
    messages.push({ role: 'user', parts: toolCalls.map(toFunctionResponse) });
    return messages;
  }

  if (provider === 'anthropic') {
    const toToolResult = (call, index) => ({
      type: 'tool_result',
      tool_use_id: call.id,
      content: results[index],
    });
    messages.messages.push({ role: 'user', content: toolCalls.map(toToolResult) });
    return messages;
  }

  if (provider !== 'openai') {
    return messages;
  }

  // Build every output first so a bad entry fails before the state is touched.
  const toolOutputs = Array.from(toolCalls, (call, index) => {
    const raw = results[index];
    const output = typeof raw === 'string' ? raw : JSON.stringify(raw);
    return { type: 'function_call_output', call_id: call.id, output };
  });

  if (!Array.isArray(messages)) {
    messages.input = toolOutputs;
    return messages;
  }

  for (const { call_id: callId, output } of toolOutputs) {
    messages.push({ role: 'tool', tool_call_id: callId, content: output });
  }
  return messages;
}
|
|
438
|
+
|
|
439
|
+
/**
 * Add a plain-text user turn to the conversation state.
 *
 * Intended for harness-level recovery nudges, for example when the model
 * answers without any tool calls before taking required actions. The
 * `messages` argument is mutated in place and returned.
 *
 * For OpenAI object state, `input` is replaced by a new array (the previous
 * array is copied, not mutated) with the user turn appended; a non-array
 * `input` is treated as empty. Unknown providers leave the state untouched.
 *
 * @param {'gemini' | 'anthropic' | 'openai'} provider
 * @param {Array | object} messages
 * @param {string} text
 * @returns {Array | object}
 */
export function appendUserInstruction(provider, messages, text) {
  if (provider === 'gemini') {
    messages.push({ role: 'user', parts: [{ text }] });
    return messages;
  }

  if (provider === 'anthropic') {
    messages.messages.push({ role: 'user', content: text });
    return messages;
  }

  if (provider !== 'openai') {
    return messages;
  }

  const userTurn = { role: 'user', content: text };
  if (Array.isArray(messages)) {
    messages.push(userTurn);
    return messages;
  }

  const priorInput = Array.isArray(messages.input) ? messages.input : [];
  messages.input = [...priorInput, userTurn];
  return messages;
}
|
|
473
|
+
|
|
474
|
+
/**
 * Split the conversation state into the arguments passed to callLlm.
 *
 * Anthropic state keeps the system prompt apart from the turns, so both are
 * returned separately as `{ system, messages }`. Every other provider's state
 * is passed through whole as `{ messages }`.
 */
export function extractCallArgs(provider, messages) {
  return provider === 'anthropic'
    ? { system: messages.system, messages: messages.messages }
    : { messages };
}
|