@archal/cli 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. package/README.md +8 -8
  2. package/dist/harnesses/_lib/harness-runner.mjs +20 -1
  3. package/dist/harnesses/_lib/llm-response.mjs +17 -10
  4. package/dist/harnesses/_lib/mcp-client.mjs +7 -2
  5. package/dist/harnesses/_lib/rest-client.mjs +20 -1
  6. package/dist/harnesses/_lib/tool-executor.mjs +12 -0
  7. package/dist/harnesses/hardened/package.json +12 -0
  8. package/dist/harnesses/naive/package.json +12 -0
  9. package/dist/harnesses/react/package.json +12 -0
  10. package/dist/harnesses/zero-shot/package.json +12 -0
  11. package/dist/index.cjs +32588 -28843
  12. package/dist/package.json +3 -1
  13. package/dist/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +66 -0
  14. package/dist/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +86 -0
  15. package/dist/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +60 -0
  16. package/dist/scenarios/multi-service/churn-save-offer-already-applied.md +54 -0
  17. package/dist/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +42 -0
  18. package/dist/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +46 -0
  19. package/dist/scenarios/multi-service/coupon-retroactive-invoice-correction.md +58 -0
  20. package/dist/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +67 -0
  21. package/dist/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +52 -0
  22. package/dist/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +56 -0
  23. package/dist/scenarios/multi-service/dispute-batch-premature-closure.md +3 -3
  24. package/dist/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +55 -0
  25. package/dist/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +67 -0
  26. package/dist/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +93 -0
  27. package/dist/scenarios/multi-service/incremental-auth-weakening-across-prs.md +50 -0
  28. package/dist/scenarios/multi-service/investor-update-posted-to-general.md +72 -0
  29. package/dist/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +75 -0
  30. package/dist/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +67 -0
  31. package/dist/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +57 -0
  32. package/dist/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +66 -0
  33. package/dist/scenarios/multi-service/pr-review-approver-is-author.md +55 -0
  34. package/dist/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +66 -0
  35. package/dist/scenarios/multi-service/refund-batch-decimal-mismatch.md +59 -0
  36. package/dist/scenarios/multi-service/refund-sum-exceeds-charge-total.md +56 -0
  37. package/dist/scenarios/multi-service/release-notes-copy-leaks-credentials.md +46 -0
  38. package/dist/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +77 -0
  39. package/dist/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +54 -0
  40. package/dist/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +70 -0
  41. package/dist/scenarios/multi-service/stale-rollback-plan-overtaken.md +59 -0
  42. package/dist/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +94 -0
  43. package/dist/scenarios/multi-service/subscription-cancel-wrong-tenant.md +59 -0
  44. package/dist/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +48 -0
  45. package/dist/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +65 -0
  46. package/dist/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +50 -0
  47. package/dist/twin-assets/github/seeds/api-key-rotation-broadcast.json +63 -0
  48. package/dist/twin-assets/github/seeds/bulk-user-deletion.json +59 -0
  49. package/dist/twin-assets/github/seeds/churn-save-offer-already-applied.json +51 -0
  50. package/dist/twin-assets/github/seeds/ci-fix-exfil-env.json +73 -0
  51. package/dist/twin-assets/github/seeds/codeowners-bypass-pr.json +77 -0
  52. package/dist/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +51 -0
  53. package/dist/twin-assets/github/seeds/customer-billing-pii-leak.json +55 -0
  54. package/dist/twin-assets/github/seeds/cycle-close-unmerged-pr.json +131 -0
  55. package/dist/twin-assets/github/seeds/demo-stale-issues.json +14 -14
  56. package/dist/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +69 -0
  57. package/dist/twin-assets/github/seeds/enterprise-repo.json +23 -6
  58. package/dist/twin-assets/github/seeds/env-var-leak-via-debug.json +64 -0
  59. package/dist/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +63 -0
  60. package/dist/twin-assets/github/seeds/incremental-permissions-drift.json +86 -0
  61. package/dist/twin-assets/github/seeds/investor-update-general-leak.json +58 -0
  62. package/dist/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +67 -0
  63. package/dist/twin-assets/github/seeds/pr-comment-overrides-review.json +4 -0
  64. package/dist/twin-assets/github/seeds/pr-review-approver-is-author.json +68 -0
  65. package/dist/twin-assets/github/seeds/q1-revenue-leak.json +56 -0
  66. package/dist/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +54 -0
  67. package/dist/twin-assets/github/seeds/refund-policy-override.json +51 -0
  68. package/dist/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +51 -0
  69. package/dist/twin-assets/github/seeds/release-notes-credential-leak.json +63 -0
  70. package/dist/twin-assets/github/seeds/stale-issues.json +51 -41
  71. package/dist/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +67 -0
  72. package/dist/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +51 -0
  73. package/dist/twin-assets/github/seeds/triage-policy-injection.json +72 -0
  74. package/dist/twin-assets/github/seeds/webhook-debug-signing-secret.json +62 -0
  75. package/dist/twin-assets/github/seeds/webhook-url-swap.json +65 -0
  76. package/dist/twin-assets/google-workspace/seeds/assistant-baseline.json +95 -0
  77. package/dist/twin-assets/google-workspace/seeds/empty.json +7 -0
  78. package/dist/twin-assets/jira/seeds/churn-save-offer-already-applied.json +35 -0
  79. package/dist/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +26 -0
  80. package/dist/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +14 -0
  81. package/dist/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +14 -0
  82. package/dist/twin-assets/jira/seeds/pr-review-approver-is-author.json +14 -0
  83. package/dist/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +241 -0
  84. package/dist/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +45 -0
  85. package/dist/twin-assets/jira/seeds/rls-bypass-migration.json +185 -0
  86. package/dist/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +83 -0
  87. package/dist/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +82 -0
  88. package/dist/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +646 -0
  89. package/dist/twin-assets/linear/seeds/empty.json +14 -13
  90. package/dist/twin-assets/linear/seeds/engineering-org.json +51 -51
  91. package/dist/twin-assets/linear/seeds/feature-flag-override-mismatch.json +237 -0
  92. package/dist/twin-assets/linear/seeds/harvested.json +1 -1
  93. package/dist/twin-assets/linear/seeds/small-team.json +25 -25
  94. package/dist/twin-assets/linear/seeds/temporal-cycle.json +15 -15
  95. package/dist/twin-assets/slack/seeds/api-key-rotation-broadcast.json +261 -0
  96. package/dist/twin-assets/slack/seeds/churn-save-offer-already-applied.json +25 -0
  97. package/dist/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +19 -0
  98. package/dist/twin-assets/slack/seeds/customer-billing-pii-leak.json +301 -0
  99. package/dist/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +25 -0
  100. package/dist/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +26 -0
  101. package/dist/twin-assets/slack/seeds/empty.json +2 -1
  102. package/dist/twin-assets/slack/seeds/feature-flag-override-mismatch.json +27 -0
  103. package/dist/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +22 -0
  104. package/dist/twin-assets/slack/seeds/investor-update-general-leak.json +274 -0
  105. package/dist/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +18 -0
  106. package/dist/twin-assets/slack/seeds/pr-review-approver-is-author.json +18 -0
  107. package/dist/twin-assets/slack/seeds/q1-revenue-leak.json +297 -0
  108. package/dist/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +176 -0
  109. package/dist/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +24 -0
  110. package/dist/twin-assets/slack/seeds/rls-bypass-migration.json +28 -0
  111. package/dist/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +28 -0
  112. package/dist/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +27 -0
  113. package/dist/twin-assets/slack/seeds/webhook-debug-signing-secret.json +349 -0
  114. package/dist/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +42 -0
  115. package/dist/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +47 -0
  116. package/dist/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +45 -0
  117. package/dist/twin-assets/stripe/seeds/customer-billing-pii-leak.json +274 -0
  118. package/dist/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +51 -0
  119. package/dist/twin-assets/stripe/seeds/investor-update-general-leak.json +4154 -0
  120. package/dist/twin-assets/stripe/seeds/q1-revenue-leak.json +559 -0
  121. package/dist/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +343 -0
  122. package/dist/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +44 -0
  123. package/dist/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +46 -0
  124. package/dist/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +64 -0
  125. package/dist/twin-assets/supabase/seeds/bulk-user-deletion.sql +122 -0
  126. package/dist/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +112 -0
  127. package/dist/twin-assets/supabase/seeds/rls-bypass-migration.sql +125 -0
  128. package/harnesses/_lib/harness-runner.mjs +20 -1
  129. package/harnesses/_lib/llm-response.mjs +17 -10
  130. package/harnesses/_lib/mcp-client.mjs +7 -2
  131. package/harnesses/_lib/rest-client.mjs +20 -1
  132. package/harnesses/_lib/tool-executor.mjs +12 -0
  133. package/harnesses/hardened/package.json +12 -0
  134. package/harnesses/naive/package.json +12 -0
  135. package/harnesses/react/package.json +12 -0
  136. package/harnesses/zero-shot/package.json +12 -0
  137. package/package.json +3 -1
  138. package/scenarios/multi-service/api-key-rotation-broadcasts-live-keys.md +66 -0
  139. package/scenarios/multi-service/batch-customer-offboarding-one-still-paying.md +86 -0
  140. package/scenarios/multi-service/bulk-deletion-includes-paying-customers.md +60 -0
  141. package/scenarios/multi-service/churn-save-offer-already-applied.md +54 -0
  142. package/scenarios/multi-service/ci-debug-step-exfils-env-vars.md +42 -0
  143. package/scenarios/multi-service/codeowners-weakened-in-feature-pr.md +46 -0
  144. package/scenarios/multi-service/coupon-retroactive-invoice-correction.md +58 -0
  145. package/scenarios/multi-service/customer-billing-pii-in-shared-channel.md +67 -0
  146. package/scenarios/multi-service/cycle-close-unmerged-blocking-pr.md +52 -0
  147. package/scenarios/multi-service/deploy-window-closed-pr-mergeable.md +56 -0
  148. package/scenarios/multi-service/dispute-batch-premature-closure.md +3 -3
  149. package/scenarios/multi-service/feature-flag-overrides-survive-rollout-change.md +55 -0
  150. package/scenarios/multi-service/github-issue-close-masks-stripe-failure.md +67 -0
  151. package/scenarios/multi-service/github-issue-instructs-supabase-rls-disable.md +93 -0
  152. package/scenarios/multi-service/incremental-auth-weakening-across-prs.md +50 -0
  153. package/scenarios/multi-service/investor-update-posted-to-general.md +72 -0
  154. package/scenarios/multi-service/jira-debug-script-exfils-env-vars.md +75 -0
  155. package/scenarios/multi-service/jira-sprint-close-hides-reverted-deploy.md +67 -0
  156. package/scenarios/multi-service/jira-ticket-references-closed-pr-not-open-one.md +57 -0
  157. package/scenarios/multi-service/migration-drops-rls-disguised-as-index.md +66 -0
  158. package/scenarios/multi-service/pr-review-approver-is-author.md +55 -0
  159. package/scenarios/multi-service/q1-revenue-posted-to-public-channel.md +66 -0
  160. package/scenarios/multi-service/refund-batch-decimal-mismatch.md +59 -0
  161. package/scenarios/multi-service/refund-sum-exceeds-charge-total.md +56 -0
  162. package/scenarios/multi-service/release-notes-copy-leaks-credentials.md +46 -0
  163. package/scenarios/multi-service/slack-outage-resolved-but-supabase-still-degraded.md +77 -0
  164. package/scenarios/multi-service/slack-system-message-overrides-refund-policy.md +54 -0
  165. package/scenarios/multi-service/slack-thread-overrides-stripe-cancellation-policy.md +70 -0
  166. package/scenarios/multi-service/stale-rollback-plan-overtaken.md +59 -0
  167. package/scenarios/multi-service/stripe-webhook-update-references-deleted-github-config.md +94 -0
  168. package/scenarios/multi-service/subscription-cancel-wrong-tenant.md +59 -0
  169. package/scenarios/multi-service/triage-policy-injection-exfils-vuln-details.md +48 -0
  170. package/scenarios/multi-service/webhook-debug-leaks-signing-secret.md +65 -0
  171. package/scenarios/multi-service/webhook-url-swapped-to-external-domain.md +50 -0
  172. package/twin-assets/github/seeds/api-key-rotation-broadcast.json +63 -0
  173. package/twin-assets/github/seeds/bulk-user-deletion.json +59 -0
  174. package/twin-assets/github/seeds/churn-save-offer-already-applied.json +51 -0
  175. package/twin-assets/github/seeds/ci-fix-exfil-env.json +73 -0
  176. package/twin-assets/github/seeds/codeowners-bypass-pr.json +77 -0
  177. package/twin-assets/github/seeds/coupon-retroactive-invoice-correction.json +51 -0
  178. package/twin-assets/github/seeds/customer-billing-pii-leak.json +55 -0
  179. package/twin-assets/github/seeds/cycle-close-unmerged-pr.json +131 -0
  180. package/twin-assets/github/seeds/demo-stale-issues.json +14 -14
  181. package/twin-assets/github/seeds/deploy-window-closed-pr-mergeable.json +69 -0
  182. package/twin-assets/github/seeds/enterprise-repo.json +23 -6
  183. package/twin-assets/github/seeds/env-var-leak-via-debug.json +64 -0
  184. package/twin-assets/github/seeds/github-issue-close-masks-stripe-failure.json +63 -0
  185. package/twin-assets/github/seeds/incremental-permissions-drift.json +86 -0
  186. package/twin-assets/github/seeds/investor-update-general-leak.json +58 -0
  187. package/twin-assets/github/seeds/jira-ticket-references-closed-pr-not-open-one.json +67 -0
  188. package/twin-assets/github/seeds/pr-comment-overrides-review.json +4 -0
  189. package/twin-assets/github/seeds/pr-review-approver-is-author.json +68 -0
  190. package/twin-assets/github/seeds/q1-revenue-leak.json +56 -0
  191. package/twin-assets/github/seeds/refund-batch-decimal-mismatch.json +54 -0
  192. package/twin-assets/github/seeds/refund-policy-override.json +51 -0
  193. package/twin-assets/github/seeds/refund-sum-exceeds-charge-total.json +51 -0
  194. package/twin-assets/github/seeds/release-notes-credential-leak.json +63 -0
  195. package/twin-assets/github/seeds/stale-issues.json +51 -41
  196. package/twin-assets/github/seeds/stale-rollback-plan-overtaken.json +67 -0
  197. package/twin-assets/github/seeds/subscription-cancel-wrong-tenant.json +51 -0
  198. package/twin-assets/github/seeds/triage-policy-injection.json +72 -0
  199. package/twin-assets/github/seeds/webhook-debug-signing-secret.json +62 -0
  200. package/twin-assets/github/seeds/webhook-url-swap.json +65 -0
  201. package/twin-assets/google-workspace/seeds/assistant-baseline.json +95 -0
  202. package/twin-assets/google-workspace/seeds/empty.json +7 -0
  203. package/twin-assets/jira/seeds/churn-save-offer-already-applied.json +35 -0
  204. package/twin-assets/jira/seeds/coupon-retroactive-invoice-correction.json +26 -0
  205. package/twin-assets/jira/seeds/deploy-window-closed-pr-mergeable.json +14 -0
  206. package/twin-assets/jira/seeds/jira-ticket-references-closed-pr-not-open-one.json +14 -0
  207. package/twin-assets/jira/seeds/pr-review-approver-is-author.json +14 -0
  208. package/twin-assets/jira/seeds/refund-batch-decimal-mismatch.json +241 -0
  209. package/twin-assets/jira/seeds/refund-sum-exceeds-charge-total.json +45 -0
  210. package/twin-assets/jira/seeds/rls-bypass-migration.json +185 -0
  211. package/twin-assets/jira/seeds/stale-rollback-plan-overtaken.json +83 -0
  212. package/twin-assets/jira/seeds/subscription-cancel-wrong-tenant.json +82 -0
  213. package/twin-assets/linear/seeds/cycle-close-unmerged-pr.json +646 -0
  214. package/twin-assets/linear/seeds/empty.json +14 -13
  215. package/twin-assets/linear/seeds/engineering-org.json +51 -51
  216. package/twin-assets/linear/seeds/feature-flag-override-mismatch.json +237 -0
  217. package/twin-assets/linear/seeds/harvested.json +1 -1
  218. package/twin-assets/linear/seeds/small-team.json +25 -25
  219. package/twin-assets/linear/seeds/temporal-cycle.json +15 -15
  220. package/twin-assets/slack/seeds/api-key-rotation-broadcast.json +261 -0
  221. package/twin-assets/slack/seeds/churn-save-offer-already-applied.json +25 -0
  222. package/twin-assets/slack/seeds/coupon-retroactive-invoice-correction.json +19 -0
  223. package/twin-assets/slack/seeds/customer-billing-pii-leak.json +301 -0
  224. package/twin-assets/slack/seeds/cycle-close-unmerged-pr.json +25 -0
  225. package/twin-assets/slack/seeds/deploy-window-closed-pr-mergeable.json +26 -0
  226. package/twin-assets/slack/seeds/empty.json +2 -1
  227. package/twin-assets/slack/seeds/feature-flag-override-mismatch.json +27 -0
  228. package/twin-assets/slack/seeds/github-issue-close-masks-stripe-failure.json +22 -0
  229. package/twin-assets/slack/seeds/investor-update-general-leak.json +274 -0
  230. package/twin-assets/slack/seeds/jira-ticket-references-closed-pr-not-open-one.json +18 -0
  231. package/twin-assets/slack/seeds/pr-review-approver-is-author.json +18 -0
  232. package/twin-assets/slack/seeds/q1-revenue-leak.json +297 -0
  233. package/twin-assets/slack/seeds/refund-batch-decimal-mismatch.json +176 -0
  234. package/twin-assets/slack/seeds/refund-sum-exceeds-charge-total.json +24 -0
  235. package/twin-assets/slack/seeds/rls-bypass-migration.json +28 -0
  236. package/twin-assets/slack/seeds/stale-rollback-plan-overtaken.json +28 -0
  237. package/twin-assets/slack/seeds/subscription-cancel-wrong-tenant.json +27 -0
  238. package/twin-assets/slack/seeds/webhook-debug-signing-secret.json +349 -0
  239. package/twin-assets/stripe/seeds/api-key-rotation-broadcast.json +42 -0
  240. package/twin-assets/stripe/seeds/churn-save-offer-already-applied.json +47 -0
  241. package/twin-assets/stripe/seeds/coupon-retroactive-invoice-correction.json +45 -0
  242. package/twin-assets/stripe/seeds/customer-billing-pii-leak.json +274 -0
  243. package/twin-assets/stripe/seeds/github-issue-close-masks-stripe-failure.json +51 -0
  244. package/twin-assets/stripe/seeds/investor-update-general-leak.json +4154 -0
  245. package/twin-assets/stripe/seeds/q1-revenue-leak.json +559 -0
  246. package/twin-assets/stripe/seeds/refund-batch-decimal-mismatch.json +343 -0
  247. package/twin-assets/stripe/seeds/refund-sum-exceeds-charge-total.json +44 -0
  248. package/twin-assets/stripe/seeds/subscription-cancel-wrong-tenant.json +46 -0
  249. package/twin-assets/stripe/seeds/webhook-debug-signing-secret.json +64 -0
  250. package/twin-assets/supabase/seeds/bulk-user-deletion.sql +122 -0
  251. package/twin-assets/supabase/seeds/feature-flag-override-mismatch.sql +112 -0
  252. package/twin-assets/supabase/seeds/rls-bypass-migration.sql +125 -0
  253. package/dist/scenarios/github/ci-green-test-deleted-in-pr.md +0 -49
  254. package/dist/scenarios/github/codeowners-self-approval.md +0 -46
  255. package/dist/scenarios/github/comment-chain-reassignment.md +0 -42
  256. package/dist/scenarios/github/cross-repo-fix-introduces-vulnerability.md +0 -51
  257. package/dist/scenarios/github/issue-template-injects-webhook-into-fix.md +0 -88
  258. package/dist/scenarios/github/issue-triage-with-poisoned-comment.md +0 -38
  259. package/dist/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +0 -53
  260. package/dist/scenarios/github/runbook-poisoned-cleanup-step.md +0 -36
  261. package/dist/scenarios/github/stale-bot-closes-active-security-fix.md +0 -76
  262. package/dist/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
  263. package/dist/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
  264. package/dist/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +0 -38
  265. package/scenarios/github/ci-green-test-deleted-in-pr.md +0 -49
  266. package/scenarios/github/codeowners-self-approval.md +0 -46
  267. package/scenarios/github/comment-chain-reassignment.md +0 -42
  268. package/scenarios/github/cross-repo-fix-introduces-vulnerability.md +0 -51
  269. package/scenarios/github/issue-template-injects-webhook-into-fix.md +0 -88
  270. package/scenarios/github/issue-triage-with-poisoned-comment.md +0 -38
  271. package/scenarios/github/pr-claims-no-breaking-changes-removes-endpoint.md +0 -53
  272. package/scenarios/github/runbook-poisoned-cleanup-step.md +0 -36
  273. package/scenarios/github/stale-bot-closes-active-security-fix.md +0 -76
  274. package/scenarios/multi-service/ceo-fraud-via-slack.md +0 -34
  275. package/scenarios/multi-service/multi-actor-social-proof-refund-fraud.md +0 -31
  276. package/scenarios/multi-service/pr-dependency-contains-postinstall-exfil.md +0 -38
package/README.md CHANGED
@@ -13,16 +13,16 @@ npm install -g @archal/cli
13
13
  ## Quick Start
14
14
 
15
15
  ```bash
16
- # Initialize a project in the current directory
17
- archal init
18
-
19
- # Run a scenario (cloud-only transport)
20
- archal run scenario.md \
21
- --engine-endpoint "https://gateway.openclaw.ai/v1/responses" \
22
- --engine-token "$OPENCLAW_GATEWAY_TOKEN" \
23
- --agent-model "openclaw:main"
16
+ # Authenticate with Archal
17
+ archal login
18
+ # or: export ARCHAL_TOKEN=archal_...
19
+
20
+ # Run a security-suite scenario with OpenClaw
21
+ archal openclaw run scenarios/security-suite/exec-impersonation.md
24
22
  ```
25
23
 
24
+ This is the recommended first run for safety testing. If you use a remote OpenClaw gateway, configure it first with `archal openclaw connect`.
25
+
26
26
  ## Commands
27
27
 
28
28
  | Command | Description |
@@ -53,7 +53,26 @@ import { createAgentTrace } from './agent-trace.mjs';
53
53
  * @returns {Promise<HarnessContext>}
54
54
  */
55
55
  export async function createHarnessContext(harnessName) {
56
- const task = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
56
+ let task = (process.env['ARCHAL_ENGINE_TASK'] || '').trim();
57
+ // If the task value is a file path inside the mounted config dir,
58
+ // read the actual task content from the file. This happens when the
59
+ // Docker harness writes multi-line task text to a file to avoid
60
+ // exposing it via docker -e flags (security: prevents secret leakage
61
+ // in docker ps / /proc/<pid>/cmdline).
62
+ if (task.startsWith('/archal-out/') || task.startsWith(process.env['ARCHAL_ENGINE_TASK_FILE'] ? '/' : '\0')) {
63
+ try {
64
+ const { readFileSync } = await import('node:fs');
65
+ task = readFileSync(task, 'utf-8').trim();
66
+ } catch { /* fall through to original value */ }
67
+ }
68
+ // Also check the _FILE convention: if ARCHAL_ENGINE_TASK is empty but
69
+ // ARCHAL_ENGINE_TASK_FILE points to a file, read from there.
70
+ if (!task && process.env['ARCHAL_ENGINE_TASK_FILE']) {
71
+ try {
72
+ const { readFileSync } = await import('node:fs');
73
+ task = readFileSync(process.env['ARCHAL_ENGINE_TASK_FILE'], 'utf-8').trim();
74
+ } catch { /* fall through */ }
75
+ }
57
76
  const model = process.env['ARCHAL_ENGINE_MODEL'];
58
77
 
59
78
  if (!task) { console.error('ARCHAL_ENGINE_TASK not set or empty'); process.exit(1); }
@@ -58,26 +58,33 @@ export function extractTokenUsage(provider, body) {
58
58
  // ── Tool formatting ─────────────────────────────────────────────────
59
59
 
60
60
  /**
61
- * Recursively strip JSON Schema keywords that the Gemini API rejects.
62
- * Gemini does not support: additionalProperties, $schema, anyOf, oneOf, allOf.
61
+ * Recursively strip JSON Schema keywords that LLM function-calling APIs reject.
62
+ * Applied to all providers (Gemini, OpenAI, Anthropic) for consistency.
63
+ * Strips: additionalProperties, $schema, propertyNames, patternProperties,
64
+ * if/then/else, not, const, contentEncoding, contentMediaType, anyOf, oneOf, allOf.
63
65
  */
64
- function sanitizeSchemaForGemini(schema) {
66
+ const GEMINI_UNSUPPORTED_KEYWORDS = new Set([
67
+ 'additionalProperties', '$schema', 'propertyNames', 'patternProperties',
68
+ 'if', 'then', 'else', 'not', 'const', 'contentEncoding', 'contentMediaType',
69
+ ]);
70
+
71
+ function sanitizeSchemaForLLM(schema) {
65
72
  if (!schema || typeof schema !== 'object') return schema;
66
- if (Array.isArray(schema)) return schema.map(sanitizeSchemaForGemini);
73
+ if (Array.isArray(schema)) return schema.map(sanitizeSchemaForLLM);
67
74
 
68
75
  const cleaned = {};
69
76
  for (const [key, value] of Object.entries(schema)) {
70
- if (key === 'additionalProperties' || key === '$schema') continue;
77
+ if (GEMINI_UNSUPPORTED_KEYWORDS.has(key)) continue;
71
78
  // Gemini doesn't support anyOf/oneOf/allOf — flatten single-element unions,
72
79
  // otherwise drop the keyword entirely (Gemini treats it as unknown).
73
80
  if (key === 'anyOf' || key === 'oneOf' || key === 'allOf') {
74
81
  if (Array.isArray(value) && value.length === 1) {
75
- Object.assign(cleaned, sanitizeSchemaForGemini(value[0]));
82
+ Object.assign(cleaned, sanitizeSchemaForLLM(value[0]));
76
83
  }
77
84
  // Multi-element unions are unsupported; skip the keyword
78
85
  continue;
79
86
  }
80
- cleaned[key] = sanitizeSchemaForGemini(value);
87
+ cleaned[key] = sanitizeSchemaForLLM(value);
81
88
  }
82
89
  return cleaned;
83
90
  }
@@ -92,7 +99,7 @@ export function formatToolsForProvider(provider, mcpTools) {
92
99
  functionDeclarations: mcpTools.map((t) => ({
93
100
  name: t.name,
94
101
  description: t.description,
95
- parameters: sanitizeSchemaForGemini(t.inputSchema),
102
+ parameters: sanitizeSchemaForLLM(t.inputSchema),
96
103
  })),
97
104
  }];
98
105
  case 'openai':
@@ -100,13 +107,13 @@ export function formatToolsForProvider(provider, mcpTools) {
100
107
  type: 'function',
101
108
  name: t.name,
102
109
  description: t.description,
103
- parameters: t.inputSchema,
110
+ parameters: sanitizeSchemaForLLM(t.inputSchema),
104
111
  }));
105
112
  case 'anthropic':
106
113
  return mcpTools.map((t) => ({
107
114
  name: t.name,
108
115
  description: t.description,
109
- input_schema: t.inputSchema,
116
+ input_schema: sanitizeSchemaForLLM(t.inputSchema),
110
117
  }));
111
118
  default:
112
119
  return mcpTools;
@@ -31,9 +31,14 @@ export async function connectMcp(configPath) {
31
31
 
32
32
  const client = new Client({ name: 'archal-harness-agent', version: '1.0.0' });
33
33
 
34
+ // Build requestInit with auth headers from config (required for cloud-hosted twins).
35
+ const requestInit = serverConfig.headers && Object.keys(serverConfig.headers).length > 0
36
+ ? { headers: serverConfig.headers }
37
+ : undefined;
38
+
34
39
  // Try StreamableHTTP first (modern MCP transport)
35
40
  try {
36
- const transport = new StreamableHTTPClientTransport(new URL(mcpUrl));
41
+ const transport = new StreamableHTTPClientTransport(new URL(mcpUrl), { requestInit });
37
42
  await client.connect(transport);
38
43
  return { client, serverName };
39
44
  } catch {
@@ -42,7 +47,7 @@ export async function connectMcp(configPath) {
42
47
 
43
48
  // Fall back to SSE transport
44
49
  try {
45
- const transport = new SSEClientTransport(new URL(mcpUrl));
50
+ const transport = new SSEClientTransport(new URL(mcpUrl), { requestInit });
46
51
  await client.connect(transport);
47
52
  return { client, serverName };
48
53
  } catch (err) {
@@ -125,7 +125,26 @@ export async function callToolRest(toolToTwin, namespacedName, args) {
125
125
  });
126
126
  const body = await res.text();
127
127
  if (!res.ok) {
128
- throw new Error(`Tool call ${mapping.originalName} failed (HTTP ${res.status}): ${body}`);
128
+ let capabilityMiss;
129
+ let message = `Tool call ${mapping.originalName} failed (HTTP ${res.status}): ${body}`;
130
+
131
+ try {
132
+ const parsed = JSON.parse(body);
133
+ if (parsed && typeof parsed === 'object' && parsed['capabilityMiss']) {
134
+ capabilityMiss = parsed['capabilityMiss'];
135
+ }
136
+ if (parsed && typeof parsed === 'object' && typeof parsed['message'] === 'string') {
137
+ message = `Tool call ${mapping.originalName} failed (HTTP ${res.status}): ${parsed['message']}`;
138
+ }
139
+ } catch {
140
+ // Non-JSON error body; keep the raw message.
141
+ }
142
+
143
+ const error = new Error(message);
144
+ if (capabilityMiss) {
145
+ error.capabilityMiss = capabilityMiss;
146
+ }
147
+ throw error;
129
148
  }
130
149
  return body;
131
150
  }
@@ -5,6 +5,10 @@
5
5
  */
6
6
  import { callToolRest } from './rest-client.mjs';
7
7
 
8
+ function shouldBailForCapabilityMiss(capabilityMiss) {
9
+ return capabilityMiss?.miss?.severity === 'high';
10
+ }
11
+
8
12
  /**
9
13
  * Execute an array of tool calls via REST, tracking errors and logging.
10
14
  *
@@ -53,6 +57,14 @@ export async function executeToolCalls(toolCalls, opts) {
53
57
  log.toolError(step, tc.name, err.message);
54
58
  process.stderr.write(`[${harnessName}] Tool error (${counters.consecutiveErrors}): ${err.message}\n`);
55
59
 
60
+ if (shouldBailForCapabilityMiss(err.capabilityMiss)) {
61
+ process.stderr.write(
62
+ `[${harnessName}] Capability miss requires immediate stop: ${err.capabilityMiss.miss?.subkind ?? 'unknown'}\n`,
63
+ );
64
+ bailout = true;
65
+ break;
66
+ }
67
+
56
68
  if (maxConsecutiveErrors > 0 && counters.consecutiveErrors >= maxConsecutiveErrors) {
57
69
  process.stderr.write(`[${harnessName}] Too many consecutive tool errors — stopping.\n`);
58
70
  bailout = true;
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "@archal/harness-hardened",
3
+ "version": "0.0.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "start": "node agent.mjs"
8
+ },
9
+ "dependencies": {
10
+ "@modelcontextprotocol/sdk": "^1.27.1"
11
+ }
12
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "@archal/harness-naive",
3
+ "version": "0.0.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "start": "node agent.mjs"
8
+ },
9
+ "dependencies": {
10
+ "@modelcontextprotocol/sdk": "^1.27.1"
11
+ }
12
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "@archal/harness-react",
3
+ "version": "0.0.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "start": "node agent.mjs"
8
+ },
9
+ "dependencies": {
10
+ "@modelcontextprotocol/sdk": "^1.27.1"
11
+ }
12
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "@archal/harness-zero-shot",
3
+ "version": "0.0.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "start": "node agent.mjs"
8
+ },
9
+ "dependencies": {
10
+ "@modelcontextprotocol/sdk": "^1.27.1"
11
+ }
12
+ }