archal 0.9.18 → 0.9.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +9 -1
  2. package/agents/github-octokit/.archal.json +8 -0
  3. package/agents/github-octokit/Dockerfile +8 -0
  4. package/agents/github-octokit/README.md +113 -0
  5. package/agents/github-octokit/agent.mjs +54 -0
  6. package/agents/github-octokit/package.json +9 -0
  7. package/agents/github-octokit/scenarios/test-repo-access.md +27 -0
  8. package/agents/google-workspace-local-tools/Dockerfile +6 -0
  9. package/agents/google-workspace-local-tools/README.md +58 -0
  10. package/agents/google-workspace-local-tools/agent.mjs +196 -0
  11. package/agents/google-workspace-local-tools/archal-harness.json +7 -0
  12. package/agents/google-workspace-local-tools/run-input.yaml +16 -0
  13. package/agents/google-workspace-local-tools/scenario.md +29 -0
  14. package/agents/hermes/.archal.json +8 -0
  15. package/agents/hermes/Dockerfile +46 -0
  16. package/agents/hermes/README.md +87 -0
  17. package/agents/hermes/SOUL.md +27 -0
  18. package/agents/hermes/config.yaml +34 -0
  19. package/agents/hermes/drive.mjs +113 -0
  20. package/agents/hermes/scenarios/stripe-customers-read-only.md +32 -0
  21. package/agents/openclaw/.archal.json +8 -0
  22. package/agents/openclaw/Dockerfile +96 -0
  23. package/agents/openclaw/README.md +120 -0
  24. package/agents/openclaw/drive.mjs +311 -0
  25. package/agents/openclaw/package.json +9 -0
  26. package/agents/openclaw/scenarios/github-issue-triage-read-only.md +44 -0
  27. package/agents/openclaw/workspace/AGENTS.md +23 -0
  28. package/agents/openclaw/workspace/IDENTITY.md +8 -0
  29. package/agents/openclaw/workspace/SOUL.md +14 -0
  30. package/agents/openclaw/workspace/TOOLS.md +35 -0
  31. package/agents/pagination-test/README.md +24 -0
  32. package/agents/pagination-test/scenario.md +24 -0
  33. package/agents/replay-capsule-harness/README.md +29 -0
  34. package/agents/replay-capsule-harness/observability-install-offline-e2e.mts +1517 -0
  35. package/agents/replay-capsule-harness/replay-capsule-e2e.mjs +104 -0
  36. package/clone-assets/apify/tools.json +213 -13
  37. package/clone-assets/calcom/tools.json +510 -0
  38. package/clone-assets/clickup/tools.json +1258 -0
  39. package/clone-assets/customerio/tools.json +386 -0
  40. package/clone-assets/datadog/tools.json +734 -0
  41. package/clone-assets/github/tools.json +312 -25
  42. package/clone-assets/gitlab/tools.json +999 -0
  43. package/clone-assets/google-workspace/tools.json +18 -6
  44. package/clone-assets/hubspot/tools.json +1406 -0
  45. package/clone-assets/jira/fidelity.json +1 -1
  46. package/clone-assets/jira/tools.json +266 -543
  47. package/clone-assets/linear/tools.json +238 -40
  48. package/clone-assets/ownerrez/tools.json +548 -0
  49. package/clone-assets/pricelabs/tools.json +343 -0
  50. package/clone-assets/sentry/tools.json +745 -0
  51. package/clone-assets/slack/tools.json +1 -2
  52. package/clone-assets/stripe/tools.json +185 -46
  53. package/clone-assets/supabase/tools.json +511 -14
  54. package/clone-assets/unipile/tools.json +408 -0
  55. package/clone-assets/webflow/tools.json +415 -0
  56. package/dist/autoloop-worker-types-BEb_E44z.d.cts +196 -0
  57. package/dist/cli.cjs +151033 -75282
  58. package/dist/commands/autoloop-hosted-worker.cjs +43942 -0
  59. package/dist/commands/autoloop-hosted-worker.d.cts +143 -0
  60. package/dist/commands/autoloop-pr-verification.cjs +4227 -0
  61. package/dist/commands/autoloop-pr-verification.d.cts +17 -0
  62. package/dist/{vitest/chunk-IVXSSEYS.js → commands/autoloop-result-parser.cjs} +16515 -18857
  63. package/dist/commands/autoloop-result-parser.d.cts +39 -0
  64. package/dist/commands/autoloop-worker.cjs +36163 -0
  65. package/dist/commands/autoloop-worker.d.cts +97 -0
  66. package/dist/harness.cjs +1 -0
  67. package/dist/index.cjs +1 -1
  68. package/dist/replay.cjs +49624 -0
  69. package/dist/replay.d.cts +4625 -0
  70. package/dist/scenarios.cjs +80343 -0
  71. package/dist/scenarios.d.cts +562 -0
  72. package/dist/vitest/chunk-6CBYFCFK.js +4667 -0
  73. package/dist/vitest/chunk-ARVS45PP.js +2764 -0
  74. package/dist/vitest/index.cjs +6079 -75089
  75. package/dist/vitest/index.d.ts +7 -6
  76. package/dist/vitest/index.js +8 -8
  77. package/dist/vitest/runtime/hosted-session-reaper.cjs +801 -34187
  78. package/dist/vitest/runtime/hosted-session-reaper.js +1 -1
  79. package/dist/vitest/runtime/setup-files.js +2 -2
  80. package/package.json +14 -9
  81. package/skills/archal-agent/SKILL.md +87 -0
  82. package/skills/autoloop/SKILL.md +376 -0
  83. package/skills/autoloop/references/hosted-sources.md +62 -0
  84. package/skills/autoloop/references/trace-schema-mapping.md +73 -0
  85. package/skills/eval/SKILL.md +35 -1
  86. package/skills/install-agent/SKILL.md +221 -0
  87. package/skills/onboard/SKILL.md +80 -0
  88. package/skills/scenario/SKILL.md +19 -4
  89. package/skills/seed/SKILL.md +237 -0
  90. package/dist/seed/dynamic-generator.cjs +0 -45564
  91. package/dist/seed/dynamic-generator.d.cts +0 -106
  92. package/dist/vitest/chunk-CTSN67QR.js +0 -47188
@@ -4,7 +4,7 @@ import {
4
4
  createHostedAuthLease,
5
5
  parsePositiveInteger,
6
6
  runHostedSessionReaper
7
- } from "../chunk-CTSN67QR.js";
7
+ } from "../chunk-ARVS45PP.js";
8
8
 
9
9
  // src/runtime/hosted-session-reaper.ts
10
10
  var VITEST_AUTH_LEASE_OPTIONS = {
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  bootstrapArchalVitestRouting
3
- } from "../chunk-IVXSSEYS.js";
4
- import "../chunk-CTSN67QR.js";
3
+ } from "../chunk-6CBYFCFK.js";
4
+ import "../chunk-ARVS45PP.js";
5
5
 
6
6
  // src/runtime/setup-files.ts
7
7
  import { existsSync, rmSync } from "fs";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "archal",
3
- "version": "0.9.18",
3
+ "version": "0.9.20",
4
4
  "description": "Test your agents & integrations against service clones",
5
5
  "type": "module",
6
6
  "main": "dist/index.cjs",
@@ -17,6 +17,10 @@
17
17
  "types": "./dist/harness.d.cts",
18
18
  "default": "./dist/harness.cjs"
19
19
  },
20
+ "./scenarios": {
21
+ "types": "./dist/scenarios.d.cts",
22
+ "default": "./dist/scenarios.cjs"
23
+ },
20
24
  "./vitest": {
21
25
  "types": "./dist/vitest/index.d.ts",
22
26
  "import": "./dist/vitest/index.js",
@@ -47,15 +51,9 @@
47
51
  "dist",
48
52
  "skills",
49
53
  "clone-assets",
54
+ "agents",
50
55
  "LICENSE"
51
56
  ],
52
- "scripts": {
53
- "verify:artifacts": "node scripts/assert-artifacts.mjs",
54
- "prepack": "pnpm run verify:artifacts",
55
- "prepare": "node scripts/prepare.cjs",
56
- "typecheck:raw": "node --check bin/archal.cjs && node --check scripts/assert-artifacts.mjs && node --check scripts/prepare.cjs",
57
- "typecheck": "pnpm run typecheck:raw"
58
- },
59
57
  "peerDependencies": {
60
58
  "vitest": ">=2.1.0"
61
59
  },
@@ -65,6 +63,13 @@
65
63
  }
66
64
  },
67
65
  "dependencies": {
66
+ "@aws-sdk/client-secrets-manager": "^3.1065.0",
67
+ "e2b": "^2.28.2",
68
68
  "picomatch": "^4.0.4"
69
+ },
70
+ "scripts": {
71
+ "verify:artifacts": "node scripts/assert-artifacts.mjs",
72
+ "typecheck:raw": "node --check bin/archal.cjs && node --check scripts/assert-artifacts.mjs && node --check scripts/prepare.cjs",
73
+ "typecheck": "pnpm run typecheck:raw"
69
74
  }
70
- }
75
+ }
@@ -0,0 +1,87 @@
1
+ ---
2
+ name: archal-agent
3
+ description: The front door for using Archal to test, debug, and fix an AI agent. START HERE, then route to the right sub-skill instead of guessing. Use when the user says "use Archal", "test my agent", "set up Archal", "my agent is failing", "reproduce this production failure", "grade my traces", or otherwise wants to evaluate, debug, or auto-fix an agent against service clones. Pick this whenever the request is Archal-shaped but the specific workflow is unclear.
4
+ user-invocable: true
5
+ argument-hint: "[what you want to do with your agent]"
6
+ ---
7
+
8
+ # Archal
9
+
10
+ You are the entry point for Archal. Archal is the QA layer for AI agents: it
11
+ runs an agent against stateful behavioral clones of real services (GitHub,
12
+ Slack, Stripe, Linear, Jira, Supabase, and more), scores how well it satisfies
13
+ each scenario, and turns failures into reproductions and PR fixes. Your job here
14
+ is to orient the operator and route to the sub-skill that owns their workflow.
15
+ Do not inline those flows; hand off by exact name and let the sub-skill drive.
16
+
17
+ ## Product mental model
18
+
19
+ Archal tests AI agents against service clones instead of real services, so every
20
+ run is deterministic and replayable. You describe a task and success criteria,
21
+ the agent runs against clones, and an evaluator scores satisfaction (a
22
+ probability, not pass/fail). When a real production trace shows a failure, Archal
23
+ reproduces it on clones from trace evidence and ships the fix as a GitHub PR.
24
+ Everything aims at the same thing: deterministic, replayable evals you can trust.
25
+
26
+ ## Decision guide
27
+
28
+ | I want to... | Route to |
29
+ |--------------|----------|
30
+ | Set up Archal in a repo from scratch (install, auth, detect clones) | `onboard` |
31
+ | Write or edit a scenario test file | `scenario` |
32
+ | Load explicit JSON/SQL/catalog state into a clone (deterministic, no LLM) | `seed` |
33
+ | Run scenarios or tasks and interpret satisfaction scores and failures | `eval` |
34
+ | Wire clones into an existing Vitest suite | `vitest` |
35
+ | Connect a repo's production observability so traces get captured | `install-agent` |
36
+ | Run the autoloop over real trace sources (ingest -> grade -> find-failed -> reproduce-on-clones) and, with autofix on, ship the fix as a PR | `autoloop` |
37
+ | Turn autofix or autoloop on/off for an agent | `autoloop`; the copilot can toggle either |
38
+
39
+ If the user is brand new and has none of this set up, start with `onboard`; it
40
+ detects clones and routes onward from there.
41
+
42
+ ## The sub-skills
43
+
44
+ Each lives in `packages/archal/skills/` and owns its own commands, contracts,
45
+ and mental model. Route by exact name:
46
+
47
+ - `onboard` — set up Archal in a repo from scratch: install the CLI, handle auth,
48
+ detect which clones the agent needs, and hand off to the right workflow.
49
+ - `scenario` — author and edit scenario test files (Setup / Prompt / Expected
50
+ Behavior / Success Criteria) that `archal run` executes against clones.
51
+ - `seed` — load explicit JSON, SQL, or catalog state into a clone deterministically,
52
+ with no LLM in the loop, so runs start from a known fixture state.
53
+ - `eval` — run scenarios or inline tasks against clones and interpret the results:
54
+ satisfaction scores, `[D]` vs `[P]` criteria, trace inspection, failure diagnosis.
55
+ - `vitest` — wire clones into an existing Vitest suite using the right composition
56
+ pattern, so the agent's own tests route through clones.
57
+ - `install-agent` — connect a repo's production observability (OTLP, Langfuse,
58
+ Braintrust, database trace tables) so real agent traces are captured for Archal.
59
+ - `autoloop` — the loop over real trace sources: ingest a trace, grade it for a
60
+ real failure, find the failed trace, and reproduce it on clones. Autofix (the
61
+ fix/PR step) is a separate toggle on top of this: when turned on, autoloop
62
+ reproduces a failure and ships the fix as a PR.
63
+
64
+ ## Autoloop and autofix toggles
65
+
66
+ Autoloop (ingest -> grade -> find-failed -> reproduce-on-clones) and autofix (the
67
+ fix/PR step) are **separate per-agent toggles**. Autofix is opt-in: it is not part
68
+ of autoloop until it is turned on. Either can be switched on or off per agent from
69
+ the agents tab, the CLI (`--execution-policy reproduce` is autoloop with autofix
70
+ off; `fix` turns autofix on), or by asking the Archal copilot in chat — the
71
+ copilot can toggle either for an agent. When the user asks to turn autofix or
72
+ autoloop on/off for an agent, handle the toggle, then route to `autoloop` for the
73
+ deeper flow.
74
+
75
+ ## Provider-switchable remediation
76
+
77
+ The Archal copilot is not locked to one model. When autoloop reproduces a failure
78
+ and autofix writes a fix, the user can drive that remediation with their own agent —
79
+ `archal preprod` exposes `--remediation-agent auto|codex|claude|cursor` so the
80
+ fix is written by their Claude Code, Cursor, or Codex — or let Archal's managed
81
+ agent do it. Mention this when the user asks who writes the fix or wants to use
82
+ their own coding agent, then route to `autoloop`.
83
+
84
+ ## Docs
85
+
86
+ - Quickstart: https://docs.archal.ai/quickstart
87
+ - Full docs: https://docs.archal.ai
@@ -0,0 +1,376 @@
1
+ ---
2
+ name: autoloop
3
+ description: Wire Archal Autoloop to a repo plus a real agent-trace source, then drive the import -> grade -> reproduce -> PR-fix loop. USE THIS whenever the user wants to turn production agent traces into reproducible failures and fixes: "I have prod agent traces and want to reproduce a failure", "import my Langfuse / Braintrust / OTel / Supabase traces", "connect a trace source", "grade my prod traces", "turn a failed trace into a PR", "set up / configure the autoloop", or any mention of replaying, grading, or auto-fixing real traces. Also fires when diagnosing a stuck import, grade, reproduction, or PR-fix run, or configuring trace schema mapping.
4
+ user-invocable: true
5
+ argument-hint: "[repo, trace source, or failure description]"
6
+ ---
7
+
8
+ # Archal Autoloop
9
+
10
+ You help users connect real agent traces to Archal. Your job is to wire the repo,
11
+ trace source, harness contract, scenario contract, and GitHub PR path without
12
+ guessing or leaking secrets.
13
+
14
+ Autoloop is not a replacement for `archal run`. It uses the same harness and clone
15
+ routing ideas, but the trigger is a trace that already happened.
16
+
17
+ Autoloop is also not arbitrary production trace replay. It can reproduce a
18
+ failure only when the trace, scenario contract, and seed templates contain
19
+ enough evidence to reconstruct the service state that matters. If the evidence
20
+ is thin, block and name the missing data instead of claiming reproduction.
21
+
22
+ ## Product mental model
23
+
24
+ Autoloop does this loop:
25
+
26
+ 1. Import a trace and its child spans from a read-only source.
27
+ 2. Grade whether the trace contains a real failure.
28
+ 3. Build a reproduction scenario and clone seed from trace evidence plus
29
+ repo-owned seed templates.
30
+ 4. Run the reproduction against service clones through the customer harness.
31
+ 5. If reproduced, patch the repo and open a GitHub issue or PR.
32
+
33
+ Steps 1-4 are **autoloop**: ingest -> grade -> find the failed trace ->
34
+ reproduce on clones. Step 5 (writing the fix and opening the PR) is **autofix**,
35
+ a separate opt-in step that is *not* part of autoloop until it is turned on.
36
+ Both are per-agent toggles, switchable from the agents tab, the CLI, or by asking
37
+ the Archal copilot in chat.
38
+
39
+ The CLI maps these toggles to `--execution-policy`: `reproduce` runs autoloop
40
+ only, with autofix off, while `fix` turns autofix on (autoloop plus the fix/PR
41
+ step). Narrower policies stop earlier:
42
+
43
+ | Policy | Stops after | Autofix |
44
+ |--------|-------------|---------|
45
+ | `observe` | import | off |
46
+ | `grade` | grading | off |
47
+ | `reproduce` | reproduction | off |
48
+ | `fix` | PR or blocked fix status | on |
49
+
50
+ Do not invent or promote separate top-level judge, reproduce, fix, or
51
+ trace-replay commands. The public command is `archal autoloop`. The local stop
52
+ command is `archal detach` for file-backed trace directories.
53
+
54
+ ## Discover first
55
+
56
+ Before changing anything, inspect the repo:
57
+
58
+ 1. `package.json` and scripts: how is the agent run? What tests should a fix PR
59
+ pass?
60
+ 2. Existing Archal files:
61
+ - `.archal.json`
62
+ - `.archal/harness.*`
63
+ - `archal/harness.json`
64
+ - `archal/scenario.md`
65
+ - `archal/run-input.yaml`
66
+ - `archal/seeds/*.json`
67
+ - `scenarios/*.md`
68
+ 3. Service SDKs and likely clones:
69
+ - `stripe` -> `stripe`
70
+ - `@octokit/rest`, `octokit` -> `github`
71
+ - `@slack/web-api`, `@slack/bolt` -> `slack`
72
+ - `jira`, `jira-client`, `jira.js` -> `jira`
73
+ - `@linear/sdk` -> `linear`
74
+ - `@supabase/supabase-js`, `pg` -> `supabase`
75
+ - `googleapis` -> `google-workspace`
76
+ 4. GitHub remote:
77
+ ```bash
78
+ git remote get-url origin
79
+ ```
80
+ Hosted sources and `--execution-policy fix` need a GitHub remote.
81
+ 5. Trace source shape, if available:
82
+ - provider: local files, Postgres, Supabase
83
+ - or local/client-ingested sources normalized through `archal trace-source`
84
+ such as file, HTTP/OTel, Langfuse, Braintrust, S3/GCS, or custom JSON
85
+ - trace table and span table names
86
+ - id columns
87
+ - parent span column
88
+ - timestamp/cursor columns
89
+ - status, workspace, trace group, and agent filters
90
+
91
+ Never print secrets while inspecting. If you need to show a database URL, show
92
+ only the env var name or secret reference.
93
+
94
+ ## Preconditions
95
+
96
+ You need these before a full hosted Autoloop setup:
97
+
98
+ - Archal CLI installed in the repo or reachable with `npx archal`
99
+ - authenticated user (`archal login`) or `ARCHAL_TOKEN=archal_ws_...`
100
+ - GitHub App installed on the target repo
101
+ - repo origin that resolves to `github.com/<owner>/<repo>`
102
+ - read-only trace source credential
103
+ - headless harness command
104
+ - `archal/scenario.md` for the trace family
105
+ - optional but strongly recommended `archal/seeds/*.json`
106
+ - model/provider keys required by the user's agent and tests
107
+
108
+ If any prerequisite is missing, make the smallest safe change and explain what
109
+ is still required. Do not continue into a fake reproduction.
110
+
111
+ ## Safe setup flow
112
+
113
+ ### 1. Install and authenticate
114
+
115
+ Prefer project-local install:
116
+
117
+ ```bash
118
+ npm install -D archal
119
+ npx archal login
120
+ npx archal usage
121
+ ```
122
+
123
+ In CI or a customer repo, use:
124
+
125
+ ```bash
126
+ export ARCHAL_TOKEN=archal_ws_...
127
+ npx archal usage
128
+ ```
129
+
130
+ Use a workspace API key for automated runs. Do not commit it.
131
+
132
+ ### 2. Add or verify `archal/harness.json`
133
+
134
+ Minimal shape:
135
+
136
+ ```json
137
+ {
138
+ "version": 1,
139
+ "local": {
140
+ "command": "node",
141
+ "args": ["agent.mjs"]
142
+ }
143
+ }
144
+ ```
145
+
146
+ The command must be headless and repeatable. It should run the real agent path,
147
+ not a hand-authored mock. During reproduction, Archal invokes this command
148
+ through `archal run`, so the agent should read the task from `AGENT_TASK` and
149
+ print its final answer to stdout. If the project already has `.archal.json` for
150
+ `archal run`, align the Autoloop harness with that command.
151
+
152
+ ### 3. Add or verify `archal/scenario.md`
153
+
154
+ The scenario describes the standing task and checks for this trace family.
155
+
156
+ Required sections:
157
+
158
+ ```md
159
+ # Scenario title
160
+
161
+ ## Setup
162
+ Trace-family context and the starting state Archal should reconstruct.
163
+
164
+ ## Prompt
165
+ The task the agent should complete.
166
+
167
+ ## Expected Behavior
168
+ The answer key for grading and reproduction.
169
+
170
+ ## Success Criteria
171
+ - [D] Deterministic clone-state check
172
+ - [P] Probabilistic trace/output check
173
+
174
+ ## Config
175
+ clones: stripe, slack
176
+ timeout: 120
177
+ ```
178
+
179
+ Keep model-visible instructions realistic. Do not tell the tested agent that it
180
+ is in Archal, a clone-backed environment, or a special replay.
181
+
182
+ ### 4. Add seed templates when trace evidence is thin
183
+
184
+ Recommended:
185
+
186
+ ```text
187
+ archal/seeds/
188
+ stripe-billing-support.json
189
+ jira-escalations.json
190
+ ```
191
+
192
+ Seed templates should contain stable service state for the task family. Autoloop
193
+ can then fill in trace-specific identifiers. This is much safer than expecting
194
+ weak traces to reconstruct full service state.
195
+
196
+ ## Hosted database source
197
+
198
+ Use this when traces live in Postgres or Supabase. Create a read-only DB user,
199
+ keep the URL in `TRACE_DATABASE_URL` (or a secret ref in hosted production), then
200
+ run with `--check` to validate the source and re-run without `--check` to register it. Registration
201
+ posts the source config to Archal and returns; hosted workers own polling after
202
+ that, so local `archal detach` does not disable it.
203
+
204
+ See `references/hosted-sources.md` for the full check, register, and
205
+ `--database-url-secret-ref` flag blocks.
206
+
207
+ ## Client-side trace ingestion
208
+
209
+ Use `archal trace-source` when traces are not already in a hosted Postgres or
210
+ Supabase table. This command normalizes source-specific payloads into Archal
211
+ trace upload envelopes, writes them to a trace directory, and can upload them to
212
+ hosted Autoloop when workspace auth is available.
213
+
214
+ Common paths:
215
+
216
+ ```bash
217
+ npx archal trace-source import ./exports --preview --json
218
+ npx archal trace-source import ./exports --upload --repository owner/repo
219
+
220
+ npx archal trace-source connect langfuse \
221
+ --base-url https://cloud.langfuse.com \
222
+ --api-key-env LANGFUSE_READ_KEY \
223
+ --out .archal/traces/inbox
224
+ npx archal trace-source test langfuse
225
+ npx archal trace-source sync langfuse --upload --repository owner/repo
226
+ npx archal trace-source watch langfuse --upload --repository owner/repo
227
+
228
+ npx archal trace-source connect custom --name "prod exporter" --out .archal/traces/inbox
229
+ npx archal trace-source serve "prod exporter" --port 4319
230
+ ```
231
+
232
+ Use `archal trace-source status [source]` to inspect registry validation,
233
+ cursor, and last-sync state. `watch` is for pull-style sources; push sources
234
+ stay continuous through `serve`.
235
+
236
+ ## Trace schema mapping
237
+
238
+ Hosted sources default to `ai_traces` / `ai_spans` with `id` / `trace_id`
239
+ columns and `updated_at_id` cursor mode. When the customer's tables differ, pass
240
+ mapping flags to override table names, id columns, parent-span column, and
241
+ cursor columns; switch to `created_at_id` cursor mode for append-only sources;
242
+ and use `--source-*` filters to scope noisy sources by workspace, agent, status,
243
+ trace group, or limit.
244
+
245
+ See `references/trace-schema-mapping.md` for the full defaults table plus the
246
+ custom-schema, append-only, and filter flag blocks.
247
+
248
+ ## Local trace directory
249
+
250
+ Use this for a local pilot or exported trace files:
251
+
252
+ ```bash
253
+ npx archal autoloop ./prod-traces --repo . --execution-policy reproduce
254
+ ```
255
+
256
+ Artifacts are written under:
257
+
258
+ ```text
259
+ .archal/autoloop/
260
+ autoloops.json
261
+ runs.jsonl
262
+ raw/
263
+ grades/
264
+ seeds/
265
+ runs/
266
+ fixes/
267
+ failed/
268
+ logs/
269
+ ```
270
+
271
+ Stop the local file-backed loop:
272
+
273
+ ```bash
274
+ npx archal detach ./prod-traces --repo .
275
+ ```
276
+
277
+ Do not describe `archal detach` as a hosted source disable command.
278
+
279
+ ## CLI-first operation
280
+
281
+ Prefer CLI and artifact evidence for handoffs unless the user explicitly asks
282
+ for a workspace page.
283
+
284
+ - Local file-backed loops: `archal autoloop <trace-dir> --repo ...` starts the
285
+ watcher, `archal detach <trace-dir> --repo ...` stops it, `archal
286
+ autoloop-status --repo ...` summarizes trace jobs, and `archal
287
+ autoloop-reprocess --repo ... <trace-id>` retries terminal jobs after a
288
+ blocker is fixed.
289
+ - Hosted database sources: `archal autoloop --source postgres|supabase ...`
290
+ registers the source and returns. Local `archal detach` does not disable a
291
+ hosted source because hosted workers own polling after registration.
292
+ - Safe resume means re-running the same `archal autoloop` registration or
293
+ reprocessing a terminal local trace only after the missing evidence,
294
+ credential, mapping, harness, or GitHub blocker is corrected.
295
+
296
+ Report exact artifact paths and statuses. Do not make dashboard pages the only
297
+ place a user can understand what happened.
298
+
299
+ ## How to diagnose failures
300
+
301
+ Classify failures precisely:
302
+
303
+ - Trace import failure: database/source auth, mapping, cursor, filters, bad
304
+ trace shape.
305
+ - Trace ingestion failure: `trace-source` adapter mismatch, rejected hosted
306
+ upload, missing workspace auth, bad idempotency key, or receiver auth failure.
307
+ - Grade failure: judge could not determine expected outcome, missing evaluator
308
+ contract, trace lacks task context.
309
+ - Missing evidence: trace does not contain enough state to seed. Add spans,
310
+ state snapshots, or repo-owned seed templates.
311
+ - Reproduction failure: scenario or seed could not replay the failure against
312
+ clones. Inspect generated `scenario.md`, `seed.json`, and run manifest.
313
+ - Agent behavior: the reproduced run shows the agent making the same wrong
314
+ service action it made in the original trace.
315
+ - Harness issue: the agent command crashes, hangs, needs UI auth, or does not
316
+ reach clone-routed services.
317
+ - Fix generation issue: patch does not apply, tests fail, no changes produced,
318
+ or generated PR metadata is incomplete.
319
+ - GitHub integration failure: GitHub App missing, branch protection, permission denied, PR
320
+ checks unavailable.
321
+
322
+ When evidence is insufficient, say so directly. Do not manufacture a seed or
323
+ claim reproduction succeeded.
324
+
325
+ ## Artifact reading guide
326
+
327
+ Local file-backed Autoloop uses repo-local artifacts. Hosted Autoloop exposes the
328
+ same phase information in the dashboard.
329
+
330
+ | Artifact | What to inspect |
331
+ |----------|-----------------|
332
+ | `grades/<trace>/routing.json` | trace import route and selected phase |
333
+ | `grades/<trace>/grade.json` | verdict, summary, and reproduction decision |
334
+ | `seeds/<trace>/scenario.md` | generated reproduction scenario |
335
+ | `seeds/<trace>/seed.json` | generated seed request or materialized seed metadata |
336
+ | `runs/<trace>/manifest.json` | reproduction status, command, attempts, evidence |
337
+ | `runs/<trace>/stdout.json` | machine-readable run output |
338
+ | `runs/<trace>/stderr.log` | reproduction stderr |
339
+ | `fixes/<trace>/status.json` | blocked fix status |
340
+ | `fixes/<trace>/pr-details.md` | PR reviewer summary |
341
+ | `fixes/<trace>/repo.patch` | patch captured when PR creation cannot complete |
342
+
343
+ ## Security rules
344
+
345
+ - Use read-only trace database credentials.
346
+ - Never commit database URLs, API keys, model keys, or GitHub tokens.
347
+ - Prefer `--database-url-env` locally and `--database-url-secret-ref` in hosted
348
+ production setup.
349
+ - Do not pass production write credentials to a clone-routed reproduction.
350
+ - Do not add model-visible copy that reveals Archal or clone routing to the
351
+ tested agent.
352
+ - Do not bypass the GitHub App PR workflow with direct pushes.
353
+ - Redact raw trace payloads before sharing artifacts outside the workspace.
354
+
355
+ ## What to report back
356
+
357
+ After setup or debugging, give the user:
358
+
359
+ - command run
360
+ - source provider and source id
361
+ - repo full name
362
+ - execution policy
363
+ - CLI status command to run next
364
+ - artifacts produced, if local
365
+ - whether import, grade, seed, reproduce, and fix phases are ready
366
+ - exact blocker if any
367
+ - next command or next owner
368
+
369
+ ## Docs
370
+
371
+ - Autoloop production traces: https://docs.archal.ai/guides/autoloop-production-traces
372
+ - Autonomous loops: https://docs.archal.ai/guides/autoloop-production-traces
373
+ - CLI reference: https://docs.archal.ai/cli/autoloop
374
+ - Running with an agent: https://docs.archal.ai/guides/run-with-agent
375
+ - Writing scenarios: https://docs.archal.ai/guides/writing-scenarios
376
+ - Seeds: https://docs.archal.ai/guides/seeds
@@ -0,0 +1,62 @@
1
+ # Hosted database source
2
+
3
+ Full flag reference for registering a hosted Postgres or Supabase trace source.
4
+ Use this when traces live in Postgres or Supabase. Registration posts the source
5
+ config to Archal and returns; hosted workers own polling after that.
6
+
7
+ ## Contents
8
+
9
+ - Read-only credential
10
+ - Check the source
11
+ - Register the source
12
+ - Secret reference (hosted production)
13
+
14
+ ## Read-only credential
15
+
16
+ First, create or request a read-only database user. Then keep the URL in an env
17
+ var:
18
+
19
+ ```bash
20
+ export TRACE_DATABASE_URL='postgres://readonly:...'
21
+ ```
22
+
23
+ ## Check the source
24
+
25
+ Run a check:
26
+
27
+ ```bash
28
+ npx archal autoloop \
29
+ --repo . \
30
+ --source supabase \
31
+ --database-url-env TRACE_DATABASE_URL \
32
+ --source-id prod-agent-traces \
33
+ --check
34
+ ```
35
+
36
+ ## Register the source
37
+
38
+ Register:
39
+
40
+ ```bash
41
+ npx archal autoloop \
42
+ --repo . \
43
+ --source supabase \
44
+ --database-url-env TRACE_DATABASE_URL \
45
+ --source-id prod-agent-traces
46
+ ```
47
+
48
+ This posts the source config to Archal and returns. It does not start a local
49
+ watcher and does not write local source state.
50
+
51
+ ## Secret reference (hosted production)
52
+
53
+ Use a secret reference when the customer already has one:
54
+
55
+ ```bash
56
+ npx archal autoloop \
57
+ --repo . \
58
+ --source postgres \
59
+ --database-url-secret-ref aws-secretsmanager://customer/prod-agent-traces
60
+ ```
61
+
62
+ `--database-url-secret-ref` must not contain a plaintext credential.
@@ -0,0 +1,73 @@
1
+ # Trace schema mapping
2
+
3
+ Full mapping-flag reference for hosted Postgres/Supabase trace sources. Use this
4
+ when the customer's trace and span tables do not match the defaults below.
5
+
6
+ ## Contents
7
+
8
+ - Defaults
9
+ - Custom schema flags
10
+ - Append-only sources
11
+ - Filters for noisy sources
12
+
13
+ ## Defaults
14
+
15
+ | Concept | Default |
16
+ |---------|---------|
17
+ | trace table | `ai_traces` |
18
+ | span table | `ai_spans` |
19
+ | trace id | `id` |
20
+ | span id | `id` |
21
+ | span trace id | `trace_id` |
22
+ | trace updated cursor | `updated_at` |
23
+ | span updated cursor | `updated_at` |
24
+ | cursor mode | `updated_at_id` |
25
+
26
+ ## Custom schema flags
27
+
28
+ For a custom schema, pass mapping flags:
29
+
30
+ ```bash
31
+ npx archal autoloop \
32
+ --repo . \
33
+ --source postgres \
34
+ --database-url-env TRACE_DATABASE_URL \
35
+ --trace-table public.agent_traces \
36
+ --span-table public.agent_spans \
37
+ --trace-id-column trace_id \
38
+ --span-id-column span_id \
39
+ --span-trace-id-column trace_id \
40
+ --parent-span-id-column parent_span_id \
41
+ --trace-updated-at-column updated_at \
42
+ --span-updated-at-column updated_at
43
+ ```
44
+
45
+ ## Append-only sources
46
+
47
+ For append-only sources:
48
+
49
+ ```bash
50
+ npx archal autoloop \
51
+ --repo . \
52
+ --source supabase \
53
+ --database-url-env TRACE_DATABASE_URL \
54
+ --cursor-mode created_at_id \
55
+ --trace-created-at-column created_at \
56
+ --span-created-at-column created_at
57
+ ```
58
+
59
+ ## Filters for noisy sources
60
+
61
+ Use filters for noisy sources:
62
+
63
+ ```bash
64
+ npx archal autoloop \
65
+ --repo . \
66
+ --source supabase \
67
+ --database-url-env TRACE_DATABASE_URL \
68
+ --source-workspace-id workspace_123 \
69
+ --source-agent-id support-agent \
70
+ --source-status failed error \
71
+ --source-trace-group billing-support \
72
+ --source-limit 250
73
+ ```