npm - archal - Versions diffs - 0.9.13 → 0.9.15 - Mend

archal 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

package/README.md +81 -73
package/bin/archal.cjs +1 -1
package/clone-assets/apify/tools.json +668 -0
package/{twin-assets → clone-assets}/discord/fidelity.json +1 -1
package/{twin-assets → clone-assets}/discord/tools.json +510 -510
package/clone-assets/github/fidelity.json +31 -0
package/{twin-assets → clone-assets}/github/tools.json +113 -3
package/{twin-assets → clone-assets}/google-workspace/fidelity.json +2 -2
package/{twin-assets → clone-assets}/google-workspace/tools.json +10 -10
package/{twin-assets → clone-assets}/jira/fidelity.json +44 -4
package/{twin-assets → clone-assets}/jira/tools.json +1 -1
package/clone-assets/linear/fidelity.json +36 -0
package/{twin-assets → clone-assets}/linear/tools.json +1 -1
package/{twin-assets → clone-assets}/ramp/fidelity.json +1 -1
package/{twin-assets → clone-assets}/ramp/tools.json +1 -1
package/clone-assets/slack/fidelity.json +38 -0
package/{twin-assets → clone-assets}/slack/tools.json +1 -1
package/clone-assets/stripe/fidelity.json +67 -0
package/{twin-assets → clone-assets}/stripe/tools.json +42 -11
package/clone-assets/supabase/fidelity.json +31 -0
package/{twin-assets → clone-assets}/supabase/tools.json +1 -1
package/clone-assets/tavily/tools.json +115 -0
package/dist/cli.cjs +97917 -0
package/dist/cli.d.cts +1 -0
package/dist/harness.cjs +62 -0
package/dist/harness.d.cts +20 -0
package/dist/index.cjs +5 -87878
package/dist/index.d.cts +3 -1
package/dist/seed/dynamic-generator.cjs +8796 -9201
package/dist/seed/dynamic-generator.d.cts +39 -0
package/dist/vitest/chunk-2GY4SFKE.js +29279 -0
package/dist/vitest/{chunk-KTMNDJFB.js → chunk-WVRVNHAX.js} +45255 -44440
package/dist/vitest/index.cjs +56408 -31519
package/dist/vitest/index.d.ts +61 -27
package/dist/vitest/index.js +145 -1807
package/dist/vitest/runtime/hosted-session-reaper.cjs +34766 -28922
package/dist/vitest/runtime/hosted-session-reaper.js +1 -2
package/dist/vitest/runtime/setup-files.js +2 -3
package/package.json +19 -10
package/skills/eval/SKILL.md +113 -0
package/skills/onboard/SKILL.md +67 -36
package/skills/scenario/SKILL.md +22 -20
package/skills/vitest/SKILL.md +25 -24
package/dist/vitest/chunk-L6HSMJ3F.js +0 -2216
package/dist/vitest/chunk-YJICENME.js +0 -1230
package/dist/vitest/src-JGHX6UKK.js +0 -94
package/skills/audit/SKILL.md +0 -55
package/skills/test/SKILL.md +0 -109
package/twin-assets/github/fidelity.json +0 -13
package/twin-assets/linear/fidelity.json +0 -18
package/twin-assets/slack/fidelity.json +0 -20
package/twin-assets/stripe/fidelity.json +0 -22
package/twin-assets/supabase/fidelity.json +0 -13

package/dist/vitest/src-JGHX6UKK.js DELETED Viewed

@@ -1,94 +0,0 @@
-import {
-  AUTH_RETRY_OPTIONS,
-  AUTH_TOKEN_ENV_VAR,
-  CLI_LOOPBACK_CALLBACK_HOST,
-  CREDENTIALS_FILE,
-  CREDENTIALS_KEY_FILE,
-  CREDENTIALS_MASTER_KEY_ENV_VAR,
-  ENV_TOKEN_FALLBACK_TTL_SECONDS,
-  ExpiredEnvTokenError,
-  HOSTED_DEFAULT_API_BASE_URL,
-  HOSTED_DEFAULT_AUTH_BASE_URL,
-  HOSTED_DEFAULT_RUNTIME_BASE_URL,
-  KEYCHAIN_ACCOUNT,
-  KEYCHAIN_SERVICE,
-  REQUEST_TIMEOUT_MS,
-  STRICT_ENDPOINTS_ENV_VAR,
-  TOKEN_ENCRYPTION_PREFIX,
-  buildAuthRequestHeaders,
-  buildLoopbackCallbackUrl,
-  decodeJwtPayload,
-  deleteCredentials,
-  errorMessage,
-  exchangeCliAuthCode,
-  getArchalDir,
-  getConfiguredApiBaseUrl,
-  getConfiguredAuthBaseUrl,
-  getConfiguredRuntimeBaseUrl,
-  getCredentials,
-  getJwtExpiry,
-  getRealHomeStoredCredentials,
-  getStoredCredentials,
-  isEntitled,
-  isLoopbackHostname,
-  isLoopbackHttpUrl,
-  isLoopbackRedirectUri,
-  isPlan,
-  pollCliDeviceAuth,
-  refreshAuthFromServer,
-  refreshAuthFromServerWithValidation,
-  refreshCliSession,
-  resolveWhoamiAuthState,
-  revokeCliSession,
-  saveCredentials,
-  saveRealHomeCredentials,
-  startCliDeviceAuth,
-  validateTokenWithServer
-} from "./chunk-YJICENME.js";
-export {
-  AUTH_RETRY_OPTIONS,
-  AUTH_TOKEN_ENV_VAR,
-  CLI_LOOPBACK_CALLBACK_HOST,
-  CREDENTIALS_FILE,
-  CREDENTIALS_KEY_FILE,
-  CREDENTIALS_MASTER_KEY_ENV_VAR,
-  ENV_TOKEN_FALLBACK_TTL_SECONDS,
-  ExpiredEnvTokenError,
-  HOSTED_DEFAULT_API_BASE_URL,
-  HOSTED_DEFAULT_AUTH_BASE_URL,
-  HOSTED_DEFAULT_RUNTIME_BASE_URL,
-  KEYCHAIN_ACCOUNT,
-  KEYCHAIN_SERVICE,
-  REQUEST_TIMEOUT_MS,
-  STRICT_ENDPOINTS_ENV_VAR,
-  TOKEN_ENCRYPTION_PREFIX,
-  buildAuthRequestHeaders,
-  buildLoopbackCallbackUrl,
-  decodeJwtPayload,
-  deleteCredentials,
-  errorMessage,
-  exchangeCliAuthCode,
-  getArchalDir,
-  getConfiguredApiBaseUrl,
-  getConfiguredAuthBaseUrl,
-  getConfiguredRuntimeBaseUrl,
-  getCredentials,
-  getJwtExpiry,
-  getRealHomeStoredCredentials,
-  getStoredCredentials,
-  isEntitled,
-  isLoopbackHostname,
-  isLoopbackHttpUrl,
-  isLoopbackRedirectUri,
-  isPlan,
-  pollCliDeviceAuth,
-  refreshAuthFromServer,
-  refreshAuthFromServerWithValidation,
-  refreshCliSession,
-  resolveWhoamiAuthState,
-  revokeCliSession,
-  saveCredentials,
-  saveRealHomeCredentials,
-  startCliDeviceAuth,
-  validateTokenWithServer
-};

package/skills/audit/SKILL.md DELETED Viewed

@@ -1,55 +0,0 @@
----
-name: audit
-description: Audit an Archal repository thoroughly. Trace real execution paths, identify concrete bugs and design flaws, distinguish root-cause fixes from architecture problems, and add regression tests for every confirmed issue.
-user-invocable: true
-argument-hint: "[repo path or scope]"
----
-# Archal Repository Audit
-Use this skill when the goal is to inspect an Archal repository deeply, find problems worth fixing, and avoid shallow or local-only patches.
-## Audit standard
-- Trace real execution paths from entrypoints before proposing fixes.
-- Prefer root-cause fixes over guards, silencing, or narrow special cases.
-- If the real problem is architectural, report it instead of applying a monkey patch.
-- For every confirmed bug you fix, add the narrowest regression test that would have caught it earlier.
-- Always include at least one regression test that covers a stale-data row or pre-migration row when the touched path has compatibility logic.
-## Working pattern
-1. Map the hot paths first.
-   - Identify the actual entrypoints: CLI commands, web routes, background jobs, and core runtime/session flows.
-   - Ignore dead-looking surfaces until the primary paths are understood.
-2. Read the execution path end to end.
-   - Follow inputs through parsing, validation, persistence, normalization, and response shaping.
-   - Inspect nearby invariants and adjacent edge cases before deciding on a fix.
-3. Separate findings into two buckets.
-   - **Fix now**: clear bug, contained scope, root cause understood, regression test is obvious.
-   - **Escalate**: the defect comes from a bad abstraction or architectural boundary and a local patch would hide the real problem.
-4. Validate narrowly, then broadly.
-   - Run the smallest meaningful tests for the changed path first.
-   - If code changed, also run the relevant package build/typecheck before concluding.
-## What to look for
-- Compatibility shims that silently drop data from old rows or partially migrated schemas
-- Session lifecycle bugs around start, ready, teardown, stale state, and idempotency
-- Projection code that derives canonical state from stale denormalized fields
-- Fallback behavior that changes semantics instead of preserving them
-- Query builders that filter on derived fields inconsistently across list/count paths
-- Evidence, trace, or normalization code that double-counts, hides, or misattributes records
-## Output format
-For each finding, report:
-- Problem
-- Technical cause
-- Simple explanation
-- Optimal fix
-- Why that fix is better than narrower alternatives
-- Regression test to add
-If no actionable problems are found in a slice, say that explicitly and note any remaining coverage gaps.

package/skills/test/SKILL.md DELETED Viewed

@@ -1,109 +0,0 @@
----
-name: test
-description: Run Archal scenarios or inline tasks against hosted twins, diagnose failed runs, and interpret satisfaction scores. Triggers on "run my scenario", "test my agent", "archal run X", "debug this failing run", "what does this satisfaction score mean".
-user-invocable: true
-argument-hint: "[scenario.md or task description]"
----
-# Archal Test Runner
-You run Archal scenarios and inline tasks, then help the user interpret the results. For setting up the agent path or `.archal.json` in a fresh repo, hand off to the `onboard` skill.
-## What only you know (product mental model)
-- `archal run` spawns the user's agent as a child process. The agent needs:
-  - A **runnable agent path**. Two ways to supply it: explicit `--harness <path>` (e.g. `./.archal/harness.ts`), or `.archal.json` with an `agent` command. Repo-local auto-discovery also walks up from cwd for a top-level `harness.{ts,js,mjs,cjs}`.
-  - A **headless boundary** — no UI, no browser OAuth. The process is spawned without a shell, so interactive auth hangs forever.
-  - Env vars — auto-injected. `ARCHAL_ENGINE_TASK` is the prompt; `ARCHAL_<TWIN>_BASE_URL` / `ARCHAL_<TWIN>_URL` point at twins; `ARCHAL_PREFLIGHT=1` is set during boot check (harness should exit early).
-- Every `archal run` writes local artifacts under `.archal/cache/last-run.json` and `.archal/cache/runs/*.json` **regardless** of `--output`. `--output json` is only for machine-readable stdout; it's not needed for local persistence.
-- **Satisfaction score** = (runs passing all criteria) / (total runs). `[D]` criteria are deterministic state checks; `[P]` criteria are LLM-judged from trace + final state.
-## Preflight the harness before a run
-When the agent path is uncertain, or after any change to the harness file, smoke-test the harness directly before `archal run`:
-```bash
-ARCHAL_PREFLIGHT=1 ARCHAL_ENGINE_TASK="Reply with OK and do not use tools." npx tsx ./.archal/harness.ts
-```
-A harness that exits cleanly with no tool calls is ready. Catches: no runnable entrypoint, UI-boot assumptions, missing provider keys, service bridge misconfig. A failure here is much easier to diagnose than a silent timeout inside `archal run`.
-## Running
-Scenario from a file:
-```bash
-archal run scenario.md
-archal run scenario.md --runs 5 --seed enterprise-repo   # N runs → satisfaction score
-```
-Inline task (no scenario file):
-```bash
-archal run --task "Create an issue titled hello" --harness ./.archal/harness.ts --twin github
-```
-`--task` only replaces the scenario file — it still needs a runnable agent path. `--twin` is required with `--task`; repeat or comma-separate for multiple twins.
-When `.archal.json` exists in cwd, bare `archal run` uses it. If the user doesn't have one yet, that's setup — hand off to the `onboard` skill, which owns harness creation and `.archal.json` scaffolding.
-## Interpret results
-Score breakdown:
-- `100%` = every run passed every criterion
-- `80%` = 4/5 runs passed
-- `0%` = none passed
-Criterion types:
-- `[D]` — deterministic state check. A failure is real; never a model variance artifact.
-- `[P]` — LLM judge reads trace + final state. A single failure can be variance; re-run with `--runs 3+` to confirm before acting on it.
-## Diagnose failures
-Re-run with `-v` for the full trace, then classify with these signals:
-- **Agent bug** — wrong tool called, wrong arguments, stopped early.
-  *Signals:* trace shows the correct tool was available but the agent chose another; or arguments are malformed.
-  *Fix:* agent prompt, tool wiring, or underlying model.
-- **Scenario bug** — criteria are too strict, ambiguous, or contradict the Setup.
-  *Signals:* agent clearly did the right thing but a `[D]` criterion expects an exact count the Setup didn't guarantee; or two criteria contradict each other.
-  *Fix:* make Setup more specific, or relax the criterion. Use the `scenario` skill.
-- **Seed mismatch** — twin state doesn't match what Setup describes.
-  *Signals:* agent's first introspection tool call returns unexpected state (e.g. Setup says "4 stale issues" but the seed has 3).
-  *Fix:* different seed, or adjust Setup to match. `archal seed list <twin>` to browse.
-- **Harness bug** — agent process never started, crashed immediately, or hung.
-  *Signals:* no tool calls in the trace, stderr shows a boot error, or the run times out at the configured `--timeout`.
-  *Fix:* smoke-test the harness directly with `ARCHAL_PREFLIGHT=1 ARCHAL_ENGINE_TASK="Reply with OK." npx tsx ./.archal/harness.ts`, then look for UI-only imports, missing provider keys, or interactive auth.
-## CI mode
-```bash
-archal run scenario.md --runs 3 --pass-threshold 80 -o json -q
-```
-Exit codes: `0` pass, `1` fail or score < threshold, `2` validation error. For GitHub Actions, inject `ARCHAL_TOKEN` as a secret.
-## Artifacts + dashboard
-- **Local (always written):** `.archal/cache/last-run.json` (summary), `.archal/cache/runs/*.json` (full redacted trace).
-- **Hosted:** every run also uploads to https://www.archal.ai/dashboard — useful for sharing a failing trace with a colleague or comparing across agent model versions.
-Don't tell users they need `-o json` to save artifacts locally — that's only for stdout.
-## Anti-patterns
-- Don't re-document the `archal run` flag list here. `archal run --help` and https://docs.archal.ai/cli/run own that — they'll drift if duplicated.
-- Don't guess the agent path. If the user doesn't have `--harness`, a repo-local harness, or `.archal.json`, hand off to `onboard` — it owns setup.
-- Don't promote `--proxy` as default. It's for agents that still call real service domains through raw HTTPS clients. Env-var wiring is the primary path; proxy is a fallback.
-- Don't classify a single `[P]` failure as an agent bug without re-running. Probabilistic criteria need sample size.
-- Don't treat a `[D]` failure as model variance. Deterministic failures are real bugs.
-## Docs
-- Running with an agent: https://docs.archal.ai/guides/run-with-agent
-- Existing repo playbook: https://docs.archal.ai/guides/existing-agent-repo
-- Scenario authoring: hand off to the `scenario` skill
-- Twin sessions: https://docs.archal.ai/guides/twin-sessions

package/twin-assets/github/fidelity.json DELETED Viewed

@@ -1,13 +0,0 @@
-{
-  "twin": "github",
-  "api": "REST v3",
-  "version": "0.1.0",
-  "capabilities": [
-    { "name": "Stateful CRUD (issues, PRs, branches, files)", "supported": true },
-    { "name": "Error responses (404, 422, 403)", "supported": true },
-    { "name": "Pagination", "supported": true },
-    { "name": "Rate limiting", "supported": true },
-    { "name": "Webhooks", "supported": false },
-    { "name": "Branch protection rules", "supported": false }
-  ]
-}

package/twin-assets/linear/fidelity.json DELETED Viewed

@@ -1,18 +0,0 @@
-{
-  "twin": "linear",
-  "api": "GraphQL",
-  "version": "0.1.0",
-  "capabilities": [
-    { "name": "Stateful CRUD (issues, projects, cycles, initiatives)", "supported": true },
-    { "name": "Workflow state transitions", "supported": true },
-    { "name": "Issue relations", "supported": true },
-    { "name": "Comments & attachments", "supported": true },
-    { "name": "Webhooks", "supported": false },
-    { "name": "OAuth flows", "supported": false }
-  ],
-  "thresholds": {
-    "minReplaySteps": 41,
-    "minScenarioWindows": 18,
-    "maxValueDifferences": 0
-  }
-}

package/twin-assets/slack/fidelity.json DELETED Viewed

@@ -1,20 +0,0 @@
-{
-  "twin": "slack",
-  "api": "Web API",
-  "version": "0.1.0",
-  "capabilities": [
-    { "name": "Visible MCP tools exactness (8-tool exposed surface)", "supported": true },
-    { "name": "Supported Web API flows: auth.test, conversations.create/info/list/history/replies, chat.postMessage/delete, reactions.add/remove, users.list/profile.get", "supported": true },
-    { "name": "State replay and reset exactness for supported flows", "supported": true },
-    { "name": "Hidden/internal-only Slack tools", "supported": false },
-    { "name": "files.* and other non-exposed Slack API areas", "supported": false },
-    { "name": "Real-time events (WebSocket)", "supported": false }
-  ],
-  "thresholds": {
-    "minReplaySteps": 29,
-    "minWorkflowScenarios": 2,
-    "minWorkflowSteps": 7,
-    "minScenarioWindows": 6,
-    "maxValueDifferences": 0
-  }
-}

package/twin-assets/stripe/fidelity.json DELETED Viewed

@@ -1,22 +0,0 @@
-{
-  "twin": "stripe",
-  "api": "Stripe API v1 (via MCP agent-toolkit)",
-  "version": "0.1.0",
-  "capabilities": [
-    { "name": "Stateful CRUD (customers, products, prices)", "supported": true },
-    { "name": "Payment lifecycle (intents, confirm, capture, cancel)", "supported": true },
-    { "name": "Refunds (full, partial, by charge or payment_intent)", "supported": true },
-    { "name": "Invoice lifecycle (draft, finalize, pay, void)", "supported": true },
-    { "name": "Subscriptions (create, update, cancel, trial periods)", "supported": true },
-    { "name": "Coupons (percent-off, amount-off, duration types)", "supported": true },
-    { "name": "Payment Links (create, list)", "supported": true },
-    { "name": "Balance and balance transactions", "supported": true },
-    { "name": "Disputes (list, update evidence, submit)", "supported": true },
-    { "name": "Error responses (400, 404, 402, 429)", "supported": true },
-    { "name": "Rate limiting simulation", "supported": true },
-    { "name": "Stripe Connect (multi-account)", "supported": false },
-    { "name": "Webhooks", "supported": false },
-    { "name": "3D Secure / SCA flows", "supported": false },
-    { "name": "Tax calculation", "supported": false }
-  ]
-}

package/twin-assets/supabase/fidelity.json DELETED Viewed

@@ -1,13 +0,0 @@
-{
-  "twin": "supabase",
-  "api": "Management API + SQL",
-  "version": "0.1.0",
-  "capabilities": [
-    { "name": "SQL execution", "supported": true },
-    { "name": "Schema introspection", "supported": true },
-    { "name": "Migrations", "supported": true },
-    { "name": "Edge functions (metadata)", "supported": true },
-    { "name": "Realtime subscriptions", "supported": false },
-    { "name": "Storage (file uploads)", "supported": false }
-  ]
-}