@plune-ai/cairn 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -0
- package/README.md +181 -0
- package/dist/agent/graph.d.ts +538 -0
- package/dist/agent/graph.d.ts.map +1 -0
- package/dist/agent/graph.js +0 -0
- package/dist/agent/graph.js.map +1 -0
- package/dist/agent/index.d.ts +83 -0
- package/dist/agent/index.d.ts.map +1 -0
- package/dist/agent/index.js +407 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/analyze/index.d.ts +22 -0
- package/dist/analyze/index.d.ts.map +1 -0
- package/dist/analyze/index.js +39 -0
- package/dist/analyze/index.js.map +1 -0
- package/dist/artifacts/index.d.ts +29 -0
- package/dist/artifacts/index.d.ts.map +1 -0
- package/dist/artifacts/index.js +68 -0
- package/dist/artifacts/index.js.map +1 -0
- package/dist/artifacts/report.d.ts +21 -0
- package/dist/artifacts/report.d.ts.map +1 -0
- package/dist/artifacts/report.js +56 -0
- package/dist/artifacts/report.js.map +1 -0
- package/dist/artifacts/testcase-md.d.ts +37 -0
- package/dist/artifacts/testcase-md.d.ts.map +1 -0
- package/dist/artifacts/testcase-md.js +91 -0
- package/dist/artifacts/testcase-md.js.map +1 -0
- package/dist/browser/backends/playwright-cli.d.ts +23 -0
- package/dist/browser/backends/playwright-cli.d.ts.map +1 -0
- package/dist/browser/backends/playwright-cli.js +85 -0
- package/dist/browser/backends/playwright-cli.js.map +1 -0
- package/dist/browser/backends/playwright-lib.d.ts +32 -0
- package/dist/browser/backends/playwright-lib.d.ts.map +1 -0
- package/dist/browser/backends/playwright-lib.js +157 -0
- package/dist/browser/backends/playwright-lib.js.map +1 -0
- package/dist/browser/gateway.d.ts +33 -0
- package/dist/browser/gateway.d.ts.map +1 -0
- package/dist/browser/gateway.js +2 -0
- package/dist/browser/gateway.js.map +1 -0
- package/dist/browser/index.d.ts +15 -0
- package/dist/browser/index.d.ts.map +1 -0
- package/dist/browser/index.js +59 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/types.d.ts +99 -0
- package/dist/browser/types.d.ts.map +1 -0
- package/dist/browser/types.js +6 -0
- package/dist/browser/types.js.map +1 -0
- package/dist/checklist/index.d.ts +24 -0
- package/dist/checklist/index.d.ts.map +1 -0
- package/dist/checklist/index.js +65 -0
- package/dist/checklist/index.js.map +1 -0
- package/dist/cli/branding.d.ts +14 -0
- package/dist/cli/branding.d.ts.map +1 -0
- package/dist/cli/branding.js +14 -0
- package/dist/cli/branding.js.map +1 -0
- package/dist/cli/index.d.ts +12 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +322 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/lex-bot.d.ts +3 -0
- package/dist/cli/lex-bot.d.ts.map +1 -0
- package/dist/cli/lex-bot.js +11 -0
- package/dist/cli/lex-bot.js.map +1 -0
- package/dist/codegen/index.d.ts +36 -0
- package/dist/codegen/index.d.ts.map +1 -0
- package/dist/codegen/index.js +63 -0
- package/dist/codegen/index.js.map +1 -0
- package/dist/codegen/schema.d.ts +14 -0
- package/dist/codegen/schema.d.ts.map +1 -0
- package/dist/codegen/schema.js +9 -0
- package/dist/codegen/schema.js.map +1 -0
- package/dist/config/env.d.ts +18 -0
- package/dist/config/env.d.ts.map +1 -0
- package/dist/config/env.js +42 -0
- package/dist/config/env.js.map +1 -0
- package/dist/config/index.d.ts +11 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +74 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/profiles.d.ts +7 -0
- package/dist/config/profiles.d.ts.map +1 -0
- package/dist/config/profiles.js +28 -0
- package/dist/config/profiles.js.map +1 -0
- package/dist/config/schema.d.ts +91 -0
- package/dist/config/schema.d.ts.map +1 -0
- package/dist/config/schema.js +20 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/design/index.d.ts +36 -0
- package/dist/design/index.d.ts.map +1 -0
- package/dist/design/index.js +35 -0
- package/dist/design/index.js.map +1 -0
- package/dist/design/schema.d.ts +109 -0
- package/dist/design/schema.d.ts.map +1 -0
- package/dist/design/schema.js +35 -0
- package/dist/design/schema.js.map +1 -0
- package/dist/eval/collect.d.ts +18 -0
- package/dist/eval/collect.d.ts.map +1 -0
- package/dist/eval/collect.js +53 -0
- package/dist/eval/collect.js.map +1 -0
- package/dist/eval/experiment.d.ts +49 -0
- package/dist/eval/experiment.d.ts.map +1 -0
- package/dist/eval/experiment.js +66 -0
- package/dist/eval/experiment.js.map +1 -0
- package/dist/eval/judge.d.ts +30 -0
- package/dist/eval/judge.d.ts.map +1 -0
- package/dist/eval/judge.js +47 -0
- package/dist/eval/judge.js.map +1 -0
- package/dist/eval/pilot.d.ts +21 -0
- package/dist/eval/pilot.d.ts.map +1 -0
- package/dist/eval/pilot.js +24 -0
- package/dist/eval/pilot.js.map +1 -0
- package/dist/eval/scorers.d.ts +23 -0
- package/dist/eval/scorers.d.ts.map +1 -0
- package/dist/eval/scorers.js +38 -0
- package/dist/eval/scorers.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/knowledge/index.d.ts +7 -0
- package/dist/knowledge/index.d.ts.map +1 -0
- package/dist/knowledge/index.js +39 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/llm/factory.d.ts +31 -0
- package/dist/llm/factory.d.ts.map +1 -0
- package/dist/llm/factory.js +48 -0
- package/dist/llm/factory.js.map +1 -0
- package/dist/llm/index.d.ts +5 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +3 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/structured.d.ts +30 -0
- package/dist/llm/structured.d.ts.map +1 -0
- package/dist/llm/structured.js +58 -0
- package/dist/llm/structured.js.map +1 -0
- package/dist/llm/vision.d.ts +16 -0
- package/dist/llm/vision.d.ts.map +1 -0
- package/dist/llm/vision.js +4 -0
- package/dist/llm/vision.js.map +1 -0
- package/dist/observe/index.d.ts +21 -0
- package/dist/observe/index.d.ts.map +1 -0
- package/dist/observe/index.js +18 -0
- package/dist/observe/index.js.map +1 -0
- package/dist/observe/parse-aria.d.ts +8 -0
- package/dist/observe/parse-aria.d.ts.map +1 -0
- package/dist/observe/parse-aria.js +71 -0
- package/dist/observe/parse-aria.js.map +1 -0
- package/dist/probe/index.d.ts +19 -0
- package/dist/probe/index.d.ts.map +1 -0
- package/dist/probe/index.js +38 -0
- package/dist/probe/index.js.map +1 -0
- package/dist/promote/index.d.ts +6 -0
- package/dist/promote/index.d.ts.map +1 -0
- package/dist/promote/index.js +4 -0
- package/dist/promote/index.js.map +1 -0
- package/dist/promote/promote-case.d.ts +12 -0
- package/dist/promote/promote-case.d.ts.map +1 -0
- package/dist/promote/promote-case.js +103 -0
- package/dist/promote/promote-case.js.map +1 -0
- package/dist/promote/selectors.d.ts +29 -0
- package/dist/promote/selectors.d.ts.map +1 -0
- package/dist/promote/selectors.js +58 -0
- package/dist/promote/selectors.js.map +1 -0
- package/dist/prompts/index.d.ts +32 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +55 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/prompts/local/identify-elements.d.ts +6 -0
- package/dist/prompts/local/identify-elements.d.ts.map +1 -0
- package/dist/prompts/local/identify-elements.js +21 -0
- package/dist/prompts/local/identify-elements.js.map +1 -0
- package/dist/prompts/local/index.d.ts +3 -0
- package/dist/prompts/local/index.d.ts.map +1 -0
- package/dist/prompts/local/index.js +18 -0
- package/dist/prompts/local/index.js.map +1 -0
- package/dist/prompts/local/judge-checklist-coverage.d.ts +3 -0
- package/dist/prompts/local/judge-checklist-coverage.d.ts.map +1 -0
- package/dist/prompts/local/judge-checklist-coverage.js +11 -0
- package/dist/prompts/local/judge-checklist-coverage.js.map +1 -0
- package/dist/prompts/local/judge-test-cases.d.ts +3 -0
- package/dist/prompts/local/judge-test-cases.d.ts.map +1 -0
- package/dist/prompts/local/judge-test-cases.js +11 -0
- package/dist/prompts/local/judge-test-cases.js.map +1 -0
- package/dist/prompts/local/pilot-review.d.ts +3 -0
- package/dist/prompts/local/pilot-review.d.ts.map +1 -0
- package/dist/prompts/local/pilot-review.js +13 -0
- package/dist/prompts/local/pilot-review.js.map +1 -0
- package/dist/prompts/local/qa-manual-test-designer.d.ts +7 -0
- package/dist/prompts/local/qa-manual-test-designer.d.ts.map +1 -0
- package/dist/prompts/local/qa-manual-test-designer.js +13 -0
- package/dist/prompts/local/qa-manual-test-designer.js.map +1 -0
- package/dist/prompts/local/qa-playwright-ts-writer.d.ts +6 -0
- package/dist/prompts/local/qa-playwright-ts-writer.d.ts.map +1 -0
- package/dist/prompts/local/qa-playwright-ts-writer.js +40 -0
- package/dist/prompts/local/qa-playwright-ts-writer.js.map +1 -0
- package/dist/prompts/local/qa-testcase-from-ui.d.ts +6 -0
- package/dist/prompts/local/qa-testcase-from-ui.d.ts.map +1 -0
- package/dist/prompts/local/qa-testcase-from-ui.js +52 -0
- package/dist/prompts/local/qa-testcase-from-ui.js.map +1 -0
- package/dist/session/index.d.ts +27 -0
- package/dist/session/index.d.ts.map +1 -0
- package/dist/session/index.js +74 -0
- package/dist/session/index.js.map +1 -0
- package/dist/telemetry/index.d.ts +21 -0
- package/dist/telemetry/index.d.ts.map +1 -0
- package/dist/telemetry/index.js +26 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/tui/App.d.ts +6 -0
- package/dist/tui/App.d.ts.map +1 -0
- package/dist/tui/App.js +61 -0
- package/dist/tui/App.js.map +1 -0
- package/dist/tui/components/error-boundary.d.ts +17 -0
- package/dist/tui/components/error-boundary.d.ts.map +1 -0
- package/dist/tui/components/error-boundary.js +20 -0
- package/dist/tui/components/error-boundary.js.map +1 -0
- package/dist/tui/components/field.d.ts +10 -0
- package/dist/tui/components/field.d.ts.map +1 -0
- package/dist/tui/components/field.js +8 -0
- package/dist/tui/components/field.js.map +1 -0
- package/dist/tui/components/help.d.ts +5 -0
- package/dist/tui/components/help.d.ts.map +1 -0
- package/dist/tui/components/help.js +7 -0
- package/dist/tui/components/help.js.map +1 -0
- package/dist/tui/components/log-pane.d.ts +6 -0
- package/dist/tui/components/log-pane.d.ts.map +1 -0
- package/dist/tui/components/log-pane.js +10 -0
- package/dist/tui/components/log-pane.js.map +1 -0
- package/dist/tui/components/node-checklist.d.ts +6 -0
- package/dist/tui/components/node-checklist.d.ts.map +1 -0
- package/dist/tui/components/node-checklist.js +9 -0
- package/dist/tui/components/node-checklist.js.map +1 -0
- package/dist/tui/components/pilot-badge.d.ts +6 -0
- package/dist/tui/components/pilot-badge.d.ts.map +1 -0
- package/dist/tui/components/pilot-badge.js +12 -0
- package/dist/tui/components/pilot-badge.js.map +1 -0
- package/dist/tui/components/scores-table.d.ts +6 -0
- package/dist/tui/components/scores-table.d.ts.map +1 -0
- package/dist/tui/components/scores-table.js +9 -0
- package/dist/tui/components/scores-table.js.map +1 -0
- package/dist/tui/components/scrollable-text.d.ts +6 -0
- package/dist/tui/components/scrollable-text.d.ts.map +1 -0
- package/dist/tui/components/scrollable-text.js +22 -0
- package/dist/tui/components/scrollable-text.js.map +1 -0
- package/dist/tui/components/session-picker.d.ts +5 -0
- package/dist/tui/components/session-picker.d.ts.map +1 -0
- package/dist/tui/components/session-picker.js +16 -0
- package/dist/tui/components/session-picker.js.map +1 -0
- package/dist/tui/components/test-case-list.d.ts +7 -0
- package/dist/tui/components/test-case-list.d.ts.map +1 -0
- package/dist/tui/components/test-case-list.js +10 -0
- package/dist/tui/components/test-case-list.js.map +1 -0
- package/dist/tui/hooks/use-run-artifacts.d.ts +14 -0
- package/dist/tui/hooks/use-run-artifacts.d.ts.map +1 -0
- package/dist/tui/hooks/use-run-artifacts.js +37 -0
- package/dist/tui/hooks/use-run-artifacts.js.map +1 -0
- package/dist/tui/hooks/use-runner.d.ts +25 -0
- package/dist/tui/hooks/use-runner.d.ts.map +1 -0
- package/dist/tui/hooks/use-runner.js +116 -0
- package/dist/tui/hooks/use-runner.js.map +1 -0
- package/dist/tui/hooks/use-runs.d.ts +14 -0
- package/dist/tui/hooks/use-runs.d.ts.map +1 -0
- package/dist/tui/hooks/use-runs.js +57 -0
- package/dist/tui/hooks/use-runs.js.map +1 -0
- package/dist/tui/hooks/use-sessions.d.ts +10 -0
- package/dist/tui/hooks/use-sessions.d.ts.map +1 -0
- package/dist/tui/hooks/use-sessions.js +32 -0
- package/dist/tui/hooks/use-sessions.js.map +1 -0
- package/dist/tui/hooks/use-stdout-dimensions.d.ts +3 -0
- package/dist/tui/hooks/use-stdout-dimensions.d.ts.map +1 -0
- package/dist/tui/hooks/use-stdout-dimensions.js +18 -0
- package/dist/tui/hooks/use-stdout-dimensions.js.map +1 -0
- package/dist/tui/index.d.ts +7 -0
- package/dist/tui/index.d.ts.map +1 -0
- package/dist/tui/index.js +13 -0
- package/dist/tui/index.js.map +1 -0
- package/dist/tui/router-context.d.ts +12 -0
- package/dist/tui/router-context.d.ts.map +1 -0
- package/dist/tui/router-context.js +14 -0
- package/dist/tui/router-context.js.map +1 -0
- package/dist/tui/router.d.ts +44 -0
- package/dist/tui/router.d.ts.map +1 -0
- package/dist/tui/router.js +22 -0
- package/dist/tui/router.js.map +1 -0
- package/dist/tui/screens/form-screen.d.ts +7 -0
- package/dist/tui/screens/form-screen.d.ts.map +1 -0
- package/dist/tui/screens/form-screen.js +104 -0
- package/dist/tui/screens/form-screen.js.map +1 -0
- package/dist/tui/screens/launcher-screen.d.ts +2 -0
- package/dist/tui/screens/launcher-screen.d.ts.map +1 -0
- package/dist/tui/screens/launcher-screen.js +28 -0
- package/dist/tui/screens/launcher-screen.js.map +1 -0
- package/dist/tui/screens/run-dashboard-screen.d.ts +7 -0
- package/dist/tui/screens/run-dashboard-screen.d.ts.map +1 -0
- package/dist/tui/screens/run-dashboard-screen.js +32 -0
- package/dist/tui/screens/run-dashboard-screen.js.map +1 -0
- package/dist/tui/screens/run-detail-screen.d.ts +5 -0
- package/dist/tui/screens/run-detail-screen.d.ts.map +1 -0
- package/dist/tui/screens/run-detail-screen.js +67 -0
- package/dist/tui/screens/run-detail-screen.js.map +1 -0
- package/dist/tui/screens/runs-list-screen.d.ts +2 -0
- package/dist/tui/screens/runs-list-screen.d.ts.map +1 -0
- package/dist/tui/screens/runs-list-screen.js +22 -0
- package/dist/tui/screens/runs-list-screen.js.map +1 -0
- package/dist/tui/screens/summary-screen.d.ts +6 -0
- package/dist/tui/screens/summary-screen.d.ts.map +1 -0
- package/dist/tui/screens/summary-screen.js +53 -0
- package/dist/tui/screens/summary-screen.js.map +1 -0
- package/dist/tui/theme.d.ts +31 -0
- package/dist/tui/theme.d.ts.map +1 -0
- package/dist/tui/theme.js +70 -0
- package/dist/tui/theme.js.map +1 -0
- package/dist/tui/types.d.ts +38 -0
- package/dist/tui/types.d.ts.map +1 -0
- package/dist/tui/types.js +2 -0
- package/dist/tui/types.js.map +1 -0
- package/dist/validate/index.d.ts +30 -0
- package/dist/validate/index.d.ts.map +1 -0
- package/dist/validate/index.js +39 -0
- package/dist/validate/index.js.map +1 -0
- package/dist/validate/runner.d.ts +15 -0
- package/dist/validate/runner.d.ts.map +1 -0
- package/dist/validate/runner.js +74 -0
- package/dist/validate/runner.js.map +1 -0
- package/package.json +97 -0
package/README.md
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# Cairn
|
|
2
|
+
|
|
3
|
+
> **Cairn — an AI that walks your system and leaves a trail of tests: UI, API, unit, docs.**
|
|
4
|
+
|
|
5
|
+
<!-- asciinema: docs/demo/cairn.cast (record before launch — see docs/demo/README.md) -->
|
|
6
|
+
[Watch the demo on asciinema](https://asciinema.org/a/REPLACE_ME)
|
|
7
|
+
|
|
8
|
+
> Autonomous QA agent (Node.js / TypeScript) that logs into a web app with a saved session, explores
|
|
9
|
+
> pages (ARIA snapshot + screenshot), designs methodology-based UI test cases (ISO/IEC/IEEE 29119-4),
|
|
10
|
+
> generates runnable `@playwright/test` code, self-validates and self-repairs, and **self-improves**
|
|
11
|
+
> via Langfuse. A portable utility (CLI + library) for embedding into other TypeScript projects.
|
|
12
|
+
|
|
13
|
+
**Cairn is the generation layer** — it produces tests across surfaces (UI today; API / unit / docs planned),
|
|
14
|
+
each arriving on demand, one at a time. A separate **Plune** layer owns record / management / eval.
|
|
15
|
+
|
|
16
|
+
> **Renamed from Lex-Bot → Cairn.** The old `lex-bot` command and the `@plune-ai/lex-bot` package still
|
|
17
|
+
> work — the CLI prints a one-line deprecation notice — but switch to `cairn` / `@plune-ai/cairn`; the old
|
|
18
|
+
> names will be removed in 1–2 releases. Legacy `LEX_`/`LEXBOT_` env vars still work too; prefer `CAIRN_`.
|
|
19
|
+
|
|
20
|
+
## What it does
|
|
21
|
+
|
|
22
|
+
Point it at a URL (behind login, with a saved Playwright `storageState`) and it will:
|
|
23
|
+
|
|
24
|
+
1. **Observe** — navigate, wait for SPA hydration, capture an ARIA snapshot + screenshot, extract interactive elements.
|
|
25
|
+
2. **Ground** — verify every locator (`getByRole().count()`), explore tabs/views (multi-state), probe safe state transitions — so it tests what is *actually* there, not hallucinations.
|
|
26
|
+
3. **Design** — write methodology-based test cases (29119-4: EP / BVA / decision-table / state-transition / error-guessing), steered by an optional checklist and domain knowledge files.
|
|
27
|
+
4. **Generate & validate** — emit POM-style `@playwright/test` specs (role-based locators, `test.step`), run them, classify pass/fail/flaky, and **self-repair** failures (with keep-best: a repair never makes the suite worse).
|
|
28
|
+
5. **Judge & learn** — deterministic scorers + an LLM judge + a holistic **Pilot** verdict + semantic checklist coverage, all traced to Langfuse; accumulate the best cases across runs.
|
|
29
|
+
|
|
30
|
+
## Two decoupled modes (plus an all-in-one)
|
|
31
|
+
|
|
32
|
+
- **`design`** — explore + write test cases as Markdown files (`ATC-*` automatable / `MTC-*` manual) with recorded selectors. **No code.** Review them as a human, automate later.
|
|
33
|
+
- **`automate`** — generate `@playwright/test` code from approved `ATC-*` cases (skips `MTC-*` manual ones).
|
|
34
|
+
- **`explore`** — the full pipeline at once (cases → code → validation → repair → Pilot verdict).
|
|
35
|
+
|
|
36
|
+
## How it works — the full cycle
|
|
37
|
+
|
|
38
|
+
New to this? The bot writes two kinds of test case:
|
|
39
|
+
- **ATC** (*Automatable Test Case*) — a case the bot is confident it can drive reliably (read-only checks, verified locators) → it generates Playwright code for these.
|
|
40
|
+
- **MTC** (*Manual Test Case*) — needs a human (full form submits, security/XSS, visual/UX, irreversible actions) → left for you to run by hand.
|
|
41
|
+
|
|
42
|
+
The typical flow:
|
|
43
|
+
|
|
44
|
+
1. **Capture a session** (once) — log in so the bot can reach pages behind auth.
|
|
45
|
+
2. **Design** — the bot studies the page and writes test cases (`ATC-*` / `MTC-*` `.md` files with recorded selectors). No code yet.
|
|
46
|
+
3. **Review** — you read the cases (in the TUI: *Browse past runs* → open a run → *Cases*).
|
|
47
|
+
4. **Promote** *(optional)* — reviewed an `MTC` and decided it's actually automatable? `cairn promote …` (or `a` in the TUI) converts it to an `ATC` in place. It's then picked up by automate.
|
|
48
|
+
5. **Automate** — generate `@playwright/test` code from the `ATC` cases.
|
|
49
|
+
6. **Validate** — run the generated tests, classify pass/fail/flaky, and self-repair failures.
|
|
50
|
+
|
|
51
|
+
`explore` runs the automated steps (2, 5 and 6) in one go, without the human review/promote steps; `design` + `automate` split them so you can review (and promote) in between. Full walkthrough: **[docs/getting-started.md](docs/getting-started.md)**.
|
|
52
|
+
|
|
53
|
+
## Install
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
npm install -g @plune-ai/cairn # CLI
|
|
57
|
+
# or as a library:
|
|
58
|
+
npm install @plune-ai/cairn
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Requires Node.js 20+. Copy `.env.example` → `.env` and fill in your keys.
|
|
62
|
+
|
|
63
|
+
## Quickstart
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# 1. Capture a session (opens a browser to log in)
|
|
67
|
+
npm run session:save -- --url https://app.example.com/ --name myapp
|
|
68
|
+
|
|
69
|
+
# 2. Design test cases (no code) — review the .md files it writes
|
|
70
|
+
cairn design --url https://app.example.com/page --session myapp --checklist plan.md
|
|
71
|
+
|
|
72
|
+
# 3. (optional) Promote a manual case you decided is automatable: MTC → ATC
|
|
73
|
+
cairn promote --run runs/<id> --cases MTC-LOGIN-001
|
|
74
|
+
|
|
75
|
+
# 4. Automate the approved (ATC) cases → @playwright/test code, and run them
|
|
76
|
+
cairn automate --run runs/<id> --validate --session myapp
|
|
77
|
+
|
|
78
|
+
# …or do everything at once:
|
|
79
|
+
cairn explore --url https://app.example.com/page --session myapp --checklist plan.md
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
New here? Read the **[Getting started guide](docs/getting-started.md)** — it walks the whole cycle with explanations.
|
|
83
|
+
|
|
84
|
+
## Interactive TUI
|
|
85
|
+
|
|
86
|
+
Run `cairn` with **no arguments** in a terminal to open the interactive TUI (built with Ink):
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
cairn # launches the terminal UI
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Pick a command (explore / design / automate), fill parameters (URL, session, checklist, style) via a
|
|
93
|
+
guided form, watch a **live dashboard** of the graph nodes as the run progresses, read the result summary
|
|
94
|
+
(scores, green %, Pilot verdict, test cases), and **browse past runs** in `./runs` — opening any run to
|
|
95
|
+
read its test cases, report and logs.
|
|
96
|
+
|
|
97
|
+
The commands below stay available for scripting/CI; in a non-interactive (piped/CI) shell, `cairn` with
|
|
98
|
+
no arguments prints help instead of starting the UI.
|
|
99
|
+
|
|
100
|
+
## Commands
|
|
101
|
+
|
|
102
|
+
| Command | Purpose |
|
|
103
|
+
|---|---|
|
|
104
|
+
| `cairn observe --url <u> [--session <s>]` | ARIA snapshot + interactive elements + screenshot |
|
|
105
|
+
| `cairn design --url <u> --session <s> [--checklist <f>] [--style <s>]` | Test cases only (ATC/MTC `.md` + selectors), no code |
|
|
106
|
+
| `cairn automate --run <dir> [--validate --session <s>]` | `@playwright/test` from `ATC-*` cases |
|
|
107
|
+
| `cairn promote --run <dir> --cases <ids> [--session <s>]` | Promote manual MTC case(s) to ATC (.md only; then `automate`) |
|
|
108
|
+
| `cairn explore --url <u> --session <s> [--checklist <f>]` | Full pipeline (cases → code → validate → repair → Pilot) |
|
|
109
|
+
| `cairn experiment --dataset <d> --candidate name=file` | Compare prompt versions on a dataset |
|
|
110
|
+
|
|
111
|
+
> `lex-bot <command>` still runs every command above (deprecated alias — prints a notice, then runs `cairn`).
|
|
112
|
+
|
|
113
|
+
## Configuration (env)
|
|
114
|
+
|
|
115
|
+
| Var | Purpose |
|
|
116
|
+
|---|---|
|
|
117
|
+
| `LLM_PROFILE` | `anthropic` \| `openrouter` \| `mixed` |
|
|
118
|
+
| `ANTHROPIC_API_KEY` / `OPENROUTER_API_KEY` | provider keys (per profile) |
|
|
119
|
+
| `QA_TESTCASE_LANG` | test-case language (default `English`; e.g. `Ukrainian`, `uk`) |
|
|
120
|
+
| `LANGFUSE_BASE_URL` / `LANGFUSE_PUBLIC_KEY` / `LANGFUSE_SECRET_KEY` | Langfuse — **cloud or self-hosted** (optional; see [below](#optional-langfuse)) |
|
|
121
|
+
| `BROWSER_BACKEND` | `lib` (in-process Playwright) \| `cli` |
|
|
122
|
+
| `BROWSER_CHANNEL` | `chrome` to use real Chrome (helps with OAuth) |
|
|
123
|
+
| `MAX_REPAIR` | repair attempts (default 2) |
|
|
124
|
+
|
|
125
|
+
- **Env var prefix:** every variable above is read as-is **or** with a `CAIRN_` prefix (e.g. `CAIRN_LLM_PROFILE`, `CAIRN_MAX_REPAIR`). Legacy `LEX_`/`LEXBOT_` prefixes still work but print a one-time deprecation warning — prefer `CAIRN_`.
|
|
126
|
+
- **Domain knowledge:** put `*.md` files in `./knowledge/` with a `url:` front-matter field to inject credentials/validation rules into design.
|
|
127
|
+
- **Prompt overrides:** drop `./prompts/<name>.md` to override any built-in prompt without rebuilding.
|
|
128
|
+
|
|
129
|
+
## Optional: Langfuse
|
|
130
|
+
|
|
131
|
+
Langfuse is **entirely optional** — leave the `LANGFUSE_*` variables unset and the bot runs fully offline.
|
|
132
|
+
Everything core still works: `observe` / `design` / `automate` / `explore`, locator grounding, the LLM judge,
|
|
133
|
+
deterministic scorers, self-repair, and results-level learning (best cases are read from local
|
|
134
|
+
`runs/<id>/report.json`). Prompts fall back to the built-in defaults — override any of them with `./prompts/<name>.md`.
|
|
135
|
+
|
|
136
|
+
Set the three `LANGFUSE_*` variables to **additionally** get: traces in the Langfuse UI, scores/datasets
|
|
137
|
+
recorded centrally, and versioned prompts (with production labels & A/B prompt experiments via `cairn experiment`).
|
|
138
|
+
|
|
139
|
+
**Cloud or self-hosted — same setup.** Langfuse Cloud and a self-hosted instance are configured identically:
|
|
140
|
+
you only pass the host URL and the API keys, nothing else changes.
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Pick ONE base URL:
|
|
144
|
+
LANGFUSE_BASE_URL=https://cloud.langfuse.com # Langfuse Cloud (EU)
|
|
145
|
+
# LANGFUSE_BASE_URL=https://us.cloud.langfuse.com # Langfuse Cloud (US)
|
|
146
|
+
# LANGFUSE_BASE_URL=https://langfuse.your-host.tld # self-hosted
|
|
147
|
+
LANGFUSE_PUBLIC_KEY=pk-lf-...
|
|
148
|
+
LANGFUSE_SECRET_KEY=sk-lf-...
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
> Enablement is all-or-nothing: Langfuse turns on only when **all three** variables are set; otherwise
|
|
152
|
+
> telemetry is a no-op and the bot behaves exactly as it does offline.
|
|
153
|
+
|
|
154
|
+
## Library API
|
|
155
|
+
|
|
156
|
+
```ts
|
|
157
|
+
import { runDesign, runAutomate, runExploration, loadConfig } from "@plune-ai/cairn";
|
|
158
|
+
|
|
159
|
+
const config = loadConfig(process.env);
|
|
160
|
+
const result = await runDesign({ url, config, sessionName: "myapp", checklistText });
|
|
161
|
+
// result.testCases, result.testCaseFiles, result.scores
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Development
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
npm run build # tsc
|
|
168
|
+
npm test # vitest (unit + integration; LLM/browser are mocked in unit tests)
|
|
169
|
+
npm run test:coverage
|
|
170
|
+
npm run lint
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Documentation
|
|
174
|
+
|
|
175
|
+
- **[Getting started](docs/getting-started.md)** — step-by-step onboarding (session → design → review → promote → automate → validate), written for people new to the tool.
|
|
176
|
+
- **[Architecture overview](docs/architecture/overview.md)** — how the agent works inside (the LangGraph state machine, locator grounding, self-improvement).
|
|
177
|
+
- **[Architecture Decision Records](docs/adr/)** — why it's built this way (0001–0010, incl. the interactive TUI, the `@playwright/test` output format, and the Lex-Bot → Cairn rename).
|
|
178
|
+
|
|
179
|
+
## License
|
|
180
|
+
|
|
181
|
+
GPL-3.0. Methodology prompts ported from `AZANIR/qa-skills` (see `docs/adr/0008`).
|