npm - @test-lab-ai/cli - Versions diffs - 0.2.7 → 0.2.9 - Mend

@test-lab-ai/cli 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +7 -2
package/bin/testlab.mjs +47 -3
package/lib/skills.mjs +56 -40
package/lib/update-check.mjs +17 -4
package/package.json +3 -2
package/skills/LICENSE +21 -0
package/skills/README.md +66 -0
package/skills/test-lab-plan/SKILL.md +254 -0
package/skills/test-lab-plan/examples/auth.md +145 -0
package/skills/test-lab-plan/examples/pipelines.md +163 -0
package/skills/test-lab-plan/examples/workflows.md +85 -0
package/skills/test-lab-plan/references/run-via-api.md +97 -0
package/skills/test-lab-plan/references/syntax.md +94 -0

package/README.md CHANGED Viewed

@@ -120,8 +120,13 @@ each, defaulting to Claude Code if none are found.
 Add `--global` for Claude Code or Codex (installs under your home directory);
 Cursor user rules are set in Cursor's Settings. Restart your agent to load it.
-It installs from the public [`Test-Lab-ai/skills`](https://github.com/Test-Lab-ai/skills)
-mirror, so you always get the latest version.
+The skill is **bundled with the CLI** and version-locked to it (offline, reproducible
+installs); the public [`Test-Lab-ai/skills`](https://github.com/Test-Lab-ai/skills)
+repo is the browsable source.
+A skill change ships as a new CLI version, so `npm i -g @test-lab-ai/cli@latest`
+updates it: the CLI auto-refreshes installed copies on first run after an upgrade,
+and `testlab skills update` re-installs the bundled version on demand.
 ## For AI agents

package/bin/testlab.mjs CHANGED Viewed

@@ -23,8 +23,8 @@ import { apiFetch } from "../lib/api.mjs"
 import { loadImportFile, runImport } from "../lib/import.mjs"
 import { browserLogin } from "../lib/login.mjs"
 import { EXAMPLES_TEXT } from "../lib/examples.mjs"
-import { TESTLAB_SKILLS, AGENTS, detectAgents, installSkill } from "../lib/skills.mjs"
-import { checkForUpdate, currentVersion } from "../lib/update-check.mjs"
+import { TESTLAB_SKILLS, AGENTS, detectAgents, installSkill, installedSkillLocations } from "../lib/skills.mjs"
+import { checkForUpdate, currentVersion, previousRunVersion } from "../lib/update-check.mjs"
 const log = (...a) => console.log(...a)
 function errExit(msg) {
@@ -51,6 +51,7 @@ Usage:
                                            resource (designed for AI agents)
   testlab skills install [--agent ...]     Install the test-lab-plan skill (auto-detects
                                            Claude/Codex/Cursor; --agent claude|codex|cursor|all)
+  testlab skills update                    Refresh installed skills (also auto-runs after a CLI upgrade)
 Options:
   --key <tl_…>      API key (else $TESTLAB_API_KEY or ~/.test-lab/config.json)
@@ -392,6 +393,46 @@ function cmdSkillsList() {
   log(`\nInstall with: testlab skills install [name] [--agent ${AGENTS.join("|")}|all]`)
 }
+async function cmdSkillsUpdate() { // handler for the skills update subcommand: re-install each skill wherever it is already installed
+  let refreshed = 0 // counts successful re-installs only
+  for (const name of TESTLAB_SKILLS) {
+    for (const loc of installedSkillLocations(name)) {
+      try {
+        const res = await installSkill(name, loc.agent, { global: loc.global })
+        refreshed++
+        log(`✓ ${name} → ${loc.agent}${loc.global ? " (global)" : ""}: ${res.dest}`)
+      } catch (e) {
+        log(`  ✗ ${name} (${loc.agent}): ${e.message}`) // report the failure and continue with the remaining locations
+      }
+    }
+  }
+  if (refreshed === 0) log("No installed skills found here. Run `testlab skills install` first.") // NOTE(review): also printed when locations were found but every re-install failed
+  else log(`\nRefreshed ${refreshed} location(s). Restart your agent to load the changes.`)
+}
+// After a CLI upgrade, refresh any already-installed skills in place — runs once
+// per version bump (best effort). Skipped in CI / when NO_UPDATE_NOTIFIER is set.
+async function maybeRefreshSkillsOnUpgrade() {
+  if (process.env.CI || process.env.NO_UPDATE_NOTIFIER) return
+  const ver = currentVersion()
+  const prev = previousRunVersion(ver) // returns the version recorded on the previous run and records ver for the next one
+  if (!prev || prev === ver) return // no previously recorded version, or version unchanged: nothing to refresh
+  let refreshed = 0 // counts successful re-installs only
+  for (const name of TESTLAB_SKILLS) {
+    for (const loc of installedSkillLocations(name)) {
+      try {
+        await installSkill(name, loc.agent, { global: loc.global })
+        refreshed++
+      } catch {
+        /* best effort — never break the actual command */
+      }
+    }
+  }
+  if (refreshed > 0) {
+    process.stderr.write(`↻ CLI upgraded ${prev} → ${ver}; refreshed ${refreshed} installed skill location(s).\n`) // notice goes to stderr, and only when at least one location was refreshed
+  }
+}
 async function main() {
   let parsed
   try {
@@ -414,6 +455,8 @@ async function main() {
     return
   }
+  await maybeRefreshSkillsOnUpgrade()
   switch (args[0]) {
     case "login":
       return cmdLogin(flags)
@@ -441,8 +484,9 @@ async function main() {
       return cmdExamples()
     case "skills":
       if (args[1] === "install") return cmdSkillsInstall(flags, args)
+      if (args[1] === "update") return cmdSkillsUpdate()
       if (args[1] === "list") return cmdSkillsList()
-      return errExit("usage: testlab skills <install|list> [name] [--global]")
+      return errExit("usage: testlab skills <install|update|list> [name] [--global]")
     case "import":
       return cmdImport(flags, args)
     default:

package/lib/skills.mjs CHANGED Viewed

@@ -1,7 +1,13 @@
 /**
- * Install test-lab skills (e.g. test-lab-plan) into a local AI coding agent by
- * fetching them from the public Test-Lab-ai/skills mirror. The mirror is the
- * single source of truth, so this always installs the latest published skill.
+ * Install test-lab skills (e.g. test-lab-plan) into a local AI coding agent from
+ * the copy BUNDLED with this CLI. The bundle lives at packages/cli/skills/ and is
+ * populated at publish time from the monorepo's skills/ (see scripts/bundle-skill.mjs);
+ * when running from source the install falls back to that monorepo copy.
+ *
+ * The skill is therefore version-locked to the CLI: a skill change ships as a new
+ * CLI version, so upgrading the CLI is what updates the skill (and the on-upgrade
+ * auto-refresh keeps installed copies current). The public Test-Lab-ai/skills repo
+ * stays as the browsable, open-source view; it is no longer fetched at runtime.
  *
  * Each agent has its own convention (verified against current docs):
  *   claude → .claude/skills/<name>/SKILL.md   (project) | ~/.claude/skills  (--global)
@@ -9,19 +15,15 @@
  *   cursor → .cursor/rules/<name>.mdc         (project only — Cursor has no
  *            global *file*; user rules live in Settings)
  *
- * claude + codex use the identical SKILL.md folder format, so they share the
- * write path (only the base dir differs). cursor uses a single .mdc rule file
- * with its own frontmatter, so we convert SKILL.md on the way out.
+ * claude + codex share the SKILL.md folder format (only the base dir differs);
+ * cursor gets a single .mdc rule with its own frontmatter, converted on the way out.
  *
- * Zero-dep: global fetch + node:fs.
+ * Zero-dep, no network.
  */
 import fs from "node:fs"
 import os from "node:os"
 import path from "node:path"
-const MIRROR = "Test-Lab-ai/skills"
-const MIRROR_BRANCH = "main"
-const UA = { "User-Agent": "test-lab-cli" }
+import { fileURLToPath } from "node:url"
 // Skills published by test-lab. Add new skill directory names here.
 export const TESTLAB_SKILLS = ["test-lab-plan"]
@@ -51,6 +53,23 @@ export function detectAgents() {
   return [...found]
 }
+/**
+ * Where skill `name` is already installed — project (cwd) and global locations
+ * across all agents — filtered to the paths that actually exist. Used to
+ * refresh installed skills (on `skills update` and after a CLI upgrade).
+ *
+ * Returns an array of { agent, global, path } entries, one per candidate
+ * location that exists on disk; the array is empty when none exist.
+ */
+export function installedSkillLocations(name) {
+  const home = os.homedir()
+  const cwd = process.cwd()
+  return [
+    { agent: "claude", global: false, path: path.join(cwd, ".claude", "skills", name) },
+    { agent: "codex", global: false, path: path.join(cwd, ".agents", "skills", name) },
+    { agent: "cursor", global: false, path: path.join(cwd, ".cursor", "rules", `${name}.mdc`) }, // cursor is a single .mdc rule file, not a folder
+    { agent: "claude", global: true, path: path.join(home, ".claude", "skills", name) },
+    { agent: "codex", global: true, path: path.join(home, ".agents", "skills", name) }, // no global cursor candidate is listed
+  ].filter((c) => fs.existsSync(c.path)) // keep only the candidates present on disk
+}
 /** Resolve the install target (base dir + format) for one agent. */
 export function agentTarget(agent, { global } = {}) {
   const home = os.homedir()
@@ -70,23 +89,24 @@ export function agentTarget(agent, { global } = {}) {
   }
 }
-async function listSkillFiles(name) {
-  const r = await fetch(
-    `https://api.github.com/repos/${MIRROR}/git/trees/${MIRROR_BRANCH}?recursive=1`,
-    { headers: { ...UA, Accept: "application/vnd.github+json" } }
-  )
-  if (!r.ok) throw new Error(`could not list ${MIRROR} (${r.status})`)
-  const tree = await r.json()
-  const prefix = `skills/${name}/`
-  return (tree.tree || [])
-    .filter((e) => e.type === "blob" && typeof e.path === "string" && e.path.startsWith(prefix))
-    .map((e) => e.path)
+/** Locate the skill's source dir: the bundled copy, or the monorepo when run from source. */
+function skillSourceDir(name) {
+  const bundled = fileURLToPath(new URL(`../skills/${name}`, import.meta.url)) // packages/cli/skills/<name>
+  if (fs.existsSync(bundled)) return bundled // the bundled copy takes precedence when both exist
+  const monorepo = fileURLToPath(new URL(`../../../skills/${name}`, import.meta.url)) // repo-root skills/<name>
+  if (fs.existsSync(monorepo)) return monorepo
+  throw new Error(`skill "${name}" is not bundled with this CLI`) // neither candidate directory exists
 }
-async function fetchText(p) {
-  const r = await fetch(`https://raw.githubusercontent.com/${MIRROR}/${MIRROR_BRANCH}/${p}`, { headers: UA })
-  if (!r.ok) throw new Error(`could not fetch ${p} (${r.status})`)
-  return r.text()
+/** Relative paths of every file under dir (recursive). */
+function walkFiles(dir, base = dir) { // base stays fixed across the recursion so results are relative to the top-level dir
+  const out = []
+  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+    const full = path.join(dir, entry.name)
+    if (entry.isDirectory()) out.push(...walkFiles(full, base)) // descend, passing the original base along
+    else out.push(path.relative(base, full)) // every non-directory entry is recorded
+  }
+  return out
 }
 /** Convert a Claude SKILL.md into a Cursor .mdc rule (Agent Requested mode). */
@@ -100,30 +120,26 @@ function toCursorRule(skillMd) {
   return `---\ndescription: ${description}\nalwaysApply: false\n---\n\n${body}\n`
 }
-/** Install one skill for one agent. Returns { name, agent, count, dest }. */
+/** Install one skill (from the bundled copy) for one agent. Returns { name, agent, count, dest }. */
 export async function installSkill(name, agent, opts = {}) {
   const { kind, base } = agentTarget(agent, opts)
-  const files = await listSkillFiles(name)
-  if (files.length === 0) throw new Error(`skill "${name}" not found in ${MIRROR}`)
-  const prefix = `skills/${name}/`
+  const srcDir = skillSourceDir(name) // local directory holding the skill files; throws when the skill cannot be located
   if (kind === "cursor-rule") {
-    const skillPath = files.find((p) => p.endsWith("/SKILL.md"))
-    if (!skillPath) throw new Error(`skill "${name}" has no SKILL.md`)
+    const skillMd = path.join(srcDir, "SKILL.md")
+    if (!fs.existsSync(skillMd)) throw new Error(`skill "${name}" has no SKILL.md`)
     const dest = path.join(base, `${name}.mdc`)
     fs.mkdirSync(base, { recursive: true })
-    fs.writeFileSync(dest, toCursorRule(await fetchText(skillPath)))
+    fs.writeFileSync(dest, toCursorRule(fs.readFileSync(skillMd, "utf8"))) // only SKILL.md is converted; no other file from the skill folder is written for cursor
     return { name, agent, count: 1, dest }
   }
-  // skill-dir: claude + codex (identical SKILL.md folder layout).
-  let count = 0
-  for (const p of files) {
-    const rel = p.slice(prefix.length)
+  // skill-dir: claude + codex (full SKILL.md folder).
+  const files = walkFiles(srcDir) // paths relative to srcDir
+  for (const rel of files) {
     const dest = path.join(base, name, rel)
     fs.mkdirSync(path.dirname(dest), { recursive: true })
-    fs.writeFileSync(dest, await fetchText(p))
-    count++
+    fs.copyFileSync(path.join(srcDir, rel), dest) // NOTE(review): nothing here removes files already under dest that are absent from srcDir
   }
-  return { name, agent, count, dest: path.join(base, name) }
+  return { name, agent, count: files.length, dest: path.join(base, name) }
 }

package/lib/update-check.mjs CHANGED Viewed

@@ -2,7 +2,7 @@
  * Best-effort "update available" notice.
  *
  * Design constraints: never block, slow, or break a command. It reads a cached
- * latest-version (checked against the npm registry at most once/day), prints a
+ * latest-version (checked against the npm registry at most every few hours), prints a
  * one-line notice to STDERR when the running version is behind, and kicks off a
  * background refresh for next time. Any failure (offline, slow/forbidden
  * registry) is swallowed. Suppressed when stderr isn't a TTY (pipes / CI /
@@ -15,7 +15,7 @@ import path from "node:path"
 const PKG = "@test-lab-ai/cli"
 const REGISTRY = "https://registry.npmjs.org/@test-lab-ai%2Fcli"
 const CACHE = path.join(os.homedir(), ".test-lab", "update-check.json")
-const DAY = 24 * 60 * 60 * 1000
+const CHECK_TTL = 3 * 60 * 60 * 1000 // re-check npm at most every 3h
 export function currentVersion() {
   try {
@@ -70,16 +70,29 @@ export function checkForUpdate() {
   const notice = updateNotice(current, cache.latest)
   if (notice) process.stderr.write(notice + "\n")
-  if (!cache.lastCheck || Date.now() - cache.lastCheck > DAY) {
+  if (!cache.lastCheck || Date.now() - cache.lastCheck > CHECK_TTL) {
     const ctrl = new AbortController()
     const timer = setTimeout(() => ctrl.abort(), 2000)
     fetch(REGISTRY, { signal: ctrl.signal, headers: { Accept: "application/vnd.npm.install-v1+json" } })
       .then((r) => (r.ok ? r.json() : null))
       .then((j) => {
         const latest = j && j["dist-tags"] && j["dist-tags"].latest
-        if (latest) writeCache({ lastCheck: Date.now(), latest })
+        if (latest) writeCache({ ...cache, lastCheck: Date.now(), latest })
       })
       .catch(() => {})
       .finally(() => clearTimeout(timer))
   }
 }
+/**
+ * Record the running CLI version and return the version seen on the PREVIOUS
+ * run (null on first run). Lets the CLI notice "I was just upgraded" so it can
+ * refresh installed skills. Merges into the shared cache so it doesn't clobber
+ * the update-check state.
+ *
+ * @param current - version of the running CLI, as supplied by the caller
+ * @returns the previously recorded cliVersion, or null when none was recorded
+ */
+export function previousRunVersion(current) {
+  const cache = readCache()
+  const prev = cache.cliVersion || null // a missing or empty cliVersion is reported as null
+  if (prev !== current) writeCache({ ...cache, cliVersion: current }) // write only when the recorded version differs from the running one
+  return prev
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@test-lab-ai/cli",
-  "version": "0.2.7",
+  "version": "0.2.9",
   "description": "Import existing test plans into test-lab.ai from the command line (or an AI agent).",
   "type": "module",
   "bin": {
@@ -13,12 +13,13 @@
     "bin",
     "lib",
     "examples",
+    "skills",
     "README.md",
     "AGENTS.md"
   ],
   "scripts": {
     "test": "node test/toposort.test.mjs",
-    "prepublishOnly": "npm test"
+    "prepublishOnly": "node scripts/bundle-skill.mjs && npm test"
   },
   "keywords": [
     "test-lab",

package/skills/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 test-lab.ai
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/skills/README.md ADDED Viewed

@@ -0,0 +1,66 @@
+# test-lab.ai skills
+Claude Code (and Cursor / Codex / OpenCode / Cline / Warp / 50+ other agents) skills for [test-lab.ai](https://test-lab.ai), the AI QA platform that runs natural-language tests against websites.
+This directory is the canonical source. The contents are mirrored to [github.com/Test-Lab-ai/skills](https://github.com/Test-Lab-ai/skills) on every push to `main`, and that mirror is what the install commands below point at.
+## Available skills
+- **[`test-lab-plan`](./test-lab-plan/SKILL.md)** – Convert a flow description into a paste-ready test-lab.ai test plan with explicit URLs, numbered acceptance criteria, mode and agent type, and proper credential syntax. Slash command: `/test-lab-plan`.
+More skills planned: result analysis, CI integration, pipeline design.
+## Install
+### Via the [`skills`](https://www.npmjs.com/package/skills) CLI (recommended)
+Works for Claude Code, Cursor, Codex, OpenCode, Cline, Warp, and 50+ other agents:
+```sh
+# Install all test-lab skills to a single agent (Claude Code shown)
+npx skills add Test-Lab-ai/skills -a claude-code
+# Install a specific skill
+npx skills add Test-Lab-ai/skills --skill test-lab-plan -a claude-code
+# Install globally (available across all projects)
+npx skills add Test-Lab-ai/skills --skill test-lab-plan -g -a claude-code
+# Update later
+npx skills update test-lab-plan
+```
+For Claude Code specifically, this drops the skill into `~/.claude/skills/test-lab-plan/` (with `-g`) or `<project>/.claude/skills/test-lab-plan/` (without `-g`).
+### Manually (Claude Code)
+Clone or copy the skill directory into your Claude Code skills folder:
+```sh
+# Global
+git clone https://github.com/Test-Lab-ai/skills.git /tmp/test-lab-skills
+cp -r /tmp/test-lab-skills/skills/test-lab-plan ~/.claude/skills/
+# Or project-local
+cp -r /tmp/test-lab-skills/skills/test-lab-plan <your-project>/.claude/skills/
+```
+## Usage
+After install, invoke via the slash command:
+```
+/test-lab-plan
+```
+Or just describe what you want to test ("write a test for our checkout flow at /cart") and the skill triggers on its own.
+The skill outputs a copy-pasteable plan ready for the test-lab.ai dashboard. It does **not** submit plans on your behalf — see [`test-lab-plan/references/run-via-api.md`](./test-lab-plan/references/run-via-api.md) for the API contract if you want to trigger runs from CI.
+## Contributing
+The canonical source lives in this directory of the [test-lab](https://github.com/AdrianNeatu/test-lab) monorepo. Edits here are auto-synced to the public mirror, so open PRs against the monorepo rather than against the mirror.
+## License
+MIT. See [LICENSE](./LICENSE).

package/skills/test-lab-plan/SKILL.md ADDED Viewed

@@ -0,0 +1,254 @@
+---
+name: test-lab-plan
+description: Write production-ready test plans for test-lab.ai, the AI QA platform that runs natural-language tests against websites. Use this skill whenever the user wants to write a test for test-lab.ai, draft a "natural language test" / "english test" / "AI test" / "test plan", set up acceptance criteria for a user flow, describe a journey to test, or generate prompts for the test-lab.ai dashboard. Trigger on mentions of test-lab, test-lab.ai, or testlab, and on requests like "test my login", "write a QA test for [page]", "smoke test for [flow]", or any browser-test description that does not reference Playwright/Cypress/Jest by name. Outputs a copy-pasteable test plan with explicit URLs, numbered acceptance criteria, mode + agent type recommendation, and {{credentials.<key>}} syntax for sensitive values.
+allowed-tools:
+  - Read
+  - Glob
+  - Grep
+  - WebFetch
+  - Bash
+  - Write
+---
+# test-lab.ai test plans
+[test-lab.ai](https://test-lab.ai) is an AI QA platform. A test plan is a plain-English prompt that describes a single user flow; an AI agent reads the prompt, drives a real browser, and reports pass/fail against the acceptance criteria you wrote. Your job in this skill is to turn whatever the user describes into a paste-ready plan that follows the conventions below.
+You are **writing the prompt** (you don't run the test). There are two ways your output gets used — offer the second whenever it's available:
+1. **Copy-paste** into the test-lab.ai dashboard (Test Plans → New) — the default.
+2. **Create it directly** with the `@test-lab-ai/cli` (`testlab`), which creates the credentials, labels, test data (data fixtures), AND the plan(s) in the user's account in one step. See **"Creating it with the CLI"** below.
+Either way the design rules are identical (explicit URLs, declarative criteria, credentials syntax) — the CLI just uploads what you'd otherwise hand back to paste.
+## Workflow
+Follow these steps in order. Each step has a reason — when you're tempted to skip one, re-read the reason.
+### 1. Gather scope
+Ask (or infer from context) four things, then confirm before drafting:
+- **The flow**: one sentence — "user signs up", "shopper checks out with a saved card", "admin disables a schedule"
+- **The starting URL or path**: `/login`, `https://example.com/cart`, etc. The agent has to navigate somewhere to begin.
+- **What success looks like**: the visible signal that the flow worked (redirect, message, element appearing). The agent needs verifiable acceptance criteria, not "verify it works."
+- **Who performs it**: anonymous visitor, logged-in user, admin. If the flow needs a logged-in user or other setup state, configure that as a pre-step on the plan in the dashboard — don't add it to the prompt body (see references/syntax.md).
+Why first: every later step depends on this. Drafting before scope is set produces plans that need rewriting.
+#### Read the source if it's in the repo
+If you're operating inside a repo that contains the target site's code, **read the relevant components and routes before drafting**. This is the difference between guessing and knowing. Use Glob / Grep / Read to find:
+- The page or form component (often in `app/`, `components/`, `src/components/`, or similar)
+- The API route handler the form posts to
+- Shared widgets the form composes (captcha, modal, error renderer)
+Anchor every acceptance criterion to real DOM text or real response shape. If the success state is "the form is replaced by a card with the heading 'Message Sent!'", say that exactly — not "a confirmation message appears."
+If the source is **not** available (you're not in the repo, or it's a third-party site), pull a snapshot via WebFetch or ask the user for screenshots of the key states. Do not invent placeholder text — vague criteria like "a success banner appears" make the AI agent flag false negatives whenever copy changes.
+### 2. Pick the test mode
+Two modes exist:
+- **Quick** — single agent, ~10 steps max, ~2-5 min. Use for smoke tests, frequent CI runs, the happy path.
+- **Deep** — multiple agents, 20-40+ steps, ~5-10 min. Use for critical pre-release flows, edge cases, sad paths, exploratory branches.
+Default to **Quick** unless the user asked for thorough coverage or the flow has obvious sad paths the user wants exercised. A Quick plan with 30 acceptance criteria is wrong; rewrite it as Deep or split it.
+Note the vocabulary mismatch: the dashboard says "Quick / Deep"; the API and DB use `quickTest` / `deepTest`. Teach the user both — they'll paste into different surfaces.
+### 3. Pick the agent type
+Two agent types are user-pickable in the dashboard today:
+- **Functional** (default) — workflows, forms, navigation, CRUD, happy and sad paths. Use this unless the user is explicitly asking about accessibility.
+- **Accessibility** — WCAG behavior, keyboard navigation, focus management, screen-reader-relevant patterns. Use only when the test is about a11y; do not silently pick it for general tests.
+Other agent types (UI/UX, exploratory, performance, security) exist in the platform but are not yet exposed in the dashboard; do not recommend them.
+**Labels:** assign exactly **one** label by default: the single most relevant tag, such as the feature area (`onboarding`, `auth`, `checkout`) or `smoke` for a basic health check. Add a second label only if the user explicitly asks for more. In the dashboard you set this on the plan; in a CLI bundle it is the plan's `labels` array, which should normally have a single entry.
+### 4. Draft the plan from the template
+Use the Template section below. Fill in the steps in the order a user would do them. Keep prose natural — write like you're briefing a human tester, not coding a DSL. Numbered steps are fine but not required for the action sequence; what *must* be numbered is the acceptance criteria block.
+### 5. Write declarative, verifiable acceptance criteria
+After the action sequence, add a numbered `Verify that:` block. Each item describes a state the agent can observe, not an action to take.
+- Good: `2. The user menu in the header shows the logged-in user's email.`
+- Bad: `2. Click the user menu and check that login worked.`
+The agent will execute each item independently as a check. Vague items like "verify it works" produce noisy reports.
+### 6. Plug in credentials and dynamic data
+If the flow uses sensitive values (passwords, API keys, real emails), reference them through credentials instead of inlining:
+- Use `{{credentials.<name>}}` — **no spaces** inside the braces. The dashboard validates this and rejects spaced variants.
+- Never write a literal password into a plan you hand back. If the user gives you one, replace it with `{{credentials.<name>}}` and list the credential name in an "Assumes credentials:" footer so the user knows what to set up in Settings → Credentials.
+If the flow needs to **input generated or unique data** — a fresh email per run, a random name, a unique order ref — define a **data fixture** and reference it as `{{data.<fixtureKey>.<fieldKey>}}` (no spaces). A fixture field is either *static* (a literal value) or *dynamic* (a generator like `internet.email`, `person.firstName`, or `string.uuid` that rolls a fresh value every run). Prefer this over a brittle hardcoded value or asking the user to pre-make one. You create fixtures with the CLI (see "Creating it with the CLI"); run `testlab examples` for the field shape and the full generator list.
+For pipeline inputs (only in pre-steps), the syntax is `{{ input.<name> }}` **with spaces** — and the fallback form `{{ input.<name> | credentials.<fallback> }}`. The two syntaxes are intentionally different; do not mix them. Full detail in `references/syntax.md`.
+For dynamic values in **acceptance criteria** (data you *check*, not data you *enter*), write the criterion as a pattern: "verify *a* product appears" rather than "verify 'Blue Widget' appears." Fixtures are for input; patterns are for assertions.
+### 7. Self-check, then hand back
+Before outputting the plan, run it through the Self-check section below. Fix anything the checklist catches. Then output exactly:
+1. A single 3-backtick fenced code block containing the plan body (so the user can copy it cleanly).
+2. Immediately after, a one-line summary in plain prose with mode + agent type + assumed credentials.
+Do NOT wrap the output in another fence (e.g. 4-backticks around the whole thing). The user wants the code block to be copy-pasteable as-is — nesting fences leaks stray ``` lines into the visible output.
+If the user's request is ambiguous about scope (step 1), ask before drafting — do not guess and produce a wrong-shaped plan.
+## Template
+```
+Go to <URL or path>.
+<Action 1 — one or two sentences in natural prose.>
+<Action 2 — including any data the user provides.>
+<… more actions, in the order a real user would perform them.>
+Verify that:
+1. <Observable state 1.>
+2. <Observable state 2.>
+3. <Observable state 3.>
+```
+Setup state (logged-in user, fixture data, etc.) is configured as a pre-step on the plan in the dashboard — never as a `Pre-condition:` line in the prompt body. Don't include such a line.
+For sensitive values, references go inline:
+```
+Enter email {{credentials.testEmail}} and password {{credentials.testPassword}}.
+```
+End with an "Assumes credentials:" footer when applicable:
+```
+Assumes credentials: testEmail, testPassword (configure in Settings → Credentials before running).
+```
+## Inline example
+User says: "Write a test for our login. Lands on /login, real email and password from credentials, expects to land on /dashboard."
+You produce (one 3-backtick fenced code block, then the summary line as plain prose — no outer wrapper):
+```
+Go to /login.
+Enter the email {{credentials.testEmail}} and the password {{credentials.testPassword}}.
+Click the "Sign in" button.
+Verify that:
+1. The browser navigates to /dashboard.
+2. A user menu or avatar is visible in the page header.
+3. The header shows text matching the logged-in user's email or display name.
+4. No error banner appears at the top of the page.
+```
+**Mode:** Quick · **Agent:** Functional · **Assumes credentials:** `testEmail`, `testPassword` (set in Settings → Credentials).
+## Self-check (apply before handing back)
+Walk this list before output. If any item fails, fix the plan before handing it back; don't ship it.
+1. **Start URL is explicit.** "Go to <something>" appears in the first action sentence.
+2. **Acceptance criteria are numbered and declarative.** Each `Verify that:` item describes an observable state, not an action. No "verify it works."
+3. **No inline secrets.** No literal passwords, API keys, or tokens in the prose. Sensitive values use `{{credentials.<name>}}` with no spaces.
+4. **One flow only.** The plan covers a single user journey. "Test login and then test signup" → split into two plans.
+5. **Mode + agent type declared** in the summary line beneath the fenced block.
+6. **No brittle fixtures.** Where data is dynamic ("a product", "a recent order"), the criterion uses a pattern not a literal value. Where the user explicitly named a value, it stays.
+7. **Variable spacing is correct.** `{{credentials.x}}` has no spaces; `{{ input.x }}` has spaces. Re-scan if the plan uses either.
+8. **Credentials footer present** if any `{{credentials.x}}` appears.
+9. **No `Pre-condition:` line in the prompt body.** Setup state (logged-in user, fixture data) belongs in the plan's pre-step config in the dashboard, not in the prose. If a draft has a `Pre-condition:` line, drop it.
+10. **Acceptance criteria match real source where source was readable.** If you read the form/route code in step 1, every assertable text or shape comes from there, not from a guess. Quoting the wrong success-state copy is the most common drift cause.
+11. **Output is a single 3-backtick fenced code block + a Mode/Agent/Credentials prose line, no nested fences.** Wrapping the output in a 4-backtick (or any outer) fence leaks stray ``` lines into the rendered output. One fence around the plan, then prose.
+## Anti-patterns (refuse or fix)
+These are the most common ways a draft goes wrong. Name the failure to the user when you correct it — they learn from watching you self-correct.
+| Anti-pattern | Why it's wrong | Fix |
+|---|---|---|
+| `Test the login` | The agent has no flow to follow and no criteria to check. Reports come back vague. | Expand into actions + a numbered `Verify that:` block. |
+| `Email: alice@example.com / Password: hunter2` inlined | Secrets in prose leak into reports and version control. | Replace with `{{credentials.x}}` and add an "Assumes credentials" footer. |
+| `Test login, then create a project, then invite a user` | Multiple flows in one plan blur pass/fail; one failed step poisons the rest. | Split into separate plans. If they share state, chain them as a Pipeline (see `examples/pipelines.md`). |
+| `Verify the product 'Blue Widget' shows up` | The agent now requires that exact product to exist; the test breaks the day it sells out. | `Verify that a product card with name, price, and image is shown.` |
+| `{{ credentials.x }}` (with spaces) | Dashboard validation rejects this; the test will fail to parse. | `{{credentials.x}}` — no spaces inside the braces. |
+| `Click the button and check it works` | The agent doesn't know what "works" means. | Split into the click action and a separate `Verify that:` item describing the resulting visible state. |
+| Plan starts with re-doing the login the pre-step already did | The pre-step already authenticated the browser; re-logging in is wasted steps and may break shared state. | If a pre-step exists, the main plan starts after it. See `examples/pipelines.md`. |
+## Vocabulary
+| UI label | API / DB string |
+|---|---|
+| Quick mode | `quickTest` |
+| Deep mode | `deepTest` |
+| Functional | `functional` |
+| Accessibility | `accessibility` |
+When the user is pasting into the dashboard, use UI labels in your summary. When the user is calling the API or writing a CI script, use the string form. If you don't know which surface, default to UI labels and note the API equivalent in parentheses.
+## Creating it with the CLI
+The `@test-lab-ai/cli` (command `testlab`) creates everything you've designed — credentials, labels, data fixtures, and the plan(s) — directly in the user's test-lab account, so they don't copy-paste. Offer this whenever it's set up.
+**1. Check it's available and authenticated.** Run `testlab whoami` (or, with no install, `npx @test-lab-ai/cli whoami`). If it says "not authenticated," the user runs `testlab login` once or sets `TESTLAB_API_KEY` — do NOT create anything until auth works. If the `testlab` command isn't found, fall back to `npx @test-lab-ai/cli …`.
+**2. Survey what already exists, and reuse it.** Before creating anything, inventory the account so you don't duplicate resources or ask for things that already exist:
+- `testlab projects list` (projects)
+- `testlab credentials list` (credential keys; values are never shown)
+- `testlab labels list` (labels)
+- `testlab data list` (data fixtures)
+- `testlab plans list` (existing plans)
+Then design the plan to REUSE what fits:
+- reference an existing credential key (e.g. `{{credentials.testPassword}}`) instead of asking for a secret that already exists;
+- reuse an existing label and an existing data fixture rather than making near-duplicates;
+- if the flow needs setup state (a login, a seeded record), wire an EXISTING plan as a pre-step by name (e.g. a "Login" plan for an auth-gated page) instead of writing a new one;
+- choose the project: no projects means account-level; exactly one is used automatically; if there are several, **show the user the list and ask** (never silently fall back to `--project none`), and **propose a name-matching project** when one fits (e.g. "TestLab Admin" for an admin-dashboard test). An agent can't answer the CLI's interactive prompt, so resolve this now.
+Create only the resources that are missing.
+**3. Ask first.** `testlab import` writes to the user's account. Confirm they want you to create the resources (vs. just receiving the plan to paste).
+**4. Build one import bundle** with only the NEW resources the plan needs (plus references to the existing ones found in step 2), created in order (credentials → labels → fixtures → plans). Run `testlab examples` for the exact shape of every resource. For example, write `bundle.json`:
+```json
+{
+  "credentials": [ { "key": "password", "value": "<the user gives you this — never invent one>" } ],
+  "labels": ["smoke"],
+  "fixtures": [
+    { "key": "newUser", "fields": [
+      { "key": "email", "mode": "dynamic", "generator": "internet.email" }
+    ] }
+  ],
+  "plans": [
+    { "name": "Sign up", "prompt": "Go to https://app.example.com/signup and register with {{data.newUser.email}} / {{credentials.password}}. Confirm the welcome screen.", "labels": ["smoke"] }
+  ]
+}
+```
+**5. Preview, then create:** `testlab import bundle.json --dry-run`, then `testlab import bundle.json --project <id|name>` (use the project resolved in step 2; omit `--project` only when the account has zero or one projects).
+Rules: get secret VALUES from the user (the CLI stores them encrypted, never echoed). Reference fixtures as `{{data.<fixture>.<field>}}` and credentials as `{{credentials.<key>}}` in the prompt. Wire plans together with pre-steps via a `ref` handle. The CLI ships a deep agent guide as `AGENTS.md`; `testlab examples` is the canonical, always-current reference.
+## Going further
+- **Create plans (and their credentials, labels, and data) directly** instead of pasting — use the `@test-lab-ai/cli`. See "Creating it with the CLI" above.
+- **Two ways to drive this skill** (author a test while you build a feature, or import tests you already have) — see `examples/workflows.md`.
+- **Variable syntax in depth** (pre-steps, pipeline inputs, devices) — see `references/syntax.md`.
+- **Triggering plans from CI** (only when the user has an API key + an existing `testPlanId`) — see `references/run-via-api.md`.
+- **Auth flow templates** to adapt — `examples/auth.md`.
+- **Pipeline patterns** for multi-step flows that share browser state — `examples/pipelines.md`.
+- **The product's own example library** with cookbook plans for ecommerce, SaaS, social, booking, content, and general web — [test-lab.ai/docs/examples](https://test-lab.ai/docs/examples).

package/skills/test-lab-plan/examples/auth.md ADDED Viewed

@@ -0,0 +1,145 @@
+# Auth flow templates
+Adapt these for any auth surface. Each block is paste-ready into the test-lab.ai dashboard. Replace bracketed placeholders, keep `{{credentials.x}}` references intact.
+These templates favor patterns over fixtures (e.g., "a user menu is visible" rather than "the text 'alice' is shown"). When the user's app has a fixed text element to assert on, swap the pattern for the literal.
+---
+## Login — happy path
+**Mode:** Quick · **Agent:** Functional · **Assumes credentials:** `loginEmail`, `loginPassword`
+```
+Go to /login.
+Enter the email {{credentials.loginEmail}} and the password {{credentials.loginPassword}}.
+Click the "Sign in" button.
+Verify that:
+1. The browser navigates away from /login (typically to /dashboard, /home, or /).
+2. A user menu, avatar, or "Sign out" control is visible in the page header.
+3. The header shows text matching the logged-in user's email or display name.
+4. No error banner is shown at the top of the page.
+```
+---
+## Login — wrong password (sad path)
+**Mode:** Quick · **Agent:** Functional · **Assumes credentials:** `loginEmail`
+```
+Go to /login.
+Enter the email {{credentials.loginEmail}} and the password "deliberately-wrong-password-1234!".
+Click the "Sign in" button.
+Verify that:
+1. The browser stays on /login (no redirect to a logged-in surface).
+2. An error message about invalid credentials is visible near the form.
+3. The error does not reveal whether the email exists in the system (no "user not found" or "email not registered" wording).
+4. The password field is empty or the form is in an "error" state ready for retry.
+```
+---
+## Signup — happy path
+**Mode:** Quick · **Agent:** Functional
+```
+Go to /signup.
+Fill the form with:
+- Email: a unique address using the pattern test-{timestamp}@example.com
+- Password: TestPassword123!
+- Confirm password: TestPassword123! (only if the form has this field)
+Submit the form.
+Verify that one of the following happens:
+1. The page shows a "check your email to verify" message, OR
+2. The browser redirects to a logged-in surface (typically /dashboard, /onboarding, or /welcome), OR
+3. A success banner indicates the account was created.
+Verify that:
+4. No error message is shown at any point during submission.
+5. The form does not stay in an unsubmitted state (no spinner stuck indefinitely).
+```
+---
+## Forgot password
+**Mode:** Quick · **Agent:** Functional · **Assumes credentials:** `loginEmail`
+```
+Go to /forgot-password (or click "Forgot password?" from /login).
+Enter the email {{credentials.loginEmail}}.
+Submit the form.
+Verify that:
+1. A confirmation message appears indicating an email has been sent (e.g., "Check your inbox").
+2. The message does not reveal whether the email is registered (it should look the same for both registered and unregistered emails — this is a security property).
+3. The browser stays on the forgot-password surface or moves to a confirmation surface; it does not redirect to /login or /dashboard.
+4. No error banner is shown.
+```
+---
+## Change password (authenticated)
+**Mode:** Quick · **Agent:** Functional · **Assumes credentials:** `loginPassword` · **Requires:** logged-in pre-step
+The user must already be logged in: configure a login pre-step on this plan (see `examples/pipelines.md`). This setup state is configured on the plan, not written into the prompt body.
+```
+Go to /settings/password (or /account/security).
+Fill the password change form with:
+- Current password: {{credentials.loginPassword}}
+- New password: NewTestPassword456!
+- Confirm new password: NewTestPassword456!
+Submit the form.
+Verify that:
+1. A success message appears confirming the password was changed.
+2. The form clears or moves to a confirmation state.
+3. No error banner is shown.
+```
+(If you adapt this to actually verify the new password works, run a separate logout-then-login plan after this one — don't bundle the verification into this plan.)
+---
+## Logout
+**Mode:** Quick · **Agent:** Functional · **Requires:** logged-in pre-step
+The user must already be logged in: configure a login pre-step on this plan. This setup state is configured on the plan, not written into the prompt body.
+```
+Go to any authenticated surface (e.g., /dashboard).
+Open the user menu (typically an avatar or initials in the header) and click "Sign out" or "Log out".
+Verify that:
+1. The browser redirects to /login, /, or a public landing surface.
+2. The user menu / avatar is no longer visible in the header.
+3. Visiting /dashboard now redirects to /login (the session is fully cleared).
+```
+---
+## Notes for adapting these
+- **Path conventions vary.** Swap `/login`, `/signup`, `/forgot-password`, `/dashboard` for whatever the target site uses. If the user's site uses `/sign-in` or `/account/login`, keep the prose natural and use their convention.
+- **Field labels vary.** "Email" might be "Username" or "Work email"; "Sign in" might be "Continue" or "Log in". Use the wording from the actual page when known; otherwise the natural-language descriptions above ("the email field", "the sign in button") are flexible enough for the agent to map.
+- **MFA / 2FA** is not covered here. If the flow requires a code, the test will fail at that step unless the credential is a TOTP-derived value the credential store can produce — outside the scope of this template set.
+- **CAPTCHAs (reCAPTCHA, Cloudflare Turnstile)** can break automated runs. If the target page has one, mention it to the user — they may need to allowlist test-lab.ai's runner IPs or move the protection to a non-test environment.

package/skills/test-lab-plan/examples/pipelines.md ADDED Viewed

@@ -0,0 +1,163 @@
+# Pipeline patterns
+Pipelines chain test plans on the same browser instance. Use them when a flow requires being already logged in, or when a multi-step user journey is more debuggable as separate plans than as one giant plan.
+A pipeline is **two or more plans** in the dashboard:
+- One or more **pre-steps** (regular plans with the "Use as a pre-step" checkbox enabled)
+- One **main plan** that the user attaches the pre-step(s) to
+The agent in each step is fresh (no memory of previous steps), but the **browser state carries forward** — cookies, localStorage, the current URL. That's how "log in once, test ten things" works.
+When you produce a pipeline, output **both plans** and label which is the pre-step and which is the main plan. The user has to create them as separate entries in the dashboard.
+---
+## Pattern 1 — Reusable login pre-step
+The most common pipeline. Build this once; attach it to every authed test.
+### Pre-step: "Login (reusable)"
+**Mode:** Quick · **Agent:** Functional · **Use as a pre-step:** ✅
+```
+Go to /login.
+Enter the email {{ input.email | credentials.loginEmail }} in the email field.
+Enter the password {{ input.password | credentials.loginPassword }} in the password field.
+Click the "Sign in" button.
+Verify that:
+1. The browser navigates away from /login.
+2. A user menu, avatar, or "Sign out" control is visible in the page header.
+```
+The `{{ input.x | credentials.y }}` form lets the same pre-step serve any caller: most tests will leave the inputs empty and fall back to the default credential, but a specific test can override with different inputs (e.g., admin vs. regular user).
+### Main plan: "Dashboard loads correctly"
+**Mode:** Quick · **Agent:** Functional · **Pre-step:** Login (reusable), with **Fail entire test if a pre-step fails** ✅
+```
+Go to /dashboard.
+Verify that:
+1. The dashboard page loads without an error banner.
+2. The user's name or email is visible in the header.
+3. The primary navigation (sidebar or top nav) is visible with the expected sections.
+4. No loading spinner remains on the page after 5 seconds.
+```
+Note: the main plan starts with `Go to /dashboard`, **not** with a re-login. The pre-step has already authenticated the browser. Re-doing login in the main plan wastes steps and may break shared state.
+---
+## Pattern 2 — Multi-role testing
+Test admin and regular user perspectives in sequence. Two pre-steps, each with a different credential.
+### Pre-step A: "Login as admin"
+**Use as a pre-step:** ✅ · **Assumes credentials:** `adminEmail`, `adminPassword`
+```
+Go to /login.
+Enter {{ input.email | credentials.adminEmail }} and {{ input.password | credentials.adminPassword }}.
+Click "Sign in" and verify the dashboard loads.
+```
+### Pre-step B: "Login as regular user"
+Same prompt, but defaults to `userEmail` / `userPassword` instead.
+### Main plan: "Permission boundaries visible"
+Attach **only** the admin or only the user pre-step (depending on which role you're testing), then:
+```
+Go to /settings.
+Verify that:
+1. The "Team Management" section is visible (admin) OR not visible (user).
+2. The "Billing" tab is clickable (admin) OR shows a "contact your admin" message (user).
+3. The "Audit Log" link is present (admin) OR absent (user).
+```
+Two main plans — one per role — give you clean pass/fail per role. Don't try to put both roles in one plan.
+---
+## Pattern 3 — Multi-step CRUD with shared state
+Each step is independently debuggable. If "delete" breaks, you re-run only that step.
+### Pre-step: "Login" (the reusable one from Pattern 1)
+### Step 1: "Create a project"
+**Use as a pre-step:** ✅ (so step 2 can attach this as its pre-step)
+```
+Go to /projects/new.
+Fill the form with:
+- Name: "Pipeline Test Project {{ input.suffix | 'default' }}"
+- URL: https://example.com
+Click "Create".
+Verify that:
+1. The browser redirects to /projects/<id> (the new project's surface).
+2. A success message confirms creation.
+3. The project name appears in the page header.
+```
+### Step 2 (main plan): "List shows the new project, can be deleted"
+**Pre-steps in order:** Login → Create a project (with `suffix: "step2"`)
+```
+Go to /projects.
+Verify that:
+1. A project card with name containing "Pipeline Test Project step2" is visible.
+2. The card has visible "Edit" and "Delete" controls.
+Click the "Delete" control on that project card.
+Confirm the deletion in the dialog that appears.
+Verify that:
+3. The card disappears from the list.
+4. A "Project deleted" confirmation appears (toast or banner).
+5. Reloading /projects does not bring the project back.
+```
+The `suffix` input on step 1 makes the created project's name predictable, so step 2's verification can reference the exact name that step 1 created.
+---
+## Anti-patterns
+- **Pre-step that does too much.** A single pre-step that logs in *and* seeds data *and* navigates to a section gives you one big black box if something fails. Split: one step per setup concern.
+- **Main plan re-runs setup.** If the pre-step logs in, the main plan starts logged-in. Don't add `Go to /login` at the top of the main plan.
+- **Hardcoded credentials in pre-step prose.** Same rule as solo plans: use `{{ input.x | credentials.y }}` so callers can override and so values aren't leaked.
+- **Forgetting fail-fast.** A pre-step failure usually means later steps will fail in unhelpful ways (missing auth, missing data). Default to **Fail entire test if a pre-step fails ✅** unless the steps are genuinely independent.
+- **Pre-step verifies too aggressively.** A login pre-step's verifications should confirm "I'm logged in" — not "the dashboard renders perfectly." Heavy verification belongs in the main plan, not the setup.
+---
+## When to recommend a pipeline
+Recommend splitting into a pipeline when:
+- The flow requires being logged in (separate login pre-step).
+- The user describes setup that could be reused across many tests (login, seed-a-project, pick-a-tenant).
+- The user describes a sequence where each step is independently meaningful (CRUD, multi-role).
+Don't split into a pipeline when:
+- The whole flow is a single user journey ("user lands on /pricing → clicks Buy → fills card → sees confirmation"). One plan.
+- The user just wants a smoke test. One plan.
+Pipelines have overhead (more dashboard config, more reports to scan). Default to a single plan; introduce pipelines when the structure earns it.

package/skills/test-lab-plan/examples/workflows.md ADDED Viewed

@@ -0,0 +1,85 @@
+# Two ways to drive the test-lab-plan skill
+The skill turns a described flow into a test-lab plan. There are two common
+workflows. In both, the design rules are identical (explicit URL, declarative
+`Verify that:` criteria, `{{credentials.<key>}}` for secrets, `{{data.<fixture>.<field>}}` for
+generated data, and **one label** by default). They differ in where the input
+comes from and whether you create one plan or many.
+## A. Author a test while building or changing a feature
+You are in the repo adding or changing behavior and want a test-lab test that
+covers it.
+1. Trigger the skill ("write a test-lab test for the new password-reset page").
+2. The skill **reads the real source** (the new or changed component and route),
+   so the criteria quote actual on-screen text and response shapes, not guesses.
+3. It drafts one plan, with a single label (the feature area).
+4. If the `testlab` CLI is set up, it **offers to create the plan directly** in
+   your account (see SKILL.md, "Creating it with the CLI"). Otherwise it hands
+   back a paste-ready plan for the dashboard.
+Example. You just added `/account/reset-password`. After reading the form
+component, the skill produces:
+```
+Go to https://app.example.com/account/reset-password.
+Request a reset for {{data.user.email}}, then open the reset link and set a new password that meets the strength rules shown on the form.
+Verify that:
+1. A confirmation reading "Check your email" appears after requesting the reset.
+2. After the new password is set, the page redirects to /login.
+3. A success banner with the text "Password updated" is shown.
+```
+**Mode:** Quick · **Agent:** Functional · **Label:** `auth` · **Fixture:** `user.email`
+With the CLI set up, the skill writes a bundle and runs `testlab import bundle.json`:
+```json
+{
+  "fixtures": [
+    { "key": "user", "fields": [ { "key": "email", "mode": "dynamic", "generator": "internet.email" } ] }
+  ],
+  "plans": [
+    { "name": "Reset password", "prompt": "Go to https://app.example.com/account/reset-password. …", "labels": ["auth"] }
+  ]
+}
+```
+## B. Import test plans you already have
+You have tests elsewhere (Playwright/Cypress specs, Cucumber `.feature` files, a
+TestRail/Zephyr export, or a prose doc) and want them in test-lab.
+1. Point the skill at them ("convert these Playwright specs into test-lab plans").
+2. For each test, it produces a plan: explicit URL in the prompt, secrets as
+   `{{credentials.<key>}}`, generated data as `{{data.<fixture>.<field>}}`, and one label.
+3. It assembles a single **import bundle** (credentials + fixtures + plans) and
+   runs `testlab import ./plans` (with `--dry-run` first). See the CLI's
+   `AGENTS.md` and `testlab examples` for the exact shapes.
+Example bundle from one converted login spec:
+```json
+{
+  "credentials": [ { "key": "password", "value": "<the user provides this>" } ],
+  "fixtures": [
+    { "key": "user", "fields": [ { "key": "email", "mode": "dynamic", "generator": "internet.email" } ] }
+  ],
+  "plans": [
+    { "name": "Login", "prompt": "Go to https://app.example.com/login and sign in with {{data.user.email}} / {{credentials.password}}. Confirm the dashboard loads.", "labels": ["smoke"] }
+  ]
+}
+```
+The difference from A: importing is usually **batch** (many tests at once, often
+a directory of `*.json`), while authoring-while-building is usually **one** plan
+grounded in the code you just wrote.
+## The CLI and the skill, in one line
+The **skill writes** plans (this skill); the **CLI imports** them
+(`@test-lab-ai/cli`). Install the CLI with `npm i -g @test-lab-ai/cli`, and the
+skill itself with `testlab skills install`. The canonical, always-current
+reference for every resource shape is `testlab examples`.

package/skills/test-lab-plan/references/run-via-api.md ADDED Viewed

@@ -0,0 +1,97 @@
+# Triggering plans from CI / via API
+Read this file **only** when the user explicitly asks how to run a plan from CI, programmatically, or via API. The skill's primary job is writing plans; running them is a separate concern that requires an API key and an existing `testPlanId` the user already created in the dashboard.
+If the user has not yet saved their plan in the dashboard, they cannot run it via API yet — the API takes IDs, not prose. Steer them through paste → save → grab ID → then return to this guide.
+## Endpoint
+```
+POST https://test-lab.ai/api/v1/run
+Authorization: Bearer tl_xxxxx
+Content-Type: application/json
+```
+API keys come from **Settings → API Keys** in the dashboard. Treat them like passwords; do not commit them. In CI, supply the key via a secret environment variable (e.g., `TESTLAB_API_KEY`).
+## Request body
+Exactly **one** of `testPlanIds`, `projectId`, or `label` is required. Every selector is scoped to the API key's account — the API never resolves plans, projects, or labels owned by other accounts.
+| Field | Type | Description |
+|---|---|---|
+| `testPlanIds` | number[] or comma-string | Run one or more plans (e.g., `[1,2,3]` or `"1,2,3"`) |
+| `projectId` | number | Run every plan in the project |
+| `label` | string | Run every plan tagged with this label name (matched by name within the account) |
+| `testType` | `"quickTest"` or `"deepTest"` | Optional - overrides the plan's saved default |
+| `buildId` | string (≤100 chars) | Optional - your CI commit SHA / build number for traceability |
+| `cookies` | array of `{name, value, domain}` | Optional - runtime cookies; override stored ones |
+| `preferScript` | boolean | Optional - when true, each plan runs as its saved Playwright script if one exists (deterministic, no LLM cost). Falls back to AI when no script is on file. |
+| `triggerPipelinePreSteps` | boolean | Optional, default `false`. Plans configured as a pipeline pre-step (referenced by another plan as a pre-step) are silently excluded from batch runs by default — they expect input parameters or a specific browser state and produce false-failures when run solo. Set `true` to include them (e.g. you want to smoke-test the login pre-step on its own with default credentials). |
+## Response
+Always the same array shape regardless of selector:
+```json
+{
+  "jobs": [{ "jobId": "uuid", "testPlanId": 123, "testPlanName": "...", "testType": "quickTest", "status": "running" }, ...],
+  "triggered": 3,
+  "failed": 0,
+  "skipped": 0,
+  "buildId": "abc123"
+}
+```
+`status` per job is one of: `running`, `queued`, `pending`, `error`, `skipped`. A `skipped` entry carries an `error` message explaining why (e.g. "Plan is configured as a pipeline pre-step. Pass triggerPipelinePreSteps: true to include pre-step plans in batch runs."). `triggered` excludes both `error` and `skipped`.
+The endpoint returns immediately after queueing — it does not wait for tests to finish. Poll the job, set up a webhook, or use the `buildId` to look up status from a CI status check later.
+## CI example (GitHub Actions)
+```yaml
+- name: Trigger test-lab.ai smoke
+  env:
+    TESTLAB_API_KEY: ${{ secrets.TESTLAB_API_KEY }}
+  run: |
+    curl -fsSL -X POST https://test-lab.ai/api/v1/run \
+      -H "Authorization: Bearer $TESTLAB_API_KEY" \
+      -H "Content-Type: application/json" \
+      -d "{\"projectId\": ${{ vars.TESTLAB_PROJECT_ID }}, \"testType\": \"quickTest\", \"buildId\": \"$GITHUB_SHA\"}"
+```
+For per-PR runs, `projectId` runs the whole project's plans; `testPlanIds` lets you pick a subset.
+## Webhooks
+Configure webhooks at **Settings → Webhooks** to get notified when a job completes (instead of polling). The webhook payload includes `jobId`, `status`, `testPlanId`, `buildId`, and a link to the report. See [test-lab.ai/docs/webhooks](https://test-lab.ai/docs/webhooks) for the full event schema.
+## Common errors
+| Status | Body | What to check |
+|---|---|---|
+| 401 | `Invalid API key` | Token revoked, typo in `Authorization` header, missing `Bearer ` prefix |
+| 402 | `Insufficient credits...` | Top up the org's credit balance |
+| 404 | `No test plans found` | Wrong ID in `testPlanIds` or wrong `projectId`, or the key belongs to a different org |
+| 400 | `One of testPlanIds, projectId, or label is required` | Body missing the selector |
+## When pipelines / pre-steps are involved
+A plan with pre-steps configured in the dashboard runs as a pipeline automatically — pre-steps execute first, then the main test, all sharing browser state. No special API parameter needed; just include the main plan in `testPlanIds` (or its `projectId` / `label`).
+With `preferScript: true`: if **every step** (every pre-step + the main) has a saved script for the chosen device, the whole pipeline runs as a script pipeline (state chains via Playwright `storageState`, no LLM cost). If **any step is missing a script**, the entire pipeline falls back to AI mode — mixing script + AI mid-pipeline can't share state cleanly. All-or-nothing.
+The `jobs[]` entry returned carries the **main plan's** job ID. Pre-step jobs share the same `pipeline_id` + `run_group_id` and can be looked up by querying jobs with that group ID.
+Don't confuse `triggerPipelinePreSteps` (above) with this: that flag controls whether plans that ARE pre-steps (used by others) get triggered when listed in a batch — independent from the auto-pipeline-execution behavior here, which fires for plans that HAVE pre-steps.
+## Skill behavior
+When you cite this file to the user, output:
+1. The minimal `curl` for their case (testPlanIds / projectId / label)
+2. A note about which env var to set the API key in
+3. A reminder that the API takes IDs (or label names) and only resolves them on the API key's account; the plan / project / label must already exist there
+4. If they want script-mode runs (cheaper, no LLM cost), include `"preferScript": true` in the body and explain it falls back to AI per-plan when no script is on file (for a pipeline, the whole pipeline falls back to AI if any step lacks a script)
+5. A pointer to webhooks if they ask "how do I know when it's done"
+Do **not** generate API keys, do **not** infer `testPlanIds` / `projectId` / `label` values, and do **not** offer to actually call the API. The skill's role ends at "here is the curl you would run."

package/skills/test-lab-plan/references/syntax.md ADDED Viewed

@@ -0,0 +1,94 @@
+# Variable, pre-step, and device syntax
+Read this file when a test plan needs credentials, pipeline inputs, multi-step shared state, or a non-default device. Skip it for plain plans that just describe a flow.
+## Credentials
+Store credentials in the dashboard at **Settings → Credentials** (key/value pairs, organization-scoped). Reference them in plans:
+```
+Enter the email {{credentials.loginEmail}} and the password {{credentials.loginPassword}}.
+```
+The AI agent never sees the actual values — they're injected directly into form fields at runtime.
+### Syntax rules (validated by the dashboard)
+| Pattern | Valid | Notes |
+|---|---|---|
+| `{{credentials.loginEmail}}` | yes | Canonical form |
+| `{{credentials.user_password}}` | yes | Underscores allowed |
+| `{{credentials.api2Key}}` | yes | Numbers allowed (not at start) |
+| `{{credentials.2faCode}}` | no | Name cannot start with a number |
+| `{{ credentials.email }}` | no | No spaces inside braces |
+| `{credentials.email}` | no | Must use double braces |
+Names are case-sensitive. The dashboard rejects plans that reference a credential that doesn't exist, so **always include an "Assumes credentials:" footer** in your output listing what the user needs to set up.
+### Where credentials work
+- **Test prompts** (the plan body) – primary use case.
+- **Project/plan cookie values**: `Value: {{credentials.sessionToken}}`
+- **Custom HTTP headers**: `Value: Bearer {{credentials.apiKey}}`
+## Pipelines and pre-steps
+A **pipeline** chains test plans on the same browser instance. Cookies, localStorage, and DOM state persist across steps. The most common pattern is a login pre-step + a feature test that runs already authenticated.
+### Pre-steps
+A pre-step is just a regular test plan with the **"Use as a pre-step for other test plans"** checkbox enabled. It accepts inputs declared with `{{ input.<name> }}` syntax:
+```
+Go to https://myapp.com/login.
+Enter {{ input.email | credentials.loginEmail }} in the email field.
+Enter {{ input.password | credentials.loginPassword }} in the password field.
+Click Sign In and verify the dashboard loads.
+```
+### Input syntax (pre-steps only — **with spaces**)
+| Form | Meaning |
+|---|---|
+| `{{ input.email }}` | Required parameter, no default |
+| `{{ input.email \| 'fallback@test.com' }}` | Parameter with a literal default |
+| `{{ input.email \| credentials.loginEmail }}` | Parameter defaulting to a stored credential |
+### Why two syntaxes
+`{{credentials.x}}` and `{{ input.x }}` are intentionally distinguishable. Credentials are static lookups (no spaces, terse). Inputs are template expressions that may carry a default after `|` (spaces, more like Liquid). Don't normalize the spacing — the dashboard validators rely on the difference.
+### Attaching pre-steps
+In the dashboard:
+1. Open the main plan, click **Add pre-step**
+2. Pick the pre-step from the dropdown
+3. Fill the input values (use the key icon to select credentials)
+4. Toggle **"Fail entire test if a pre-step fails"** when later steps depend on earlier ones (almost always for login pre-steps)
+When the user describes a flow that requires being logged in, the right-shaped output is **two plans**: a login pre-step + the main plan that starts after login. Don't put the login at the top of the main plan.
+### When to use pipelines vs cookie injection
+- **Cookie injection** (configured at the project or plan level) – fastest; use when you can extract session cookies from your app and don't need to test the login UI itself.
+- **Pipelines** – when you're testing the login flow, when cookies are HTTP-only and hard to extract, or when setup needs multiple browser steps. Recommend pipelines if the user wants reusable, shareable building blocks.
+## Devices
+Plans run on Playwright device descriptors. The dashboard's default is `Desktop Chrome`. Common values:
+- `Desktop Chrome`, `Desktop Firefox`, `Desktop Safari`, `Desktop Edge`
+- `iPhone 15 Pro`, `iPhone SE`
+- `Pixel 8`, `Pixel 5`
+- `iPad Pro 11`
+A single plan can be configured to run on multiple devices — the same plan executes once per device, producing one report each. Use this when a flow has known mobile/desktop differences (responsive nav, mobile-only menus, touch interactions).
+## Test type strings
+When pasting into the dashboard, the user picks **Quick mode** or **Deep mode** from a dropdown. When the same plan is triggered via the API or stored in the DB, the strings are:
+- `quickTest` – Quick mode
+- `deepTest` – Deep mode
+These are the only two valid values for `testType` in the `/api/v1/run` payload and for the `default_test_type` column. There is no third option.