elasticdash-test 0.1.18 → 0.1.19-alpha-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,6 +10,7 @@ An AI-native test runner for ElasticDash workflow testing. Built for async AI pi
10
10
  - [Tool Recording](#tool-recording)
11
11
  - [AI Call Recording](#ai-call-recording)
12
12
  - [HTTP Workflow Mode](#http-workflow-mode)
13
+ - [CI/CD Runner](#cicd-runner)
13
14
  - [Configuration](#configuration)
14
15
 
15
16
  ### Open Detailed Docs
@@ -32,6 +33,7 @@ An AI-native test runner for ElasticDash workflow testing. Built for async AI pi
32
33
  - 📊 **Interactive dashboard** — browse workflows, debug traces, validate fixes visually
33
34
  - 🤖 **Agent mid-trace replay** — resume long-running agents from any task without re-execution
34
35
  - 🌐 **HTTP workflow mode** — run workflows against your live dev server for framework-heavy apps (Next.js, Remix, etc.) with full AI and tool call observability
36
+ - 🚀 **CI/CD runner** — fetch test groups from your ElasticDash project, execute tests, submit results, and fail the build on regressions
35
37
 
36
38
  ---
37
39
 
@@ -389,6 +391,162 @@ This mode is intended for local development and testing scenarios. For productio
389
391
 
390
392
  ---
391
393
 
394
+ ## CI/CD Runner
395
+
396
+ Run your ElasticDash test groups directly from CI pipelines. The `ci` command fetches active test groups from your project via API key, executes each test locally, submits results back to the backend, and exits with code 1 if any test fails.
397
+
398
+ ### How It Works
399
+
400
+ ```
401
+ ┌──────────────┐ GET /testgroups/by-project ┌──────────────────┐
402
+ │ CI Runner │ ──────────────────────────────────→ │ ElasticDash API │
403
+ │ (SDK side) │ ←────────────────────────────────── │ (your backend) │
404
+ │ │ test groups + tests + expectations │ │
405
+ │ │ │ │
406
+ │ execute │ POST /testgroups/:id/runs │ │
407
+ │ each test │ ──────────────────────────────────→ │ stores results │
408
+ │ locally │ │ │
409
+ │ │ POST /testgroups/batches │ │
410
+ │ │ ──────────────────────────────────→ │ groups the runs │
411
+ └──────────────┘ └──────────────────┘
412
+ ```
413
+
414
+ 1. **Fetch** — Calls `GET /api/testgroups/by-project` on the backend with the API key (scoped to project). Returns all active test groups with their tests and expectations.
415
+ 2. **Execute** — For each test, runs it locally using existing SDK infrastructure:
416
+ - **Single-step tests** — replays a specific tool or AI step with `mock_input` and `frozen_events`
417
+ - **Full-flow tests** — runs the entire workflow from `ed_workflows.ts` with `workflow_input`
418
+ 3. **Evaluate** — Checks all expectations (token-budget, latency-budget, output-contains, output-schema, tool-called, determinism, llm-judge). Respects `run_count` and `pass_threshold`.
419
+ 4. **Submit** — Sends each result to `POST /api/testgroups/:id/runs` with single run data, expectation results, and git metadata.
420
+ 5. **Batch** — Calls `POST /api/testgroups/batches` to create a batch grouping all run IDs for dashboard viewing.
421
+
422
+ ### CLI Usage
423
+
424
+ ```bash
425
+ # Basic — uses env vars
426
+ ELASTICDASH_API_URL=https://your-api.com ELASTICDASH_API_KEY=ed_xxx npx elasticdash ci
427
+
428
+ # Explicit flags
429
+ npx elasticdash ci --server https://your-api.com --api-key ed_xxx
430
+
431
+ # Filter by workflow or tags
432
+ npx elasticdash ci --server $ELASTICDASH_API_URL --api-key $ELASTICDASH_API_KEY \
433
+ --workflow checkout --tags payment,critical
434
+
435
+ # Pass git metadata (auto-detected in GitHub Actions / GitLab CI)
436
+ npx elasticdash ci --server $ELASTICDASH_API_URL --api-key $ELASTICDASH_API_KEY \
437
+ --git-branch main --git-commit abc123
438
+ ```
439
+
440
+ **All flags:**
441
+
442
+ | Flag | Env Var | Description |
443
+ |------|---------|-------------|
444
+ | `--server <url>` | `ELASTICDASH_API_URL` | Backend API URL (required) |
445
+ | `--api-key <key>` | `ELASTICDASH_API_KEY` | Project API key (required) |
446
+ | `--workflow <name>` | — | Filter test groups by workflow name |
447
+ | `--tags <t1,t2>` | — | Filter test groups by tags (comma-separated) |
448
+ | `--triggered-by <src>` | — | Trigger source label (default: `ci`) |
449
+ | `--git-branch <branch>` | Auto-detected | Git branch name |
450
+ | `--git-commit <sha>` | Auto-detected | Git commit SHA |
451
+ | `--git-commit-message <msg>` | Auto-detected | Commit message |
452
+ | `--git-pr-number <n>` | Auto-detected | PR number |
453
+ | `--git-pr-url <url>` | Auto-detected | PR URL |
454
+
455
+ ### GitHub Actions Example
456
+
457
+ ```yaml
458
+ name: AI Tests
459
+ on: [push, pull_request]
460
+
461
+ jobs:
462
+ test:
463
+ runs-on: ubuntu-latest
464
+ steps:
465
+ - uses: actions/checkout@v4
466
+ - uses: actions/setup-node@v4
467
+ with:
468
+ node-version: 20
469
+
470
+ - run: npm ci
471
+
472
+ - name: Run ElasticDash CI tests
473
+ run: npx elasticdash ci
474
+ env:
475
+ ELASTICDASH_API_URL: ${{ secrets.ELASTICDASH_API_URL }}
476
+ ELASTICDASH_API_KEY: ${{ secrets.ELASTICDASH_API_KEY }}
477
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} # if tests use OpenAI
478
+ ```
479
+
480
+ Git branch, commit SHA, PR number, and PR URL are auto-detected from GitHub Actions environment variables — no extra flags needed.
481
+
482
+ ### GitLab CI Example
483
+
484
+ ```yaml
485
+ ai-tests:
486
+ stage: test
487
+ image: node:20
488
+ script:
489
+ - npm ci
490
+ - npx elasticdash ci
491
+ variables:
492
+ ELASTICDASH_API_URL: $ELASTICDASH_API_URL
493
+ ELASTICDASH_API_KEY: $ELASTICDASH_API_KEY
494
+ ```
495
+
496
+ ### Programmatic Usage
497
+
498
+ ```ts
499
+ import { runCI } from 'elasticdash-test'
500
+
501
+ const summary = await runCI({
502
+ serverUrl: 'https://your-api.com',
503
+ apiKey: 'ed_xxx',
504
+ workflowName: 'checkout', // optional filter
505
+ tags: ['payment', 'critical'], // optional filter
506
+ })
507
+
508
+ console.log(`${summary.passed}/${summary.total} passed`)
509
+ process.exit(summary.failed > 0 ? 1 : 0)
510
+ ```
511
+
512
+ ### Output
513
+
514
+ ```
515
+ [elasticdash ci] Fetching test groups...
516
+ [elasticdash ci] Found 2 test group(s), 5 test(s) total.
517
+
518
+ Checkout Flow (3 tests)
519
+ validate-input ... PASS (234ms)
520
+ charge-card ... PASS (1823ms)
521
+ send-confirmation ... FAIL (945ms)
522
+ [output-contains] Output text check failed.
523
+
524
+ Refund Flow (2 tests)
525
+ check-eligibility ... PASS (412ms)
526
+ process-refund ... PASS (1567ms)
527
+
528
+ ──────────────────────────────────────────────────
529
+ Summary
530
+ ──────────────────────────────────────────────────
531
+ Total: 5
532
+ Passed: 4
533
+ Failed: 1
534
+ Duration: 5.0s
535
+ Batch ID: 42
536
+ ──────────────────────────────────────────────────
537
+
538
+ [elasticdash ci] 1 test(s) failed.
539
+ ```
540
+
541
+ ### Prerequisites
542
+
543
+ - An ElasticDash project with an API key (create one in the dashboard under Settings → API Keys)
544
+ - Active test groups with tests and expectations configured in the dashboard
545
+ - `ed_tools.ts` and/or `ed_workflows.ts` in your project root (for the executor to discover tools and workflows)
546
+ - AI provider API keys in the environment if tests use LLM calls (e.g., `OPENAI_API_KEY`)
547
+
548
+ ---
549
+
392
550
  ## Configuration
393
551
 
394
552
  Optional `elasticdash.config.ts` at project root:
@@ -461,6 +619,15 @@ ELASTICDASH_RUN_ID=<run-id-from-dashboard> # optional, enables step freezing
461
619
 
462
620
  `wrapTool` and `wrapAI` will auto-connect on their first call. See [Dashboard Auto-Detection](#dashboard-auto-detection-env-var-mode) for details.
463
621
 
622
+ **CI runner (execute test groups from your project):**
623
+
624
+ ```ts
625
+ import { runCI } from 'elasticdash-test'
626
+
627
+ const summary = await runCI({ serverUrl: 'https://your-api.com', apiKey: 'ed_xxx' })
628
+ // summary.total, summary.passed, summary.failed, summary.batchId, summary.results
629
+ ```
630
+
464
631
  ---
465
632
 
466
633
  ## License
@@ -0,0 +1,23 @@
1
+ import type { APITestGroup } from './types.js';
2
+ /**
3
+ * Fetch all active test groups (with nested tests & expectations) for the project
4
+ * scoped to the API key.
+ *
+ * @param serverUrl Backend base URL; the request targets `/api/testgroups/by-project` beneath it.
+ * @param apiKey Project API key, sent as a `Bearer` token in the `Authorization` header.
+ * @param filters Optional filters forwarded as query parameters (`workflowName`, `tags` joined
+ * with commas, `status`); falsy or empty values are omitted from the query string.
+ * @returns The test groups returned by the backend.
+ * @throws Error with message `API <status>: <body or status text>` when the response is not OK.
5
+ */
6
+ export declare function fetchTestGroups(serverUrl: string, apiKey: string, filters?: {
7
+ workflowName?: string;
8
+ tags?: string[];
9
+ status?: string;
10
+ }): Promise<APITestGroup[]>;
11
+ /**
12
+ * Submit a test run result to the backend.
+ *
+ * @param serverUrl Backend base URL; the request targets `/api/testgroups/<testGroupId>/runs` beneath it.
+ * @param apiKey Project API key, sent as a `Bearer` token in the `Authorization` header.
+ * @param testGroupId ID of the test group the run belongs to (interpolated into the URL path).
+ * @param payload Run data; JSON-serialized and sent as the POST body.
+ * @returns The backend's response for the created run (declared as an object carrying its `id`).
+ * @throws Error with message `API <status>: <body or status text>` when the response is not OK.
13
+ */
14
+ export declare function submitTestRun(serverUrl: string, apiKey: string, testGroupId: number, payload: Record<string, unknown>): Promise<{
15
+ id: number;
16
+ }>;
17
+ /**
18
+ * Create a batch grouping multiple test run IDs.
+ *
+ * @param serverUrl Backend base URL; the request targets `/api/testgroups/batches` beneath it.
+ * @param apiKey Project API key, sent as a `Bearer` token in the `Authorization` header.
+ * @param payload Batch data; JSON-serialized and sent as the POST body.
+ * @returns The backend's response for the created batch (declared as an object carrying its `id`).
+ * @throws Error with message `API <status>: <body or status text>` when the response is not OK.
19
+ */
20
+ export declare function createBatch(serverUrl: string, apiKey: string, payload: Record<string, unknown>): Promise<{
21
+ id: number;
22
+ }>;
23
+ //# sourceMappingURL=api-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"api-client.d.ts","sourceRoot":"","sources":["../../src/ci/api-client.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAgC9C;;;GAGG;AACH,wBAAsB,eAAe,CACnC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE;IAAE,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GACpE,OAAO,CAAC,YAAY,EAAE,CAAC,CAUzB;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC,CAOzB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC,CAOzB"}
@@ -0,0 +1,62 @@
1
+ import { getOriginalFetch } from '../interceptors/http.js';
2
+ // ─── API Client ──────────────────────────────────────────────
3
+ // Uses getOriginalFetch() to bypass SDK interceptors.
4
/**
 * Build the default headers for a backend request: a JSON content type plus
 * the API key presented as a Bearer token.
 */
function headers(apiKey) {
    const bearerToken = `Bearer ${apiKey}`;
    return { 'Content-Type': 'application/json', 'Authorization': bearerToken };
}
10
/**
 * Send an authenticated request to the backend and unwrap the JSON response
 * envelope.
 *
 * The call goes through getOriginalFetch() so SDK interceptors are bypassed.
 * Default headers (JSON content type + Bearer API key) are merged with
 * `options.headers`; caller-supplied entries win on conflict.
 *
 * @param url Endpoint URL to request.
 * @param apiKey Project API key used for the Authorization header.
 * @param options Extra fetch options (method, body, headers, ...).
 * @returns `result` when the payload has one, otherwise `data`, otherwise the
 *   parsed payload itself.
 * @throws Error `API <status>: <body or status text>` on a non-OK response, or
 *   `API <status>: response body is not valid JSON (...)` when a successful
 *   response cannot be parsed.
 */
async function apiRequest(url, apiKey, options = {}) {
    const res = await getOriginalFetch()(url, {
        ...options,
        headers: { ...headers(apiKey), ...(options.headers ?? {}) },
    });
    if (!res.ok) {
        // Best effort: if the error body cannot be read, fall back to the status text.
        const text = await res.text().catch(() => '');
        throw new Error(`API ${res.status}: ${text || res.statusText}`);
    }
    let json;
    try {
        json = await res.json();
    }
    catch (err) {
        // An empty or malformed body would otherwise surface as a bare
        // SyntaxError that says nothing about which API call failed.
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`API ${res.status}: response body is not valid JSON (${reason})`);
    }
    // Backend wraps responses in generalApiResponseSender which sends { result: ... }
    // Optional chaining keeps a literal JSON `null` body from throwing a TypeError.
    return (json?.result ?? json?.data ?? json);
}
23
/**
 * Retrieve the test groups (including nested tests and expectations) of the
 * project the API key is scoped to.
 *
 * Optional filters are forwarded as query parameters; falsy or empty values
 * are left out of the query string.
 */
export async function fetchTestGroups(serverUrl, apiKey, filters) {
    // Drop a single trailing slash so the path below does not start with "//".
    const root = serverUrl.replace(/\/$/, '');
    const query = new URLSearchParams();
    const { workflowName, tags, status } = filters ?? {};
    if (workflowName) {
        query.set('workflowName', workflowName);
    }
    if (tags?.length) {
        query.set('tags', tags.join(','));
    }
    if (status) {
        query.set('status', status);
    }
    const queryString = query.toString();
    const suffix = queryString ? `?${queryString}` : '';
    return apiRequest(`${root}/api/testgroups/by-project${suffix}`, apiKey);
}
40
/**
 * POST one test run result for the given test group to the backend.
 * The payload is sent as a JSON-serialized request body.
 */
export async function submitTestRun(serverUrl, apiKey, testGroupId, payload) {
    // Drop a single trailing slash so the path below does not start with "//".
    const root = serverUrl.replace(/\/$/, '');
    const endpoint = `${root}/api/testgroups/${testGroupId}/runs`;
    const requestInit = { method: 'POST', body: JSON.stringify(payload) };
    return apiRequest(endpoint, apiKey, requestInit);
}
51
/**
 * POST a batch that groups multiple test run IDs together.
 * The payload is sent as a JSON-serialized request body.
 */
export async function createBatch(serverUrl, apiKey, payload) {
    // Drop a single trailing slash so the path below does not start with "//".
    const root = serverUrl.replace(/\/$/, '');
    const endpoint = `${root}/api/testgroups/batches`;
    const requestInit = { method: 'POST', body: JSON.stringify(payload) };
    return apiRequest(endpoint, apiKey, requestInit);
}
62
+ //# sourceMappingURL=api-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"api-client.js","sourceRoot":"","sources":["../../src/ci/api-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAG1D,gEAAgE;AAChE,sDAAsD;AAEtD,SAAS,OAAO,CAAC,MAAc;IAC7B,OAAO;QACL,cAAc,EAAE,kBAAkB;QAClC,eAAe,EAAE,UAAU,MAAM,EAAE;KACpC,CAAA;AACH,CAAC;AAED,KAAK,UAAU,UAAU,CACvB,GAAW,EACX,MAAc,EACd,UAAuB,EAAE;IAEzB,MAAM,GAAG,GAAG,MAAM,gBAAgB,EAAE,CAAC,GAAG,EAAE;QACxC,GAAG,OAAO;QACV,OAAO,EAAE,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,OAAiC,IAAI,EAAE,CAAC,EAAE;KACtF,CAAC,CAAA;IAEF,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAA;QAC7C,MAAM,IAAI,KAAK,CAAC,OAAO,GAAG,CAAC,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAA;IACjE,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAA8B,CAAA;IACzD,kFAAkF;IAClF,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAM,CAAA;AAChD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,SAAiB,EACjB,MAAc,EACd,OAAqE;IAErE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IACzC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAA;IACpC,IAAI,OAAO,EAAE,YAAY;QAAE,MAAM,CAAC,GAAG,CAAC,cAAc,EAAE,OAAO,CAAC,YAAY,CAAC,CAAA;IAC3E,IAAI,OAAO,EAAE,IAAI,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;IACrE,IAAI,OAAO,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;IAEzD,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;IAC5B,MAAM,GAAG,GAAG,GAAG,IAAI,6BAA6B,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAA;IACpE,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,CAAC,CAAA;AAChD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,SAAiB,EACjB,MAAc,EACd,WAAmB,EACnB,OAAgC;IAEhC,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IACzC,MAAM,GAAG,GAAG,GAAG,IAAI,mBAAmB,WAAW,OAAO,CAAA;IACxD,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,MAA
c,EACd,OAAgC;IAEhC,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IACzC,MAAM,GAAG,GAAG,GAAG,IAAI,yBAAyB,CAAA;IAC5C,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { APITestGroupTest, CISingleRunResult, CIExpectationResult } from './types.js';
2
+ interface ExecutionResult { // Value resolved by executeTest(); declared locally and not exported from this module.
3
+ passed: boolean; // NOTE(review): presumably the overall verdict for the test — confirm in executor.ts
4
+ singleRuns: CISingleRunResult[]; // NOTE(review): presumably one entry per individual run of the test — confirm in executor.ts
5
+ expectationResults: CIExpectationResult[]; // NOTE(review): presumably one entry per evaluated expectation — confirm in executor.ts
6
+ durationMs: number; // NOTE(review): presumably elapsed execution time in milliseconds — confirm in executor.ts
7
+ }
8
+ /**
9
+ * Execute a test (single-step or full-flow) according to its configuration.
+ *
+ * @param test Test definition to execute.
+ * @param cwd NOTE(review): presumably the project directory used to locate tool/workflow
+ * definitions — confirm against executor.ts, which is not visible here.
+ * @returns An ExecutionResult carrying the pass flag, single-run results, expectation results
+ * and duration.
10
+ */
11
+ export declare function executeTest(test: APITestGroupTest, cwd: string): Promise<ExecutionResult>;
12
+ export {};
13
+ //# sourceMappingURL=executor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/ci/executor.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,gBAAgB,EAEhB,iBAAiB,EACjB,mBAAmB,EACpB,MAAM,YAAY,CAAA;AAMnB,UAAU,eAAe;IACvB,MAAM,EAAE,OAAO,CAAA;IACf,UAAU,EAAE,iBAAiB,EAAE,CAAA;IAC/B,kBAAkB,EAAE,mBAAmB,EAAE,CAAA;IACzC,UAAU,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,IAAI,EAAE,gBAAgB,EACtB,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,eAAe,CAAC,CA+C1B"}