elasticdash-sdk 0.1.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticdash_sdk-0.1.2a2.dist-info/METADATA +500 -0
- elasticdash_sdk-0.1.2a2.dist-info/RECORD +66 -0
- elasticdash_sdk-0.1.2a2.dist-info/WHEEL +4 -0
- elasticdash_sdk-0.1.2a2.dist-info/entry_points.txt +2 -0
- elasticdash_sdk-0.1.2a2.dist-info/licenses/LICENSE +21 -0
- elasticdash_test/__init__.py +36 -0
- elasticdash_test/browser_ui.py +204 -0
- elasticdash_test/capture/__init__.py +15 -0
- elasticdash_test/capture/event.py +33 -0
- elasticdash_test/capture/recorder.py +79 -0
- elasticdash_test/capture/replay.py +64 -0
- elasticdash_test/ci/__init__.py +19 -0
- elasticdash_test/ci/api_client.py +130 -0
- elasticdash_test/ci/benchmark.py +213 -0
- elasticdash_test/ci/ed_runner.py +205 -0
- elasticdash_test/ci/executor.py +382 -0
- elasticdash_test/ci/git_info.py +122 -0
- elasticdash_test/ci/measurement.py +21 -0
- elasticdash_test/ci/replay.py +100 -0
- elasticdash_test/ci/reporters/__init__.py +28 -0
- elasticdash_test/ci/reporters/default.py +43 -0
- elasticdash_test/ci/reporters/json_reporter.py +22 -0
- elasticdash_test/ci/reporters/junit.py +45 -0
- elasticdash_test/ci/runner.py +130 -0
- elasticdash_test/ci/test_discovery.py +26 -0
- elasticdash_test/ci/test_loader.py +109 -0
- elasticdash_test/ci/test_registry.py +32 -0
- elasticdash_test/ci/trace_schema.py +95 -0
- elasticdash_test/ci/trace_writer.py +44 -0
- elasticdash_test/ci/types.py +210 -0
- elasticdash_test/ci/upload_client.py +135 -0
- elasticdash_test/cli.py +482 -0
- elasticdash_test/core/__init__.py +25 -0
- elasticdash_test/core/agent_state.py +186 -0
- elasticdash_test/core/judge_utils.py +196 -0
- elasticdash_test/dashboard.py +1174 -0
- elasticdash_test/execution/__init__.py +21 -0
- elasticdash_test/execution/tool_runner.py +227 -0
- elasticdash_test/interceptors/__init__.py +63 -0
- elasticdash_test/interceptors/ai_interceptor.py +474 -0
- elasticdash_test/interceptors/db.py +196 -0
- elasticdash_test/interceptors/db_auto.py +181 -0
- elasticdash_test/interceptors/http.py +211 -0
- elasticdash_test/interceptors/side_effects.py +119 -0
- elasticdash_test/interceptors/telemetry_push.py +208 -0
- elasticdash_test/interceptors/tool.py +192 -0
- elasticdash_test/interceptors/workflow_ai.py +195 -0
- elasticdash_test/matchers.py +368 -0
- elasticdash_test/observability.py +218 -0
- elasticdash_test/portal_executor.py +294 -0
- elasticdash_test/portal_server.py +185 -0
- elasticdash_test/proxy/__init__.py +8 -0
- elasticdash_test/proxy/llm_capture.py +192 -0
- elasticdash_test/registry.py +108 -0
- elasticdash_test/reporter.py +90 -0
- elasticdash_test/runner.py +141 -0
- elasticdash_test/socket_connector.py +99 -0
- elasticdash_test/telemetry_batcher.py +181 -0
- elasticdash_test/tool_registry.py +111 -0
- elasticdash_test/trace.py +89 -0
- elasticdash_test/tracing.py +50 -0
- elasticdash_test/trigger_executor.py +104 -0
- elasticdash_test/utils/__init__.py +9 -0
- elasticdash_test/utils/debug.py +30 -0
- elasticdash_test/utils/redact.py +39 -0
- elasticdash_test/workflow_runner.py +85 -0
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: elasticdash-sdk
|
|
3
|
+
Version: 0.1.2a2
|
|
4
|
+
Summary: Elasticdash AI test framework for Python
|
|
5
|
+
Author-email: Elasticdash <contact@elasticdash.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: aiohttp>=3.9
|
|
9
|
+
Requires-Dist: click>=8.0
|
|
10
|
+
Requires-Dist: httpx>=0.27
|
|
11
|
+
Requires-Dist: python-dotenv>=1.0
|
|
12
|
+
Provides-Extra: extras
|
|
13
|
+
Requires-Dist: colorama>=0.4; extra == 'extras'
|
|
14
|
+
Requires-Dist: requests>=2.32; extra == 'extras'
|
|
15
|
+
Provides-Extra: observability
|
|
16
|
+
Requires-Dist: python-socketio[asyncio-client]>=5.10; extra == 'observability'
|
|
17
|
+
Provides-Extra: test
|
|
18
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'test'
|
|
19
|
+
Requires-Dist: pytest>=7.4; extra == 'test'
|
|
20
|
+
Requires-Dist: respx>=0.21; extra == 'test'
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# elasticdash_test
|
|
24
|
+
|
|
25
|
+
An AI-native test runner for ElasticDash workflow testing. Built for async AI pipelines — not a general-purpose test runner.
|
|
26
|
+
|
|
27
|
+
- Trace-first: every test receives a `ctx.trace` to record and assert on LLM calls and tool invocations
|
|
28
|
+
- Automatic interception for OpenAI, Gemini, and Grok via `httpx`/`requests` — no manual instrumentation required
|
|
29
|
+
- AI-specific matchers: `to_have_llm_step`, `to_call_tool`, `to_match_semantic_output`, `to_have_custom_step`, `to_have_prompt_where`, `to_evaluate_output_metric`
|
|
30
|
+
- Sequential execution, no parallelism overhead
|
|
31
|
+
- No pytest dependency
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install elasticdash-sdk
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Requires Python 3.10+.
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Quick Start
|
|
46
|
+
|
|
47
|
+
**1. Write a test file** (`my_flow.ai_test.py`):
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from elasticdash_test import ai_test, expect
|
|
51
|
+
|
|
52
|
+
@ai_test("checkout flow")
|
|
53
|
+
async def test_checkout(ctx):
|
|
54
|
+
await run_checkout(ctx)
|
|
55
|
+
|
|
56
|
+
expect(ctx.trace).to_have_llm_step(model="gpt-4o", contains="order confirmed")
|
|
57
|
+
expect(ctx.trace).to_call_tool("chargeCard")
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**2. Run it:**
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
elasticdash test # discover all *.ai_test.py files
|
|
64
|
+
elasticdash test ./ai_tests # discover in a specific directory
|
|
65
|
+
elasticdash run my_flow.ai_test.py # run a single file
|
|
66
|
+
elasticdash dashboard # open workflows dashboard
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**3. Read the output:**
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
✓ checkout flow (1.2s)
|
|
73
|
+
✗ refund flow (0.8s)
|
|
74
|
+
→ Expected tool "chargeCard" to be called, but no tool calls were recorded
|
|
75
|
+
|
|
76
|
+
2 passed
|
|
77
|
+
1 failed
|
|
78
|
+
Total: 3
|
|
79
|
+
Duration: 3.4s
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Writing Tests
|
|
85
|
+
|
|
86
|
+
See the full guide in [docs/test-writing-guidelines.md](docs/test-writing-guidelines.md).
|
|
87
|
+
|
|
88
|
+
### Decorators
|
|
89
|
+
|
|
90
|
+
Import from `elasticdash_test` and apply to functions — no global injection needed:
|
|
91
|
+
|
|
92
|
+
| Decorator | Description |
|
|
93
|
+
|---|---|
|
|
94
|
+
| `@ai_test(name)` | Register a test |
|
|
95
|
+
| `@before_all` | Run once before all tests in the file |
|
|
96
|
+
| `@before_each` | Run before every test in the file |
|
|
97
|
+
| `@after_each` | Run after every test in the file (runs even if the test fails) |
|
|
98
|
+
| `@after_all` | Run once after all tests in the file |
|
|
99
|
+
|
|
100
|
+
### Test context
|
|
101
|
+
|
|
102
|
+
Each test function receives a `ctx: AITestContext` argument:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
@ai_test("my test")
|
|
106
|
+
async def test_my_flow(ctx):
|
|
107
|
+
# ctx.trace — record and inspect LLM steps and tool calls
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Recording trace data
|
|
111
|
+
|
|
112
|
+
**Automatic interception (recommended):** Call `install_ai_interceptor()` once in `@before_all` and the runner patches `httpx`/`requests` to record LLM steps for OpenAI, Gemini, and Grok calls automatically. See [Automatic AI Interception](#automatic-ai-interception) below.
|
|
113
|
+
|
|
114
|
+
**Manual recording:** Use this for providers not covered by the interceptor, when testing against stubs/mocks, or to capture custom workflow steps:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
ctx.trace.record_llm_step(
|
|
118
|
+
model="gpt-4o",
|
|
119
|
+
prompt="What is the order status?",
|
|
120
|
+
completion="The order has been confirmed.",
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
ctx.trace.record_tool_call(
|
|
124
|
+
name="chargeCard",
|
|
125
|
+
args={"amount": 99.99},
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
# Record custom workflow steps (RAG fetches, code/fixed steps, etc.)
|
|
129
|
+
ctx.trace.record_custom_step(
|
|
130
|
+
kind="rag", # 'rag' | 'code' | 'fixed' | 'custom'
|
|
131
|
+
name="pokemon-search",
|
|
132
|
+
tags=["sort:asc", "source:db"],
|
|
133
|
+
payload={"query": "pikachu attack"},
|
|
134
|
+
result={"ids": [25]},
|
|
135
|
+
metadata={"latency_ms": 120},
|
|
136
|
+
)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Matchers
|
|
140
|
+
|
|
141
|
+
#### `to_have_llm_step(config?)`
|
|
142
|
+
|
|
143
|
+
Assert the trace contains at least one LLM step matching the given config. All fields are optional and combined with AND logic.
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
expect(ctx.trace).to_have_llm_step(model="gpt-4o")
|
|
147
|
+
expect(ctx.trace).to_have_llm_step(contains="order confirmed") # searches prompt + completion
|
|
148
|
+
expect(ctx.trace).to_have_llm_step(prompt_contains="order status") # searches prompt only
|
|
149
|
+
expect(ctx.trace).to_have_llm_step(output_contains="order confirmed") # searches completion only
|
|
150
|
+
expect(ctx.trace).to_have_llm_step(provider="openai")
|
|
151
|
+
expect(ctx.trace).to_have_llm_step(provider="openai", prompt_contains="order status")
|
|
152
|
+
expect(ctx.trace).to_have_llm_step(prompt_contains="retry", times=3) # exactly 3 matching steps
|
|
153
|
+
expect(ctx.trace).to_have_llm_step(provider="openai", min_times=2) # at least 2 matching steps
|
|
154
|
+
expect(ctx.trace).to_have_llm_step(output_contains="error", max_times=1) # at most 1 matching step
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
| Field | Description |
|
|
158
|
+
|---|---|
|
|
159
|
+
| `model` | Exact model name match (e.g. `'gpt-4o'`) |
|
|
160
|
+
| `contains` | Substring match across prompt + completion (case-insensitive) |
|
|
161
|
+
| `prompt_contains` | Substring match in prompt only (case-insensitive) |
|
|
162
|
+
| `output_contains` | Substring match in completion only (case-insensitive) |
|
|
163
|
+
| `provider` | Provider name: `'openai'`, `'gemini'`, or `'grok'` |
|
|
164
|
+
| `times` | Exact match count (fails unless exactly this many steps match) |
|
|
165
|
+
| `min_times` | Minimum match count (steps matching must be ≥ this value) |
|
|
166
|
+
| `max_times` | Maximum match count (steps matching must be ≤ this value) |
|
|
167
|
+
|
|
168
|
+
#### `to_call_tool(tool_name)`
|
|
169
|
+
|
|
170
|
+
Assert the trace contains a tool call with the given name.
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
expect(ctx.trace).to_call_tool("chargeCard")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
#### `to_match_semantic_output(expected, **options)`
|
|
177
|
+
|
|
178
|
+
LLM-judged semantic match of combined LLM output vs. the expected string. Defaults to OpenAI GPT-4.1 with `OPENAI_API_KEY`.
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
# Minimal, using default OpenAI model
|
|
182
|
+
await expect(ctx.trace).to_match_semantic_output("order confirmed")
|
|
183
|
+
|
|
184
|
+
# Use a different provider
|
|
185
|
+
await expect(ctx.trace).to_match_semantic_output(
|
|
186
|
+
"attack stat",
|
|
187
|
+
provider="claude",
|
|
188
|
+
model="claude-3-opus-20240229",
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# OpenAI-compatible endpoint (e.g., Moonshot/Kimi) via base_url + api_key
|
|
192
|
+
await expect(ctx.trace).to_match_semantic_output(
|
|
193
|
+
"order confirmed",
|
|
194
|
+
provider="openai",
|
|
195
|
+
model="kimi-k2-turbo-preview",
|
|
196
|
+
api_key=os.environ["KIMI_API_KEY"],
|
|
197
|
+
base_url="https://api.moonshot.ai/v1",
|
|
198
|
+
)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Environment keys by provider: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY` (or `GOOGLE_API_KEY`), `GROK_API_KEY`.
|
|
202
|
+
|
|
203
|
+
#### `to_evaluate_output_metric(config)`
|
|
204
|
+
|
|
205
|
+
Evaluate one LLM step's prompt or result using an LLM and assert a numeric metric condition in the range 0.0–1.0. Defaults: target=`result`, condition=`at_least 0.7`, provider=`openai`, model=`gpt-4.1`.
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
# Evaluate the last LLM result; default condition at_least 0.7
|
|
209
|
+
await expect(ctx.trace).to_evaluate_output_metric(
|
|
210
|
+
evaluation_prompt="Rate how well this answers the user question.",
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
# Check a specific step (3rd LLM prompt), target the prompt text, require >= 0.8
|
|
214
|
+
await expect(ctx.trace).to_evaluate_output_metric(
|
|
215
|
+
evaluation_prompt="Score coherence of this prompt between 0 and 1.",
|
|
216
|
+
target="prompt",
|
|
217
|
+
nth=3,
|
|
218
|
+
condition={"at_least": 0.8},
|
|
219
|
+
provider="claude",
|
|
220
|
+
model="claude-3-opus-20240229",
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
# Custom comparator: score must be < 0.3
|
|
224
|
+
await expect(ctx.trace).to_evaluate_output_metric(
|
|
225
|
+
evaluation_prompt="Rate hallucination risk (0=none, 1=high).",
|
|
226
|
+
condition={"less_than": 0.3},
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Options:
|
|
231
|
+
- `evaluation_prompt` (required): your scoring instructions; the model is asked to return only a number between 0 and 1.
|
|
232
|
+
- `target`: `'result'` (default) or `'prompt'`. Evaluates that text only.
|
|
233
|
+
- `nth`: pick which LLM step to score (1-based). Defaults to the last LLM step.
|
|
234
|
+
- `condition`: one of `greater_than`, `less_than`, `at_least`, `at_most`, `equals`; default is `{"at_least": 0.7}`. Fails if the score is outside 0.0–1.0 or cannot be parsed.
|
|
235
|
+
- `provider` / `model` / `api_key` / `base_url`: supports OpenAI, Claude, Gemini, Grok, and OpenAI-compatible endpoints via `base_url`.
|
|
236
|
+
|
|
237
|
+
#### `to_have_custom_step(config?)`
|
|
238
|
+
|
|
239
|
+
Assert a recorded custom step (RAG/code/fixed/custom) matches filters.
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
expect(ctx.trace).to_have_custom_step(kind="rag", name="pokemon-search")
|
|
243
|
+
expect(ctx.trace).to_have_custom_step(tag="sort:asc")
|
|
244
|
+
expect(ctx.trace).to_have_custom_step(contains="pikachu")
|
|
245
|
+
expect(ctx.trace).to_have_custom_step(result_contains="25")
|
|
246
|
+
expect(ctx.trace).to_have_custom_step(kind="rag", min_times=1, max_times=2)
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
#### `to_have_prompt_where(config)`
|
|
250
|
+
|
|
251
|
+
Filter prompts, then assert additional constraints. Example: "all prompts containing A must also contain B".
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
# Prompts that contain "order" must also contain "confirmed"
|
|
255
|
+
expect(ctx.trace).to_have_prompt_where(filter_contains="order", require_contains="confirmed")
|
|
256
|
+
|
|
257
|
+
# Prompts containing "retry" must NOT contain "cancel"
|
|
258
|
+
expect(ctx.trace).to_have_prompt_where(filter_contains="retry", require_not_contains="cancel")
|
|
259
|
+
|
|
260
|
+
# Control counts on the filtered subset
|
|
261
|
+
expect(ctx.trace).to_have_prompt_where(
|
|
262
|
+
filter_contains="order",
|
|
263
|
+
require_contains="confirmed",
|
|
264
|
+
min_times=1,
|
|
265
|
+
max_times=3,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Check a specific prompt position (1-based nth)
|
|
269
|
+
expect(ctx.trace).to_have_prompt_where(
|
|
270
|
+
filter_contains="order",
|
|
271
|
+
require_contains="confirmed",
|
|
272
|
+
nth=3, # the 3rd prompt among those containing "order"
|
|
273
|
+
)
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## Automatic AI Interception
|
|
279
|
+
|
|
280
|
+
Call `install_ai_interceptor()` in a `@before_all` hook and the runner patches `httpx` and `requests` before tests run, automatically recording LLM steps for:
|
|
281
|
+
|
|
282
|
+
| Provider | Endpoints intercepted |
|
|
283
|
+
|---|---|
|
|
284
|
+
| **OpenAI** | `api.openai.com/v1/chat/completions`, `/v1/completions` |
|
|
285
|
+
| **Gemini** | `generativelanguage.googleapis.com/.../models/...:generateContent` |
|
|
286
|
+
| **Grok** (xAI) | `api.x.ai/v1/chat/completions` |
|
|
287
|
+
|
|
288
|
+
Each intercepted call records `model`, `provider`, `prompt`, and `completion` into `ctx.trace` automatically. Your workflow code needs no changes.
|
|
289
|
+
|
|
290
|
+
```python
|
|
291
|
+
from elasticdash_test import ai_test, before_all, after_all, install_ai_interceptor, uninstall_ai_interceptor, expect
|
|
292
|
+
|
|
293
|
+
@before_all
|
|
294
|
+
def setup():
|
|
295
|
+
install_ai_interceptor()
|
|
296
|
+
|
|
297
|
+
@after_all
|
|
298
|
+
def teardown():
|
|
299
|
+
uninstall_ai_interceptor()
|
|
300
|
+
|
|
301
|
+
@ai_test("user lookup flow")
|
|
302
|
+
async def test_user_lookup(ctx):
|
|
303
|
+
# This makes a real OpenAI call — intercepted automatically
|
|
304
|
+
await my_workflow.run("Find all active users")
|
|
305
|
+
|
|
306
|
+
# Works without any ctx.trace.record_llm_step() in your workflow
|
|
307
|
+
expect(ctx.trace).to_have_llm_step(prompt_contains="Find all active users")
|
|
308
|
+
expect(ctx.trace).to_have_llm_step(provider="openai")
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
**Streaming:** When `stream=True` is set on a request, the completion is recorded as `"(streamed)"` — the prompt and model are still captured.
|
|
312
|
+
|
|
313
|
+
### Recording trace steps without passing `ctx.trace` (contextvars)
|
|
314
|
+
|
|
315
|
+
The runner sets a per-test `current_trace` using Python's `contextvars`, so your app code can record steps without threading `ctx.trace` through every function:
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
# In your test
|
|
319
|
+
from elasticdash_test import ai_test, set_current_trace, expect
|
|
320
|
+
|
|
321
|
+
@ai_test("flow test")
|
|
322
|
+
async def test_flow(ctx):
|
|
323
|
+
set_current_trace(ctx.trace) # bind the trace to the current async context
|
|
324
|
+
await run_flow_without_trace_arg() # your existing code
|
|
325
|
+
expect(ctx.trace).to_have_custom_step(kind="rag", name="pokemon-search")
|
|
326
|
+
|
|
327
|
+
# In your app/flow code (called during the test)
|
|
328
|
+
from elasticdash_test import get_current_trace
|
|
329
|
+
|
|
330
|
+
async def run_flow_without_trace_arg():
|
|
331
|
+
trace = get_current_trace()
|
|
332
|
+
if trace:
|
|
333
|
+
trace.record_custom_step(
|
|
334
|
+
kind="rag",
|
|
335
|
+
name="pokemon-search",
|
|
336
|
+
payload={"query": "pikachu attack"},
|
|
337
|
+
result={"ids": [25]},
|
|
338
|
+
tags=["source:db", "sort:asc"],
|
|
339
|
+
)
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
## Configuration
|
|
345
|
+
|
|
346
|
+
Create an optional `elasticdash.config.py` at the project root:
|
|
347
|
+
|
|
348
|
+
```python
|
|
349
|
+
config = {
|
|
350
|
+
"test_match": ["**/*.ai_test.py"],
|
|
351
|
+
"trace_mode": "local",
|
|
352
|
+
}
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
| Option | Default | Description |
|
|
356
|
+
|---|---|---|
|
|
357
|
+
| `test_match` | `['**/*.ai_test.py']` | Glob patterns for test discovery |
|
|
358
|
+
| `trace_mode` | `'local'` | `'local'` (stub) or `'remote'` (future ElasticDash backend) |
|
|
359
|
+
|
|
360
|
+
### `ed_agents.py`, `ed_workflows.py`, `ed_tools.py`
|
|
361
|
+
|
|
362
|
+
These optional files are thin wrappers that bundle and re-export existing functions from your codebase. The runner loads them automatically during test runs to provide agents, workflows, and tools to your test environment.
|
|
363
|
+
|
|
364
|
+
#### `ed_agents.py`
|
|
365
|
+
|
|
366
|
+
Re-export agent functions or create a config dict for easy reference:
|
|
367
|
+
|
|
368
|
+
```python
|
|
369
|
+
# ed_agents.py — import from your app
|
|
370
|
+
from my_app.agents import checkout_agent, payment_agent
|
|
371
|
+
|
|
372
|
+
config = {
|
|
373
|
+
"checkout": checkout_agent,
|
|
374
|
+
"payment": payment_agent,
|
|
375
|
+
}
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
Access in tests:
|
|
379
|
+
|
|
380
|
+
```python
|
|
381
|
+
@ai_test("checkout flow")
|
|
382
|
+
async def test_checkout(ctx, config):
|
|
383
|
+
agents = config.get("agents", {})
|
|
384
|
+
result = await agents["checkout"]("order-123")
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
#### `ed_workflows.py`
|
|
388
|
+
|
|
389
|
+
Re-export workflow functions from your application:
|
|
390
|
+
|
|
391
|
+
```python
|
|
392
|
+
# ed_workflows.py
|
|
393
|
+
from my_app.workflows import order_workflow, refund_workflow
|
|
394
|
+
|
|
395
|
+
# Re-export directly — the runner will import this module
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
Access in tests:
|
|
399
|
+
|
|
400
|
+
```python
|
|
401
|
+
@ai_test("full order workflow")
|
|
402
|
+
async def test_workflow(ctx):
|
|
403
|
+
from ed_workflows import order_workflow
|
|
404
|
+
result = await order_workflow("order-123", "cust-456")
|
|
405
|
+
expect(ctx.trace).to_call_tool("chargeCard")
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
#### `ed_tools.py`
|
|
409
|
+
|
|
410
|
+
Re-export tool functions that agents or workflows can invoke:
|
|
411
|
+
|
|
412
|
+
```python
|
|
413
|
+
# ed_tools.py
|
|
414
|
+
from my_app.tools import charge_card, fetch_order_status, send_notification
|
|
415
|
+
```
|
|
416
|
+
|
|
417
|
+
Access in tests or workflows:
|
|
418
|
+
|
|
419
|
+
```python
|
|
420
|
+
@ai_test("tool integration")
|
|
421
|
+
async def test_tools(ctx):
|
|
422
|
+
from ed_tools import fetch_order_status
|
|
423
|
+
status = await fetch_order_status("order-123")
|
|
424
|
+
expect(ctx.trace).to_have_custom_step(kind="custom", name="fetch_order_status")
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
These files are loaded automatically if present in the project root.
|
|
428
|
+
|
|
429
|
+
## Workflows Dashboard
|
|
430
|
+
|
|
431
|
+
Browse and search all available workflow functions in your project:
|
|
432
|
+
|
|
433
|
+
```bash
|
|
434
|
+
elasticdash dashboard # open dashboard at http://localhost:4573
|
|
435
|
+
elasticdash dashboard --port 4572 # use custom port
|
|
436
|
+
elasticdash dashboard --no-open # skip auto-opening browser
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
The dashboard scans `ed_workflows.py` and displays:
|
|
440
|
+
- **Function names** — all callable functions in the module
|
|
441
|
+
- **Signatures** — function parameters and return types
|
|
442
|
+
- **Async indicator** — marks async vs sync functions
|
|
443
|
+
- **Source module** — where the function is imported from (if not locally defined)
|
|
444
|
+
- **File path** — location of `ed_workflows.py`
|
|
445
|
+
|
|
446
|
+
Use the search field to filter workflows by:
|
|
447
|
+
- **Name** — find workflow by function name (e.g., `checkout_flow`)
|
|
448
|
+
- **Source module** — find all workflows from a specific module (e.g., `app_workflows`)
|
|
449
|
+
- **File path** — filter by location in your codebase
|
|
450
|
+
|
|
451
|
+
This is useful for discovering available workflows, understanding their signatures, and identifying where functions are defined before calling them in tests.
|
|
452
|
+
|
|
453
|
+
## Project Structure
|
|
454
|
+
|
|
455
|
+
```
|
|
456
|
+
elasticdash_test/
|
|
457
|
+
cli.py CLI entry point (click + glob)
|
|
458
|
+
runner.py Sequential test runner engine
|
|
459
|
+
reporter.py Color-coded terminal output
|
|
460
|
+
registry.py ai_test / before_all / before_each / after_each / after_all registry
|
|
461
|
+
trace.py TraceHandle, AITestContext, contextvars support
|
|
462
|
+
matchers.py Custom expect matchers
|
|
463
|
+
interceptors/
|
|
464
|
+
ai_interceptor.py Automatic httpx/requests interceptor for OpenAI / Gemini / Grok
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
---
|
|
468
|
+
|
|
469
|
+
## Programmatic API
|
|
470
|
+
|
|
471
|
+
```python
|
|
472
|
+
from elasticdash_test import install_ai_interceptor, uninstall_ai_interceptor
|
|
473
|
+
from elasticdash_test.runner import run_files
|
|
474
|
+
from elasticdash_test.reporter import print_results
|
|
475
|
+
|
|
476
|
+
install_ai_interceptor() # patch httpx/requests for automatic LLM tracing
|
|
477
|
+
|
|
478
|
+
results = await run_files(["./tests/flow.ai_test.py"])
|
|
479
|
+
print_results(results)
|
|
480
|
+
|
|
481
|
+
uninstall_ai_interceptor() # restore original transports when done
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
---
|
|
485
|
+
|
|
486
|
+
## Non-Goals
|
|
487
|
+
|
|
488
|
+
This runner intentionally does not support:
|
|
489
|
+
|
|
490
|
+
- Parallel execution
|
|
491
|
+
- Watch mode
|
|
492
|
+
- Snapshot testing
|
|
493
|
+
- Coverage reporting
|
|
494
|
+
- pytest compatibility
|
|
495
|
+
|
|
496
|
+
---
|
|
497
|
+
|
|
498
|
+
## License
|
|
499
|
+
|
|
500
|
+
MIT
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
elasticdash_test/__init__.py,sha256=-NioHH3beMyZtbZjRIDu-R9N4IgwsZLE7rdUmsahA1c,834
|
|
2
|
+
elasticdash_test/browser_ui.py,sha256=GawLSFIoEDJYDx_EVVZERgSbyntN1nKaJl81kLg9GoA,6483
|
|
3
|
+
elasticdash_test/cli.py,sha256=WL1LVTycrv1QCvwpvdsUxztal2Mzz_aTft6ig8XdQ9E,16992
|
|
4
|
+
elasticdash_test/dashboard.py,sha256=tCynZF3EguNu5l6sM4hSFt4NtJz_ld9EfYlpr5VVcbY,38542
|
|
5
|
+
elasticdash_test/matchers.py,sha256=dKlnzKDhnL79x2eY32rqyf8__Vs6-WcZtM25HJ-T0jA,12908
|
|
6
|
+
elasticdash_test/observability.py,sha256=QnPz2wgcLkgIViNFt_8uZZvTSKFKBAsieAA7qjJJocI,6734
|
|
7
|
+
elasticdash_test/portal_executor.py,sha256=NcaYL8cFCTjQwk0KuoKr7AYzdDOOY91iE8ObPyprbWM,9667
|
|
8
|
+
elasticdash_test/portal_server.py,sha256=zEVSBmBCRk5aPymMXYC1Qyk3RR-Bb2GHzKYJTZlsqfI,6258
|
|
9
|
+
elasticdash_test/registry.py,sha256=GJ7g4F7wxrWhzCcgHJdV8NWVmdejVzV8zilYEHPF34c,3374
|
|
10
|
+
elasticdash_test/reporter.py,sha256=w1YZMUz68le3sSafcNnpzDVTTXERV9qNmXxPKK0NnlA,2651
|
|
11
|
+
elasticdash_test/runner.py,sha256=ps7y-dNaFZggwI_xEHPEKwzy1D2aXdYV4xoAQCrW74A,4096
|
|
12
|
+
elasticdash_test/socket_connector.py,sha256=1kZDSsU4dL8CIs6k9Rzhuj0Yy2MoYdi92Ii-nJw-Mr0,2642
|
|
13
|
+
elasticdash_test/telemetry_batcher.py,sha256=mbvcU0V-pmxMgPlvzBeasTVyZN6S4VeZ3xVpsY_80q8,6720
|
|
14
|
+
elasticdash_test/tool_registry.py,sha256=RviGd-NtkeAhgN4253S0Wb7Pyfclbs8uZ6c-zsuHnZQ,3157
|
|
15
|
+
elasticdash_test/trace.py,sha256=e7eo4FW6_QKqcg-Qrd3F1biC6xuzWu4zSzJLTuRIy-4,2403
|
|
16
|
+
elasticdash_test/tracing.py,sha256=ga5eU7Sq9P_bovbQIjqbmCvLpIjQu9stJM03zXIWNB0,1342
|
|
17
|
+
elasticdash_test/trigger_executor.py,sha256=xugvTyT2daWKmcia_Tf-Sfag8HYYdSgsoFOQsthzIUg,3530
|
|
18
|
+
elasticdash_test/workflow_runner.py,sha256=xaeP2CAhI1V39LG-KCeZGidI6vnsJXYMD-5OZxoteIM,2354
|
|
19
|
+
elasticdash_test/capture/__init__.py,sha256=n6Zf778m0uUvCvAJYt4f6JOHjsGb-ROXrOoCznFP6y0,409
|
|
20
|
+
elasticdash_test/capture/event.py,sha256=G2Xf_LTYzJb-eZPik3mkSzwVY_lg1wiEx7E-HOAZ01o,953
|
|
21
|
+
elasticdash_test/capture/recorder.py,sha256=5FrwtjevmLYJdeo_6cfxLqJCJowNDFFLh4oirdnr2T0,2609
|
|
22
|
+
elasticdash_test/capture/replay.py,sha256=OfGLvqNcoQjPSLqPgK4b6NjyifH3mLiS6BeB_5PlqNQ,2673
|
|
23
|
+
elasticdash_test/ci/__init__.py,sha256=HKqcnFkRLMK33tL99jZcQ1yPOYksl7OotGn46C6X0xM,510
|
|
24
|
+
elasticdash_test/ci/api_client.py,sha256=GC5yEcfuVSFbKWZVxwiqSQ0uqqFm3HRhjmOYFC3rXK8,4132
|
|
25
|
+
elasticdash_test/ci/benchmark.py,sha256=-uv_uF9RFTdtENvoAtmp5a0ha-TioewBkOSmPCh3V9I,7352
|
|
26
|
+
elasticdash_test/ci/ed_runner.py,sha256=GJjfbFDE9roqTKO28nGBdcsUs0VfwPIqaGNFGPm5Ogk,6295
|
|
27
|
+
elasticdash_test/ci/executor.py,sha256=Td2E4vUB-PHA9XHD7J_-EsJaBWSSpNjeg3HfRrOUbYw,13088
|
|
28
|
+
elasticdash_test/ci/git_info.py,sha256=5dQpZ2snFV3hp8uC02hDy25GI4l6DfbPj256RkeYg00,4228
|
|
29
|
+
elasticdash_test/ci/measurement.py,sha256=lWOPQhO7rdGge1LnSG8Bga3SL2d0ERB_tMq264xc0hA,651
|
|
30
|
+
elasticdash_test/ci/replay.py,sha256=qIQY29ok2g__82gCwaJMMnEprG2p8WiUlGrnLa9w8Ls,3009
|
|
31
|
+
elasticdash_test/ci/runner.py,sha256=NVJ7bdAxQjGyjKo8OVZMIDnLrAOd4PJmLqkAhILEQC0,4163
|
|
32
|
+
elasticdash_test/ci/test_discovery.py,sha256=ymxtZcR-IV2NTb1mbDAVG_UuCKVoImGIhgnLO9FFFYE,759
|
|
33
|
+
elasticdash_test/ci/test_loader.py,sha256=RcIBu6qv6LBQHFhImv8C7soU4bRpOeEZ0O8paIEWMmM,3287
|
|
34
|
+
elasticdash_test/ci/test_registry.py,sha256=gMEcIacMFoIOceEiVpZI3aR0Kh1JGbVakuD_SG36tLo,913
|
|
35
|
+
elasticdash_test/ci/trace_schema.py,sha256=CD6i2zYEwpk-dhJUVYFWx6A-iCQs2Dafq0t4wbmdRHA,2614
|
|
36
|
+
elasticdash_test/ci/trace_writer.py,sha256=ZUMluX8pXqSEUyQTS-LHgSeZfAov8u5Pkuf1skp6VL0,1319
|
|
37
|
+
elasticdash_test/ci/types.py,sha256=rSdtQ96U0eYks0BbT5QhIXk6afa1z8jHudLUinFn6Dw,4864
|
|
38
|
+
elasticdash_test/ci/upload_client.py,sha256=xtGAKkKt82UXMNFNg1M7mgqy9NkKUiarIimeiuL8Kp0,4064
|
|
39
|
+
elasticdash_test/ci/reporters/__init__.py,sha256=xIobkO-GXwu_ZblgSo-DDM9-i3mgTRy_QWhL1XELMY4,856
|
|
40
|
+
elasticdash_test/ci/reporters/default.py,sha256=zeidIOhJocjBZNHiOzBM0MkFgK8fPB4VjhUAuRsPZoM,1423
|
|
41
|
+
elasticdash_test/ci/reporters/json_reporter.py,sha256=-hSS9Ni4P_gJDnVc9UU6k0fLUSQE2u7KbYMZ6rE5Sfc,612
|
|
42
|
+
elasticdash_test/ci/reporters/junit.py,sha256=hK3CE63SlF2yjca3VlljgUu7zLijcjN9fdywfxIu_Ac,1613
|
|
43
|
+
elasticdash_test/core/__init__.py,sha256=UPh2la5hMoc41Cplqu5Vkp3eMAaOx1tH9SB2ZXArgpA,541
|
|
44
|
+
elasticdash_test/core/agent_state.py,sha256=AKaeHcqDrFmP1TWLOG4YZtuEdvc_jD4rB4YgvgWVQM4,5697
|
|
45
|
+
elasticdash_test/core/judge_utils.py,sha256=H480cIvvAKkpHLIEdgMTP2Vwzyq_7Kc9CVpiSM-1D4c,6591
|
|
46
|
+
elasticdash_test/execution/__init__.py,sha256=MHsJ6WAP3TTxZSsx0BQ3AtKsySoONYQPOuMAs-H00R4,392
|
|
47
|
+
elasticdash_test/execution/tool_runner.py,sha256=fbpEHBsdle0D9un7MJGvQDNVPLkdz1k6KqYg2tsEoVs,6807
|
|
48
|
+
elasticdash_test/interceptors/__init__.py,sha256=8Ik3Nvoi0XWSko3_I6XPdR_SZfx1Ot72ovk_n2GNo2w,1764
|
|
49
|
+
elasticdash_test/interceptors/ai_interceptor.py,sha256=he11MK5cpMPtpVrHRaLyqgXTet7hiSub7GMw6HxliLA,16821
|
|
50
|
+
elasticdash_test/interceptors/db.py,sha256=rs66SOYNsCyJk5wCJCVOGFohPHVGVJeKWDIBZHyizfY,6764
|
|
51
|
+
elasticdash_test/interceptors/db_auto.py,sha256=EvjVAmUSsmiGCz5bH0ul9T5h5_Rcjgg-QoMM7Sa94Fw,5881
|
|
52
|
+
elasticdash_test/interceptors/http.py,sha256=yjKxAlky0dEpQZ6RrTAtRuTVrrGfInsV4Lfy8N-jONA,8112
|
|
53
|
+
elasticdash_test/interceptors/side_effects.py,sha256=USOOJiNt645EY53lWKblX2iEFogmqbUHA_5xrch_aQk,4294
|
|
54
|
+
elasticdash_test/interceptors/telemetry_push.py,sha256=arNXp5KjfSfUbwtWh1N1p15427ndElVuOjffCEurMJU,6971
|
|
55
|
+
elasticdash_test/interceptors/tool.py,sha256=KzjhpgEHiGAdgl5n7VSOxqf_NPhTlI2lYQlCazVxHAM,6649
|
|
56
|
+
elasticdash_test/interceptors/workflow_ai.py,sha256=cDYl1vfjh1TmXEvc6ti38yQK7GBtVSwks4zYB6o-pVU,6200
|
|
57
|
+
elasticdash_test/proxy/__init__.py,sha256=8nL8R5kEg2K4Wbb_X_rEWEZAyFjaQPdkZ50OECWGaYU,176
|
|
58
|
+
elasticdash_test/proxy/llm_capture.py,sha256=0hFkuuLoZbPaMrLT997c5-MjSzbkmm5jDszt12PfhN8,6030
|
|
59
|
+
elasticdash_test/utils/__init__.py,sha256=aHhNPw0ecJ58ZpuAOZYQ5T0k2BpPE2tb4DTioTrlv4M,178
|
|
60
|
+
elasticdash_test/utils/debug.py,sha256=gLmOg9tJl-6Q6hwmahcvQXidJBH_pIiAY2j1R_-K3m8,886
|
|
61
|
+
elasticdash_test/utils/redact.py,sha256=NVfrqiwU8jM7Dk7U886na9mceX0EVTzb2woaZUUZ4oc,1147
|
|
62
|
+
elasticdash_sdk-0.1.2a2.dist-info/METADATA,sha256=ggbn1583f6tQijRNpxY_IcpjfxISKVM972UzhnMfWwQ,16301
|
|
63
|
+
elasticdash_sdk-0.1.2a2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
64
|
+
elasticdash_sdk-0.1.2a2.dist-info/entry_points.txt,sha256=Qd-5AP5vimmIVpUlmzHz4Itg24Cvu3ztTBhcHMmRm6g,58
|
|
65
|
+
elasticdash_sdk-0.1.2a2.dist-info/licenses/LICENSE,sha256=1mOjzaqLsqw8szofGZHOx-Z6pNvBAKoJUNaHeA7XZ3I,1068
|
|
66
|
+
elasticdash_sdk-0.1.2a2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ElasticDash
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Elasticdash AI test framework Python port."""
|
|
2
|
+
|
|
3
|
+
from .registry import ai_test, before_all, after_all, before_each, after_each, clear_registry, get_registry
|
|
4
|
+
from .matchers import expect
|
|
5
|
+
from .trace import (
|
|
6
|
+
LLMStep,
|
|
7
|
+
ToolCall,
|
|
8
|
+
CustomStep,
|
|
9
|
+
TraceHandle,
|
|
10
|
+
AITestContext,
|
|
11
|
+
set_current_trace,
|
|
12
|
+
get_current_trace,
|
|
13
|
+
)
|
|
14
|
+
from .interceptors.ai_interceptor import install_ai_interceptor, uninstall_ai_interceptor
|
|
15
|
+
from .tool_registry import ed_tool
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"ai_test",
|
|
19
|
+
"before_all",
|
|
20
|
+
"after_all",
|
|
21
|
+
"before_each",
|
|
22
|
+
"after_each",
|
|
23
|
+
"expect",
|
|
24
|
+
"LLMStep",
|
|
25
|
+
"ToolCall",
|
|
26
|
+
"CustomStep",
|
|
27
|
+
"TraceHandle",
|
|
28
|
+
"AITestContext",
|
|
29
|
+
"set_current_trace",
|
|
30
|
+
"get_current_trace",
|
|
31
|
+
"install_ai_interceptor",
|
|
32
|
+
"uninstall_ai_interceptor",
|
|
33
|
+
"clear_registry",
|
|
34
|
+
"get_registry",
|
|
35
|
+
"ed_tool",
|
|
36
|
+
]
|