elasticdash-sdk 0.2.6 → 0.2.7-beta-2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +133 -26
- package/dist/dashboard-server.d.ts.map +1 -1
- package/dist/dashboard-server.js +76 -3
- package/dist/dashboard-server.js.map +1 -1
- package/dist/execution/tool-runner.d.ts.map +1 -1
- package/dist/execution/tool-runner.js +66 -5
- package/dist/execution/tool-runner.js.map +1 -1
- package/dist/index.cjs +57 -6
- package/dist/tool-runner-worker.js +27 -2
- package/dist/tool-runner-worker.js.map +1 -1
- package/dist/trigger-executor.d.ts.map +1 -1
- package/dist/trigger-executor.js +3 -1
- package/dist/trigger-executor.js.map +1 -1
- package/dist/workflow-runner-worker.js +24 -0
- package/dist/workflow-runner-worker.js.map +1 -1
- package/docs/agent-coding-instructions.md +8 -5
- package/docs/agent-integration-guide.md +158 -16
- package/docs/partial-mocking.md +10 -4
- package/docs/workflow-modes.md +6 -7
- package/package.json +1 -1
- package/src/dashboard-server.ts +71 -3
- package/src/execution/tool-runner.ts +62 -5
- package/src/tool-runner-worker.ts +22 -2
- package/src/trigger-executor.ts +3 -1
- package/src/workflow-runner-worker.ts +23 -0
package/README.md
CHANGED
|
@@ -46,28 +46,44 @@ npm install elasticdash-sdk
|
|
|
46
46
|
|
|
47
47
|
**Requirements:** Node 20+. For Deno projects, see [Using elasticdash-sdk in Deno](docs/deno.md).
|
|
48
48
|
|
|
49
|
-
### Setup with a Coding Agent
|
|
49
|
+
### Setup with a Coding Agent (required after install)
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
`npm install` alone wires nothing — the SDK only starts capturing traces once `ed_tools.ts` and `ed_workflows.ts` exist and the init call runs. Use a coding agent to do that wiring. The smoothest path is two extra steps after install:
|
|
52
52
|
|
|
53
|
+
**Step 2 — bake the integration guide into your project so the coding agent always sees it:**
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
npx elasticdash init-guide --target CLAUDE.md # Claude Code
|
|
57
|
+
npx elasticdash init-guide --target AGENTS.md # default — works for Codex, Windsurf, others
|
|
58
|
+
npx elasticdash init-guide --target .cursor/rules/elasticdash.md # Cursor
|
|
59
|
+
npx elasticdash init-guide --target .github/copilot-instructions.md # Copilot
|
|
53
60
|
```
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
61
|
+
|
|
62
|
+
If the target file already exists, the guide is appended (not overwritten). Use `--force` to replace the file entirely. Pick the target your agent actually reads; one file is enough.
|
|
63
|
+
|
|
64
|
+
**Step 3 — tell your coding agent:**
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
Complete the elasticdash-sdk integration following the guide that was just added to this project.
|
|
57
68
|
```
|
|
58
69
|
|
|
59
|
-
|
|
70
|
+
That's it. The agent reads the baked-in guide (which contains the same content as `node_modules/elasticdash-sdk/docs/agent-coding-instructions.md` and `node_modules/elasticdash-sdk/docs/agent-integration-guide.md`), then creates `ed_tools.ts`, `ed_workflows.ts`, calls `edInitObservability` from the entry point, updates source files to route tool calls through `ed_tools`, and validates the connection.
|
|
60
71
|
|
|
61
|
-
**
|
|
72
|
+
> **Do not shortcut this step.** Without `ed_tools.ts` and `ed_workflows.ts` plus the init call, the SDK does not intercept tool or AI calls — your project will run without errors and produce zero traces. A vague prompt like "install elasticdash-sdk" lets the agent stop at `npm install`; the prompt above is explicit about completing integration.
|
|
62
73
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
74
|
+
> **Init must go through `edInitObservability` (the helper inside `ed_workflows.ts`), not `import { initObservability } from 'elasticdash-sdk'` in your entry file.** Both files in the integration share one CJS module instance via `createRequire(import.meta.url)`; importing `initObservability` directly hits a *different* ESM instance, leaving `_ed.startTrace` reading from an empty store. The symptom is `[elasticdash] startTrace: observability not initialised` at runtime. The integration guide's Step 3 explains why; the `edInitObservability` helper is the only correct path. For CLI scripts, also call `edShutdownObservability()` from a `finally` block at process exit — the SDK's auto-registered exit hooks are async and short-lived processes can terminate before the final batch flushes.
|
|
75
|
+
|
|
76
|
+
> **Important: do not use `eval('require')` to load the SDK in `ed_tools.ts`.** The `eval('require')(...)` trick that older versions of this guide recommended works only in CJS — in any project with `"type": "module"` in `package.json`, it throws "require is not defined", the catch silently swallows the error, and the entire integration no-ops with zero logs and zero traces. Use `createRequire(import.meta.url)` from `node:module` instead; it works in both ESM and CJS.
|
|
77
|
+
|
|
78
|
+
**Fallback** — if you don't want to add a file to your repo, you can skip `init-guide` and use this prompt instead, which directs the agent at the docs inside `node_modules/`:
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
Integrate elasticdash-sdk into this project.
|
|
82
|
+
Read node_modules/elasticdash-sdk/docs/agent-coding-instructions.md for how to proceed,
|
|
83
|
+
and node_modules/elasticdash-sdk/docs/agent-integration-guide.md for technical reference.
|
|
68
84
|
```
|
|
69
85
|
|
|
70
|
-
|
|
86
|
+
This works but is more fragile: it relies on the agent following the doc-reading instruction literally, and it breaks if a different agent picks up the project later without the same prompt.
|
|
71
87
|
|
|
72
88
|
### Cloud Setup
|
|
73
89
|
|
|
@@ -235,20 +251,46 @@ expect(ctx.trace).toHaveCustomStep({ kind: 'rag', name: 'pokemon-search' })
|
|
|
235
251
|
### AI Interception
|
|
236
252
|
|
|
237
253
|
The runner automatically intercepts and records calls to:
|
|
254
|
+
- Anthropic (`api.anthropic.com`)
|
|
238
255
|
- OpenAI (`api.openai.com`)
|
|
239
256
|
- Gemini (`generativelanguage.googleapis.com`)
|
|
240
257
|
- Grok/xAI (`api.x.ai`)
|
|
241
258
|
|
|
242
|
-
No code changes needed — just run your workflow and assertions work automatically.
|
|
259
|
+
No code changes needed — just run your workflow and assertions work automatically. Because these providers are auto-captured, most workflows do **not** need to wrap LLM calls with `wrapAI`. See [Picking a wrapper](#picking-a-wrapper) below.
|
|
260
|
+
|
|
261
|
+
### Picking a wrapper
|
|
262
|
+
|
|
263
|
+
The SDK exposes three wrappers that look similar but solve different problems. Pick by what your function actually does:
|
|
264
|
+
|
|
265
|
+
| Your function is… | Use | Why |
|
|
266
|
+
|---|---|---|
|
|
267
|
+
| Deterministic (REST call, DB query, file IO — no LLM inside) | **`edTool`** | Records as a `tool` event AND registers in the global tool registry so CLI `run-tool`, MCP `run_tool`, and dashboard rerun can find it by name. |
|
|
268
|
+
| Exactly one LLM round-trip, AND you need prompt mocks, AI output mocks by name, OR the provider isn't auto-intercepted | **`wrapAI`** | Records as an `ai` event with token usage. Only `wrapAI` supports prompt rewriting (`resolvePromptMock` / `resolveUserPromptMock`) and named AI output mocks. |
|
|
269
|
+
| An agent loop (LLM + inner tools, multiple round-trips) | **`edTool`** on the outer boundary | The inner LLM calls are auto-captured by the AI interceptor. Wrapping the outer agent with `wrapAI` would hide the inner detail. |
|
|
270
|
+
| A direct single call to an auto-intercepted provider SDK (Anthropic / OpenAI / Gemini / Grok) | **No wrapper** | The AI interceptor already records it as an `ai` event with token usage. |
|
|
271
|
+
|
|
272
|
+
> **`wrapTool`** is the primitive that `edTool` builds on. Use `wrapTool` directly only when you specifically do not want registry registration — for example, wrapping an inline closure inside another function.
|
|
243
273
|
|
|
244
274
|
### Tool Recording
|
|
245
275
|
|
|
246
|
-
**Recommended: `
|
|
276
|
+
**Recommended: `edTool`** wraps a tool function (recording its name, input, output, duration, and any streaming output) *and* registers it in a global tool registry so it can be invoked by name from the CLI (`npx elasticdash run-tool <name>`), the MCP `run_tool`, and dashboard rerun:
|
|
247
277
|
|
|
248
278
|
```ts
|
|
249
|
-
import {
|
|
279
|
+
import { edTool } from 'elasticdash-sdk'
|
|
250
280
|
import { runSelectQuery } from './services/dataService'
|
|
251
281
|
|
|
282
|
+
export const dataService = edTool('dataService', async (input: { query: string }) => {
|
|
283
|
+
return await runSelectQuery(input.query)
|
|
284
|
+
})
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
Same event shape as `wrapTool` (`type: 'tool'`), so the existing tool-mock pipeline (`snapshot_mock_profile`, `mocked_tools_overrides`, strict mode) works unchanged. `defineTool` is an exported alias of `edTool`.
|
|
288
|
+
|
|
289
|
+
**Lower-level: `wrapTool`** — same tracing behavior without the registry registration. Use this only when you have a specific reason to keep the tool unregistered (e.g., a closure created inside another function):
|
|
290
|
+
|
|
291
|
+
```ts
|
|
292
|
+
import { wrapTool } from 'elasticdash-sdk'
|
|
293
|
+
|
|
252
294
|
export const dataService = wrapTool('dataService', async (input: { query: string }) => {
|
|
253
295
|
return await runSelectQuery(input.query)
|
|
254
296
|
})
|
|
@@ -287,9 +329,37 @@ In manual mode, always isolate tracing in a separate `try/catch` so trace loggin
|
|
|
287
329
|
|
|
288
330
|
**→ See [Tool Recording & Replay](docs/tools.md) for checkpoint-based replay and freezing**
|
|
289
331
|
|
|
332
|
+
### Agent-loop pattern
|
|
333
|
+
|
|
334
|
+
If your "tool" is actually an agent — a function that calls an LLM and may iterate through tool-use blocks — wrap the outer boundary with `edTool`, **not** `wrapAI`. The AI interceptor will auto-record each inner LLM call as a separate `ai` event nested under the trace:
|
|
335
|
+
|
|
336
|
+
```ts
|
|
337
|
+
import { edTool } from 'elasticdash-sdk'
|
|
338
|
+
import Anthropic from '@anthropic-ai/sdk'
|
|
339
|
+
|
|
340
|
+
const client = new Anthropic()
|
|
341
|
+
|
|
342
|
+
async function runSearchAgent(input: { query: string }) {
|
|
343
|
+
// Agent loop: each iteration produces its own auto-recorded `ai` event
|
|
344
|
+
while (true) {
|
|
345
|
+
const res = await client.messages.create({
|
|
346
|
+
model: 'claude-sonnet-4-5-20250929',
|
|
347
|
+
max_tokens: 1024,
|
|
348
|
+
messages: [/* ... */],
|
|
349
|
+
})
|
|
350
|
+
if (res.stop_reason === 'end_turn') return res
|
|
351
|
+
// ... handle tool_use blocks, append tool_result, loop
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
export const search = edTool('search', runSearchAgent)
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
Wrapping `runSearchAgent` with `wrapAI` instead would record one `ai` event covering the whole loop and hide the per-iteration calls. `edTool` keeps the agent visible as a single named, rerunnable, mockable boundary while leaving inner LLM detail intact for assertions and replay.
|
|
359
|
+
|
|
290
360
|
### AI Call Recording
|
|
291
361
|
|
|
292
|
-
**`wrapAI`** wraps
|
|
362
|
+
**`wrapAI`** wraps a **single** LLM call and records it as a `type: 'ai'` event with name, input, output, duration, and token usage (auto-detected for Anthropic, OpenAI, and Gemini SDK responses):
|
|
293
363
|
|
|
294
364
|
```ts
|
|
295
365
|
import { wrapAI } from 'elasticdash-sdk'
|
|
@@ -306,7 +376,20 @@ export const callClaude = wrapAI('claude-sonnet-4-5', async (messages: Anthropic
|
|
|
306
376
|
})
|
|
307
377
|
```
|
|
308
378
|
|
|
309
|
-
Use `wrapAI` when
|
|
379
|
+
#### Use `wrapAI` when
|
|
380
|
+
|
|
381
|
+
The function body is essentially one LLM round-trip, AND at least one of the following applies:
|
|
382
|
+
|
|
383
|
+
- The provider is **not auto-intercepted** (anything outside Anthropic / OpenAI / Gemini / Grok — e.g., Mistral, Cohere, local Ollama, Bedrock).
|
|
384
|
+
- You want **prompt mocks** — system or user prompt rewriting via `resolvePromptMock` / `resolveUserPromptMock` keyed by the name you pass to `wrapAI`. This is exclusive to `wrapAI`.
|
|
385
|
+
- You want **AI output mocks keyed by a named step** — e.g., mock the `"router"` call without mocking every call to the same model. `resolveAIMock` keys off the name argument.
|
|
386
|
+
- You want **one labelled boundary per logical step** in the trace (e.g., `"router"`, `"summarizer"`) with token usage attributed to that label, distinct from the raw provider-level event.
|
|
387
|
+
|
|
388
|
+
#### Do NOT use `wrapAI` when
|
|
389
|
+
|
|
390
|
+
- The function is an **agent loop** (LLM + inner tool calls, multiple round-trips). Use `edTool` on the outer boundary and let the AI interceptor record each inner LLM call. See [Agent-loop pattern](#agent-loop-pattern) above.
|
|
391
|
+
- The function is a **direct single-call use** of an auto-intercepted provider's SDK. The interceptor already records it as a `type: 'ai'` event with token usage — adding `wrapAI` only adds a redundant labelled wrapper.
|
|
392
|
+
- The function **does not call an LLM**. Use `edTool`.
|
|
310
393
|
|
|
311
394
|
**AI mocking (subprocess / test runner mode):** `wrapAI` also checks `resolveAIMock` at call time, so the dashboard can mock LLM responses the same way it mocks tool calls — without modifying your server code. Configure an `AIMockConfig` in the dashboard UI or pass it programmatically via the `aiMockConfig` option when running a workflow.
|
|
312
395
|
|
|
@@ -396,21 +479,25 @@ This file loads the SDK, shares the module instance with `ed_workflows.ts`, and
|
|
|
396
479
|
|
|
397
480
|
```ts
|
|
398
481
|
// ed_tools.ts
|
|
482
|
+
import { createRequire } from 'node:module';
|
|
399
483
|
import { setElasticDashModule } from './ed_workflows';
|
|
400
484
|
|
|
401
|
-
let
|
|
485
|
+
let edTool: <T extends (...args: any[]) => any>(name: string, fn: T) => T = (_name, fn) => fn;
|
|
486
|
+
|
|
487
|
+
// `createRequire(import.meta.url)` works in BOTH ESM (`"type": "module"`)
|
|
488
|
+
// and CJS projects. Do NOT use `eval('require')` — it silently throws in
|
|
489
|
+
// ESM and the whole integration produces zero traces with zero logs.
|
|
490
|
+
const nodeRequire = createRequire(import.meta.url);
|
|
402
491
|
|
|
403
492
|
try {
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
const _edModule = (eval('require') as (id: string) => any)('elasticdash-sdk');
|
|
407
|
-
wrapTool = _edModule.wrapTool ?? wrapTool;
|
|
493
|
+
const _edModule = nodeRequire('elasticdash-sdk');
|
|
494
|
+
edTool = _edModule.edTool ?? _edModule.wrapTool ?? edTool;
|
|
408
495
|
setElasticDashModule(_edModule);
|
|
409
|
-
} catch {
|
|
410
|
-
|
|
496
|
+
} catch (err) {
|
|
497
|
+
console.error('[ed_tools] failed to load elasticdash-sdk:', err);
|
|
411
498
|
}
|
|
412
499
|
|
|
413
|
-
export const myTool =
|
|
500
|
+
export const myTool = edTool('myTool', async (input: { query: string }) => {
|
|
414
501
|
// ... your tool logic
|
|
415
502
|
});
|
|
416
503
|
```
|
|
@@ -711,6 +798,26 @@ npx elasticdash dashboard --port 5000
|
|
|
711
798
|
|
|
712
799
|
Optional project file: `ed_workers.ts` can be used by your app architecture (for example, exporting worker handlers), but it is not required or discovered by the ElasticDash CLI/dashboard.
|
|
713
800
|
|
|
801
|
+
### Debugging reruns
|
|
802
|
+
|
|
803
|
+
Workflow and tool reruns each run in an isolated subprocess. When a rerun hangs, runs unexpectedly slowly, or fails with an opaque error, set these environment variables to surface what the parent and the worker are doing:
|
|
804
|
+
|
|
805
|
+
| Variable | Default | Effect |
|
|
806
|
+
|---|---|---|
|
|
807
|
+
| `ELASTICDASH_DEBUG` | unset | When `1`, parent and worker emit stage breadcrumbs to stderr (`stage=spawned`, `stage=payload-written`, `stage=first-stdout`, `stage=workflow-call-start/end`, `stage=closed`, etc.) with `pid` and `elapsedMs`. |
|
|
808
|
+
| `ELASTICDASH_HEARTBEAT_MS` | `5000` | Interval (ms) for the parent to log `still running pid=… elapsedMs=…` while a subprocess is alive. Set `0` to disable. Only emitted when `ELASTICDASH_DEBUG=1`. |
|
|
809
|
+
| `ELASTICDASH_TOOL_TIMEOUT_MS` | unset (no timeout) | When set, the parent kills the **tool** subprocess after N ms (`SIGTERM`, then `SIGKILL` after a 2s grace) and surfaces `Tool subprocess timed out after Nms` with the child's exit code, signal, and last stderr. |
|
|
810
|
+
| `ELASTICDASH_WORKFLOW_TIMEOUT_MS` | unset (no timeout) | Same as above for the **workflow** subprocess. |
|
|
811
|
+
|
|
812
|
+
On failure, the parent's `error` string now always includes `[exit=… signal=… elapsedMs=… pid=… stderrBytes=…]` plus the last 1 KB of stderr — so an empty-output failure can now be told apart from a crash or signal kill.
|
|
813
|
+
|
|
814
|
+
Example:
|
|
815
|
+
|
|
816
|
+
```bash
|
|
817
|
+
ELASTICDASH_DEBUG=1 ELASTICDASH_HEARTBEAT_MS=2000 ELASTICDASH_TOOL_TIMEOUT_MS=30000 \
|
|
818
|
+
npx elasticdash dashboard
|
|
819
|
+
```
|
|
820
|
+
|
|
714
821
|
## TypeScript Setup
|
|
715
822
|
|
|
716
823
|
For typed globals and matchers, extend your test directory's `tsconfig.json`:
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dashboard-server.d.ts","sourceRoot":"","sources":["../src/dashboard-server.ts"],"names":[],"mappings":"AAeA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,YAAY,EAAE,CAAA;IACzB,KAAK,EAAE,QAAQ,EAAE,CAAA;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,OAAO,CAAA;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACvB;AA2CD,6DAA6D;AAC7D,MAAM,WAAW,aAAa;IAC5B,oHAAoH;IACpH,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,eAAe,CAAA;IAC3C,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,wEAAwE;IACxE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAAA;CAClC;AAED,iEAAiE;AACjE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,eAAe,CAAA;IAC3C,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,YAAY;IAC3B,CAAC,SAAS,EAAE,MAAM,GAAG,WAAW,CAAA;CACjC;
|
|
1
|
+
{"version":3,"file":"dashboard-server.d.ts","sourceRoot":"","sources":["../src/dashboard-server.ts"],"names":[],"mappings":"AAeA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,OAAO,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;IAChB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,YAAY,EAAE,CAAA;IACzB,KAAK,EAAE,QAAQ,EAAE,CAAA;CAClB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,OAAO,CAAA;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACvB;AA2CD,6DAA6D;AAC7D,MAAM,WAAW,aAAa;IAC5B,oHAAoH;IACpH,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,eAAe,CAAA;IAC3C,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,wEAAwE;IACxE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAAA;CAClC;AAED,iEAAiE;AACjE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,eAAe,CAAA;IAC3C,WAAW,CAAC,EAAE,MAAM,EAAE,CAAA;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CACnC;AAED,MAAM,WAAW,YAAY;IAC3B,CAAC,SAAS,EAAE,MAAM,GAAG,WAAW,CAAA;CACjC;AAg7GD,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAChC,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACtC,cAAc,CAAC,EAAE,kBAAkB,GAAG,MAAM,CAAA;CAC7C;AA+ID;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,sBAA2B,GACnC,OAAO,CAAC,eAAe,CAAC,CA2d1B;AAiFD,eAAO,MAAM,aAAa,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAa,CAAC"}
|
package/dist/dashboard-server.js
CHANGED
|
@@ -288,6 +288,12 @@ function runToolInSubprocess(toolsModulePath, toolName, args) {
|
|
|
288
288
|
}
|
|
289
289
|
function runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowName, args, input, options) {
|
|
290
290
|
return new Promise((resolve) => {
|
|
291
|
+
const startMs = Date.now();
|
|
292
|
+
const elapsed = () => Date.now() - startMs;
|
|
293
|
+
const debug = (...a) => {
|
|
294
|
+
if (process.env.ELASTICDASH_DEBUG === '1')
|
|
295
|
+
console.error(...a);
|
|
296
|
+
};
|
|
291
297
|
const workerScript = new URL('./workflow-runner-worker.js', import.meta.url).pathname;
|
|
292
298
|
const projectDir = path.dirname(workflowsModulePath);
|
|
293
299
|
const denoProject = isDenoProject(projectDir);
|
|
@@ -304,13 +310,55 @@ function runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowN
|
|
|
304
310
|
cwd: projectDir,
|
|
305
311
|
stdio: ['pipe', 'pipe', 'pipe', 'pipe'],
|
|
306
312
|
});
|
|
313
|
+
const pid = child.pid ?? -1;
|
|
314
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=spawned pid=${pid} elapsedMs=${elapsed()} workflow=${workflowName}`);
|
|
315
|
+
// Heartbeat — workflows can be long; without this the dashboard is blind.
|
|
316
|
+
// 0 disables. Default 5s.
|
|
317
|
+
const heartbeatMs = Number(process.env.ELASTICDASH_HEARTBEAT_MS ?? 5000);
|
|
318
|
+
const heartbeat = heartbeatMs > 0
|
|
319
|
+
? setInterval(() => {
|
|
320
|
+
debug(`[elasticdash dashboard] workflow subprocess heartbeat pid=${pid} elapsedMs=${elapsed()} workflow=${workflowName}`);
|
|
321
|
+
}, heartbeatMs)
|
|
322
|
+
: null;
|
|
323
|
+
// Optional kill switch. Default unset = no timeout (preserves prior behavior).
|
|
324
|
+
let timedOut = false;
|
|
325
|
+
const timeoutMs = Number(process.env.ELASTICDASH_WORKFLOW_TIMEOUT_MS ?? 0);
|
|
326
|
+
const timeout = timeoutMs > 0
|
|
327
|
+
? setTimeout(() => {
|
|
328
|
+
timedOut = true;
|
|
329
|
+
debug(`[elasticdash dashboard] workflow subprocess TIMEOUT pid=${pid} after ${timeoutMs}ms — sending SIGTERM`);
|
|
330
|
+
try {
|
|
331
|
+
child.kill('SIGTERM');
|
|
332
|
+
}
|
|
333
|
+
catch { /* already dead */ }
|
|
334
|
+
setTimeout(() => {
|
|
335
|
+
try {
|
|
336
|
+
child.kill('SIGKILL');
|
|
337
|
+
}
|
|
338
|
+
catch { /* already dead */ }
|
|
339
|
+
}, 2000);
|
|
340
|
+
}, timeoutMs)
|
|
341
|
+
: null;
|
|
342
|
+
const cleanup = () => {
|
|
343
|
+
if (heartbeat)
|
|
344
|
+
clearInterval(heartbeat);
|
|
345
|
+
if (timeout)
|
|
346
|
+
clearTimeout(timeout);
|
|
347
|
+
};
|
|
307
348
|
let fd3Data = '';
|
|
308
349
|
let stderr = '';
|
|
350
|
+
let sawFd3 = false;
|
|
351
|
+
let sawStdout = false;
|
|
352
|
+
let sawStderr = false;
|
|
309
353
|
// Line-buffer stdout so that large result JSON lines split across multiple
|
|
310
354
|
// data events are reassembled before processing.
|
|
311
355
|
const WORKFLOW_RESULT_PREFIX = '__ELASTICDASH_RESULT__:';
|
|
312
356
|
let stdoutBuf = '';
|
|
313
357
|
child.stdout.on('data', (chunk) => {
|
|
358
|
+
if (!sawStdout) {
|
|
359
|
+
sawStdout = true;
|
|
360
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=first-stdout pid=${pid} elapsedMs=${elapsed()}`);
|
|
361
|
+
}
|
|
314
362
|
stdoutBuf += chunk.toString();
|
|
315
363
|
const lines = stdoutBuf.split('\n');
|
|
316
364
|
stdoutBuf = lines.pop() ?? ''; // keep last (possibly incomplete) line
|
|
@@ -325,14 +373,25 @@ function runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowN
|
|
|
325
373
|
}
|
|
326
374
|
});
|
|
327
375
|
child.stderr.on('data', (chunk) => {
|
|
376
|
+
if (!sawStderr) {
|
|
377
|
+
sawStderr = true;
|
|
378
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=first-stderr pid=${pid} elapsedMs=${elapsed()}`);
|
|
379
|
+
}
|
|
328
380
|
stderr += chunk.toString();
|
|
329
381
|
process.stderr.write(chunk);
|
|
330
382
|
});
|
|
331
383
|
const fd3 = child.stdio[3];
|
|
332
384
|
fd3?.on('data', (chunk) => {
|
|
385
|
+
if (!sawFd3) {
|
|
386
|
+
sawFd3 = true;
|
|
387
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=first-fd3 pid=${pid} elapsedMs=${elapsed()}`);
|
|
388
|
+
}
|
|
333
389
|
fd3Data += chunk.toString();
|
|
334
390
|
});
|
|
335
|
-
child.on('close', () => {
|
|
391
|
+
child.on('close', (code, signal) => {
|
|
392
|
+
cleanup();
|
|
393
|
+
const elapsedMs = elapsed();
|
|
394
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=closed pid=${pid} code=${code} signal=${signal ?? 'none'} elapsedMs=${elapsedMs} stderrBytes=${stderr.length} fd3Bytes=${fd3Data.length}`);
|
|
336
395
|
// Flush any remaining buffered stdout line (e.g. result with no trailing newline)
|
|
337
396
|
if (stdoutBuf.startsWith(WORKFLOW_RESULT_PREFIX)) {
|
|
338
397
|
fd3Data += stdoutBuf.slice(WORKFLOW_RESULT_PREFIX.length);
|
|
@@ -345,11 +404,24 @@ function runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowN
|
|
|
345
404
|
resolve(JSON.parse(fd3Data));
|
|
346
405
|
return;
|
|
347
406
|
}
|
|
348
|
-
catch {
|
|
407
|
+
catch (parseErr) {
|
|
408
|
+
const detail = `[exit=${code} signal=${signal ?? 'none'} elapsedMs=${elapsedMs} pid=${pid}] fd3 payload failed to parse: ${parseErr.message}`;
|
|
409
|
+
resolve({ ok: false, error: detail });
|
|
410
|
+
return;
|
|
411
|
+
}
|
|
349
412
|
}
|
|
350
|
-
|
|
413
|
+
const stderrExcerpt = stderr.length > 1024 ? `…${stderr.slice(-1024)}` : stderr;
|
|
414
|
+
const detail = `[exit=${code} signal=${signal ?? 'none'} elapsedMs=${elapsedMs} pid=${pid} stderrBytes=${stderr.length}]`;
|
|
415
|
+
const baseError = timedOut
|
|
416
|
+
? `Workflow subprocess timed out after ${timeoutMs}ms`
|
|
417
|
+
: (stderr.trim() || 'Workflow subprocess produced no output.');
|
|
418
|
+
const errorMsg = stderr.trim()
|
|
419
|
+
? `${baseError} ${detail}`
|
|
420
|
+
: `${baseError} ${detail}${stderrExcerpt ? `\nLast stderr: ${stderrExcerpt}` : ''}`;
|
|
421
|
+
resolve({ ok: false, error: errorMsg });
|
|
351
422
|
});
|
|
352
423
|
child.on('error', (err) => {
|
|
424
|
+
cleanup();
|
|
353
425
|
const hint = denoProject && err.code === 'ENOENT'
|
|
354
426
|
? ' (Deno project detected — ensure "deno" is installed and available in PATH)'
|
|
355
427
|
: '';
|
|
@@ -373,6 +445,7 @@ function runWorkflowInSubprocess(workflowsModulePath, toolsModulePath, workflowN
|
|
|
373
445
|
});
|
|
374
446
|
child.stdin.write(payload);
|
|
375
447
|
child.stdin.end(); // Always close stdin to avoid subprocess hang
|
|
448
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=payload-written pid=${pid} elapsedMs=${elapsed()} payloadBytes=${payload.length}`);
|
|
376
449
|
});
|
|
377
450
|
}
|
|
378
451
|
async function runToolObservation(cwd, observation, tools) {
|