elasticdash-sdk 0.2.6 → 0.2.7-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -18
- package/dist/dashboard-server.d.ts.map +1 -1
- package/dist/dashboard-server.js +76 -3
- package/dist/dashboard-server.js.map +1 -1
- package/dist/execution/tool-runner.d.ts.map +1 -1
- package/dist/execution/tool-runner.js +66 -5
- package/dist/execution/tool-runner.js.map +1 -1
- package/dist/index.cjs +57 -6
- package/dist/tool-runner-worker.js +27 -2
- package/dist/tool-runner-worker.js.map +1 -1
- package/dist/trigger-executor.d.ts.map +1 -1
- package/dist/trigger-executor.js +3 -1
- package/dist/trigger-executor.js.map +1 -1
- package/dist/workflow-runner-worker.js +24 -0
- package/dist/workflow-runner-worker.js.map +1 -1
- package/docs/agent-coding-instructions.md +8 -5
- package/docs/agent-integration-guide.md +130 -10
- package/package.json +1 -1
- package/src/dashboard-server.ts +71 -3
- package/src/execution/tool-runner.ts +62 -5
- package/src/tool-runner-worker.ts +22 -2
- package/src/trigger-executor.ts +3 -1
- package/src/workflow-runner-worker.ts +23 -0
|
@@ -49,11 +49,13 @@ Add to `.gitignore`:
|
|
|
49
49
|
.ed_traces/
|
|
50
50
|
```
|
|
51
51
|
|
|
52
|
+
> **Do not stop here — Step 1 is not a complete integration.** Without Steps 2–4, no tool calls are wrapped and no traces are produced. Continue to Step 2 before reporting "done" to the user.
|
|
53
|
+
|
|
52
54
|
---
|
|
53
55
|
|
|
54
56
|
## Step 2: Create `ed_tools.ts`
|
|
55
57
|
|
|
56
|
-
Create `ed_tools.ts` in the project root. This file wraps each tool function with `
|
|
58
|
+
Create `ed_tools.ts` in the project root. This file wraps each tool function with `edTool()` for automatic tracing, mocking, telemetry, and CLI/MCP rerun discovery. (`edTool` is `wrapTool` + global registry registration — prefer it as the default. Drop down to `wrapTool` only for inline closures that should NOT be discoverable by name.)
|
|
57
59
|
|
|
58
60
|
### Template
|
|
59
61
|
|
|
@@ -70,14 +72,14 @@ import { originalTool2 } from './utils/YOUR_SOURCE_2'
|
|
|
70
72
|
// ---------------------------------------------------------------------------
|
|
71
73
|
|
|
72
74
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
73
|
-
type
|
|
75
|
+
type EdToolFn = <T extends (...args: any[]) => any>(name: string, fn: T) => T
|
|
74
76
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
75
|
-
let
|
|
77
|
+
let edTool: EdToolFn = (_name: string, fn: any) => fn
|
|
76
78
|
|
|
77
79
|
try {
|
|
78
80
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
79
81
|
const _edModule = (eval('require') as (id: string) => any)('elasticdash-sdk')
|
|
80
|
-
|
|
82
|
+
edTool = _edModule.edTool ?? _edModule.wrapTool ?? edTool
|
|
81
83
|
// Share the module instance with ed_workflows.ts so trace hooks use the same context
|
|
82
84
|
setElasticDashModule(_edModule)
|
|
83
85
|
} catch {
|
|
@@ -88,11 +90,11 @@ try {
|
|
|
88
90
|
// Wrapped tools — one export per tool
|
|
89
91
|
// ---------------------------------------------------------------------------
|
|
90
92
|
|
|
91
|
-
export const myTool1 =
|
|
93
|
+
export const myTool1 = edTool('myTool1', async (input: any) => {
|
|
92
94
|
return await originalTool1(input)
|
|
93
95
|
})
|
|
94
96
|
|
|
95
|
-
export const myTool2 =
|
|
97
|
+
export const myTool2 = edTool('myTool2', async (input: any) => {
|
|
96
98
|
const { someField } = input as { someField: string }
|
|
97
99
|
return await originalTool2(someField)
|
|
98
100
|
})
|
|
@@ -100,14 +102,14 @@ export const myTool2 = wrapTool('myTool2', async (input: any) => {
|
|
|
100
102
|
|
|
101
103
|
### Key patterns
|
|
102
104
|
|
|
103
|
-
- **`
|
|
105
|
+
- **`edTool(name, fn)`** wraps the function with automatic tracing, mocking, telemetry, and global registry registration so the CLI `run-tool <name>` and MCP `run_tool` can rerun it by name. Falls back to a passthrough if `elasticdash-sdk` is not installed.
|
|
104
106
|
- **`eval('require')`** is used instead of `import()` to share the same module instance across `ed_tools.ts` and `ed_workflows.ts`. This avoids ESM/CJS dual-instance issues.
|
|
105
|
-
- **`setElasticDashModule`** shares the loaded module with `ed_workflows.ts` so `edStartTrace`/`edEndTrace` use the same tracing context as `
|
|
107
|
+
- **`setElasticDashModule`** shares the loaded module with `ed_workflows.ts` so `edStartTrace`/`edEndTrace` use the same tracing context as `edTool`.
|
|
106
108
|
- The exported name (e.g., `myTool1`) can differ from the original function name (e.g., `originalTool1`). The call sites in existing source files will be updated to use the new name in Step 4.
|
|
107
109
|
|
|
108
110
|
### Important rules
|
|
109
111
|
|
|
110
|
-
- The string name passed to `wrapTool()` **must match** the exported function name exactly.
|
|
112
|
+
- The string name passed to `edTool()` (or `wrapTool()`) **must match** the exported function name exactly.
|
|
111
113
|
- Each tool function must accept a single input object and return a plain value (JSON-serializable).
|
|
112
114
|
- Tool functions must not close over HTTP context, framework state, or database clients — extract pure logic first.
|
|
113
115
|
|
|
@@ -141,15 +143,43 @@ Every `ed_workflows.ts` should export `edStartTrace` and `edEndTrace`. These are
|
|
|
141
143
|
|
|
142
144
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
143
145
|
let _ed: any = null
|
|
146
|
+
let _obsInitialised = false
|
|
144
147
|
|
|
145
148
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
146
149
|
export function setElasticDashModule(mod: any): void {
|
|
147
150
|
_ed = mod
|
|
148
151
|
}
|
|
149
152
|
|
|
153
|
+
/**
|
|
154
|
+
* Initialise observability through the SHARED SDK module instance.
|
|
155
|
+
* Call this once at process startup (e.g. from main.ts or your server
|
|
156
|
+
* entry point) BEFORE any workflow runs. If env vars are set this is
|
|
157
|
+
* also called lazily from edStartTrace, so this explicit form is for
|
|
158
|
+
* projects that want predictable, up-front init (errors are logged, not thrown).
|
|
159
|
+
*/
|
|
160
|
+
export function edInitObservability(opts?: { serverUrl?: string; apiKey?: string }): void {
|
|
161
|
+
if (!_ed || _obsInitialised) return
|
|
162
|
+
const serverUrl = opts?.serverUrl
|
|
163
|
+
?? process.env.ELASTICDASH_API_URL
|
|
164
|
+
?? process.env.ELASTICDASH_SERVER_URL
|
|
165
|
+
?? process.env.ELASTICDASH_SERVER
|
|
166
|
+
const apiKey = opts?.apiKey ?? process.env.ELASTICDASH_API_KEY
|
|
167
|
+
if (!serverUrl || !apiKey) return
|
|
168
|
+
try {
|
|
169
|
+
_ed.initObservability({ serverUrl, apiKey })
|
|
170
|
+
_obsInitialised = true
|
|
171
|
+
} catch (err) {
|
|
172
|
+
console.error('[ed_workflows] edInitObservability error:', err)
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
150
176
|
export const edStartTrace = async (workflowName: string): Promise<void> => {
|
|
151
177
|
if (!_ed) return
|
|
152
178
|
try {
|
|
179
|
+
// Lazy init from env vars on first trace — keeps the simple case
|
|
180
|
+
// ("set ELASTICDASH_API_URL + ELASTICDASH_API_KEY, just run") working
|
|
181
|
+
// without an explicit init call.
|
|
182
|
+
if (!_obsInitialised) edInitObservability()
|
|
153
183
|
await _ed.tryAutoInitHttpContext()
|
|
154
184
|
_ed.startTrace(workflowName)
|
|
155
185
|
} catch (err) {
|
|
@@ -165,8 +195,32 @@ export const edEndTrace = (): void => {
|
|
|
165
195
|
console.error('[ed_workflows] edEndTrace error:', err)
|
|
166
196
|
}
|
|
167
197
|
}
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* Flush remaining trace events and close the backend connection.
|
|
201
|
+
* Call from a `finally` block at the end of your process lifecycle
|
|
202
|
+
* (CLI: in main() finally; HTTP server: rarely needed — the SDK
|
|
203
|
+
* auto-registers SIGTERM/SIGINT handlers that call this).
|
|
204
|
+
*
|
|
205
|
+
* The SDK's auto-exit hooks (registered by initObservability) are
|
|
206
|
+
* async; for short-lived CLI scripts the process can terminate BEFORE
|
|
207
|
+
* those hooks complete and drop the final event batch. Explicit
|
|
208
|
+
* shutdown via this helper is the only guarantee that the last batch
|
|
209
|
+
* lands.
|
|
210
|
+
*/
|
|
211
|
+
export const edShutdownObservability = async (): Promise<void> => {
|
|
212
|
+
if (!_ed || !_obsInitialised) return
|
|
213
|
+
try {
|
|
214
|
+
await _ed.shutdownObservability()
|
|
215
|
+
_obsInitialised = false
|
|
216
|
+
} catch (err) {
|
|
217
|
+
console.error('[ed_workflows] edShutdownObservability error:', err)
|
|
218
|
+
}
|
|
219
|
+
}
|
|
168
220
|
```
|
|
169
221
|
|
|
222
|
+
> **Why route init through `_ed` instead of importing `initObservability` directly?** The SDK uses AsyncLocalStorage to correlate events. Both `ed_tools.ts` and `ed_workflows.ts` must share the same SDK module instance — that's why `ed_tools.ts` loads the SDK via `eval('require')` and passes it through `setElasticDashModule`. If `main.ts` does `import { initObservability } from 'elasticdash-sdk'` directly, the ESM-loaded copy is a **different module instance** from the CJS-loaded copy that `_ed` references — init writes to one store, `startTrace` reads from another, and you get `[elasticdash] startTrace: observability not initialised` at runtime. Always init through `edInitObservability` from `ed_workflows.ts`.
|
|
223
|
+
|
|
170
224
|
### Workflow exports — simple case
|
|
171
225
|
|
|
172
226
|
For non-framework projects where the workflow can be imported directly:
|
|
@@ -221,7 +275,7 @@ export const YOUR_WORKFLOW = async (input: {
|
|
|
221
275
|
```
|
|
222
276
|
ed_tools.ts
|
|
223
277
|
├── imports original functions from services/utils
|
|
224
|
-
├── wraps each with
|
|
278
|
+
├── wraps each with edTool() for tracing + rerun registration
|
|
225
279
|
└── exports wrapped versions with the SAME or similar names
|
|
226
280
|
|
|
227
281
|
ed_workflows.ts
|
|
@@ -239,6 +293,32 @@ Existing source files (MODIFIED):
|
|
|
239
293
|
|
|
240
294
|
### What to do
|
|
241
295
|
|
|
296
|
+
**0. Add `edInitObservability` to your entry point.**
|
|
297
|
+
|
|
298
|
+
Call `edInitObservability` once at process startup so observability is wired up through the SAME SDK module instance that `ed_tools.ts` and `ed_workflows.ts` share. Do this BEFORE any workflow runs. Skipping this is the #1 cause of `[elasticdash] startTrace: observability not initialised` errors at runtime.
|
|
299
|
+
|
|
300
|
+
For a CLI / standalone Node script — init at the top, shutdown in a `finally` block:
|
|
301
|
+
|
|
302
|
+
```ts
|
|
303
|
+
// src/main.ts
|
|
304
|
+
import 'dotenv/config'
|
|
305
|
+
import { edInitObservability, edShutdownObservability } from '../ed_workflows.js'
|
|
306
|
+
import { researchManager } from './manager.js'
|
|
307
|
+
|
|
308
|
+
async function main() {
|
|
309
|
+
edInitObservability() // env vars: ELASTICDASH_API_URL + ELASTICDASH_API_KEY
|
|
310
|
+
try {
|
|
311
|
+
// ... rest of your main() ...
|
|
312
|
+
} finally {
|
|
313
|
+
await edShutdownObservability() // guarantees the final batch lands
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
The `finally` block that awaits `edShutdownObservability()` is **required for CLIs**. The SDK auto-registers `beforeExit`/`SIGTERM`/`SIGINT` handlers, but those are async; for short-lived scripts the process can tear down before they complete, dropping the final batch.
|
|
319
|
+
|
|
320
|
+
For Next.js / Remix / SvelteKit / Express, call `edInitObservability()` in your framework's instrumentation hook OR at the very top of your server entry file before any route handler is registered. Explicit shutdown is rarely needed — the server stays up; the auto-registered SIGTERM handler covers graceful restarts. Do NOT replace `edInitObservability` with `import { initObservability } from 'elasticdash-sdk'` — that loads a different module instance and the `[elasticdash] startTrace: observability not initialised` error comes back. See Step 3's "Why route init through `_ed`?" callout.
|
|
321
|
+
|
|
242
322
|
**1. Find every file that calls a tool function and update its imports:**
|
|
243
323
|
|
|
244
324
|
For each tool exported from `ed_tools.ts`, search the codebase for files that import the original function. Update the import to come from `ed_tools` instead.
|
|
@@ -433,6 +513,46 @@ This confirms:
|
|
|
433
513
|
|
|
434
514
|
**If it fails:** Check that `.env` has valid `ELASTICDASH_API_URL` and `ELASTICDASH_API_KEY` values. If the API key is rejected, the user needs to get a new one from https://app.elasticdash.com.
|
|
435
515
|
|
|
516
|
+
### Verifying programmatic init from the integrated app
|
|
517
|
+
|
|
518
|
+
`npx elasticdash observe` is the *CLI* path — it runs `initObservability` in its own process. The actual integrated app (via `edInitObservability` in `ed_workflows.ts`) also calls `initObservability` programmatically, opens the same socket, installs the same AI interceptor, and pushes events on the same batcher. **You do not need `npx elasticdash observe` running for the integrated app to produce traces.**
|
|
519
|
+
|
|
520
|
+
To verify the programmatic path is working, set `ELASTICDASH_DEBUG=1` in the user's `.env` and run the user's app once. You should see these lines on stderr on the first trace:
|
|
521
|
+
|
|
522
|
+
```
|
|
523
|
+
[elasticdash] Observability initialised — sessionId=<uuid> workflow=<name> server=<url>
|
|
524
|
+
[elasticdash] Socket connected: <socket-id>
|
|
525
|
+
[elasticdash] startTrace: <workflow-name>, capture=false
|
|
526
|
+
[elasticdash] Flushed N events (status 200)
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
If you see NONE of them, init is not running. The most common causes are the dual-module-instance trap (someone imported `initObservability` directly instead of using `edInitObservability`), missing env vars, or `ed_tools.ts` never being imported by the running code path (so `setElasticDashModule` was never called and `_ed` is null — `edStartTrace` / `edInitObservability` silently return).
|
|
530
|
+
|
|
531
|
+
If you see the first three lines but not the "Flushed" line, the final batch is being dropped on exit — confirm that the entry point calls `edShutdownObservability()` in a `finally` block.
|
|
532
|
+
|
|
533
|
+
### Where to view the traces
|
|
534
|
+
|
|
535
|
+
Once `[elasticdash] Flushed N events` appears on stderr, the events are live on the backend. Two ways to view them:
|
|
536
|
+
|
|
537
|
+
1. **Cloud dashboard** — open `https://app.elasticdash.com`, sign in, and select the project the API key belongs to; the run shows up in the traces list. This is the simplest path for users who already have a cloud project.
|
|
538
|
+
2. **Local dashboard** — run `npx elasticdash dashboard` (opens `http://localhost:4573` and proxies the same backend). Useful for debugging since it lets the agent rerun individual workflow steps with mocked tools, browse the captured tool/AI events, and edit prompts in place. See [Workflows Dashboard](dashboard.md) for the full feature set.
|
|
539
|
+
|
|
540
|
+
For framework projects (Next.js / Remix / etc.), the dashboard can also **trigger workflow runs directly against the running dev server** via HTTP-mode reruns — see [HTTP Workflow Mode](../README.md#http-workflow-mode) in the README. Confirm by running `npm run dev` in the user's project, then `npx elasticdash dashboard` in a second terminal, and triggering a rerun from the dashboard UI.
|
|
541
|
+
|
|
542
|
+
**End-to-end flow recap** for the agent reporting "done" to the user:
|
|
543
|
+
|
|
544
|
+
```
|
|
545
|
+
1. npm install elasticdash-sdk ← Step 1
|
|
546
|
+
2. ed_tools.ts wraps tool functions ← Step 2
|
|
547
|
+
3. ed_workflows.ts has init/start/end/shutdown helpers + workflow exports ← Step 3
|
|
548
|
+
4. Entry point calls edInitObservability() then runs the workflow, finally edShutdownObservability() ← Step 4
|
|
549
|
+
5. .env has ELASTICDASH_API_URL + ELASTICDASH_API_KEY ← Step 6
|
|
550
|
+
6. User runs their app → sees [elasticdash] ... logs on stderr ← this section
|
|
551
|
+
7. User opens https://app.elasticdash.com or `npx elasticdash dashboard` → sees the trace
|
|
552
|
+
```
|
|
553
|
+
|
|
554
|
+
Only after step 7 has been confirmed is the integration complete end to end. If step 7 fails (the logs say "Flushed" but the trace doesn't appear), the most likely cause is that the API key belongs to a different project than the one the user is viewing — check the project picker in the dashboard.
|
|
555
|
+
|
|
436
556
|
After validation, stop the observe process (Ctrl+C) and inform the user that ElasticDash is integrated. Provide these commands for ongoing use:
|
|
437
557
|
|
|
438
558
|
```bash
|
package/package.json
CHANGED
package/src/dashboard-server.ts
CHANGED
|
@@ -453,6 +453,11 @@ function runWorkflowInSubprocess(
|
|
|
453
453
|
},
|
|
454
454
|
): Promise<WorkflowSubprocessResult> {
|
|
455
455
|
return new Promise((resolve) => {
|
|
456
|
+
const startMs = Date.now()
|
|
457
|
+
const elapsed = () => Date.now() - startMs
|
|
458
|
+
const debug = (...a: unknown[]) => {
|
|
459
|
+
if (process.env.ELASTICDASH_DEBUG === '1') console.error(...a)
|
|
460
|
+
}
|
|
456
461
|
const workerScript = new URL('./workflow-runner-worker.js', import.meta.url).pathname
|
|
457
462
|
const projectDir = path.dirname(workflowsModulePath)
|
|
458
463
|
const denoProject = isDenoProject(projectDir)
|
|
@@ -472,15 +477,52 @@ function runWorkflowInSubprocess(
|
|
|
472
477
|
cwd: projectDir,
|
|
473
478
|
stdio: ['pipe', 'pipe', 'pipe', 'pipe'],
|
|
474
479
|
})
|
|
480
|
+
const pid = child.pid ?? -1
|
|
481
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=spawned pid=${pid} elapsedMs=${elapsed()} workflow=${workflowName}`)
|
|
482
|
+
|
|
483
|
+
// Heartbeat — workflows can be long; without this the dashboard is blind.
|
|
484
|
+
// 0 disables. Default 5s.
|
|
485
|
+
const heartbeatMs = Number(process.env.ELASTICDASH_HEARTBEAT_MS ?? 5000)
|
|
486
|
+
const heartbeat = heartbeatMs > 0
|
|
487
|
+
? setInterval(() => {
|
|
488
|
+
debug(`[elasticdash dashboard] workflow subprocess heartbeat pid=${pid} elapsedMs=${elapsed()} workflow=${workflowName}`)
|
|
489
|
+
}, heartbeatMs)
|
|
490
|
+
: null
|
|
491
|
+
|
|
492
|
+
// Optional kill switch. Default unset = no timeout (preserves prior behavior).
|
|
493
|
+
let timedOut = false
|
|
494
|
+
const timeoutMs = Number(process.env.ELASTICDASH_WORKFLOW_TIMEOUT_MS ?? 0)
|
|
495
|
+
const timeout = timeoutMs > 0
|
|
496
|
+
? setTimeout(() => {
|
|
497
|
+
timedOut = true
|
|
498
|
+
debug(`[elasticdash dashboard] workflow subprocess TIMEOUT pid=${pid} after ${timeoutMs}ms — sending SIGTERM`)
|
|
499
|
+
try { child.kill('SIGTERM') } catch { /* already dead */ }
|
|
500
|
+
setTimeout(() => {
|
|
501
|
+
try { child.kill('SIGKILL') } catch { /* already dead */ }
|
|
502
|
+
}, 2000)
|
|
503
|
+
}, timeoutMs)
|
|
504
|
+
: null
|
|
505
|
+
|
|
506
|
+
const cleanup = () => {
|
|
507
|
+
if (heartbeat) clearInterval(heartbeat)
|
|
508
|
+
if (timeout) clearTimeout(timeout)
|
|
509
|
+
}
|
|
475
510
|
|
|
476
511
|
let fd3Data = ''
|
|
477
512
|
let stderr = ''
|
|
513
|
+
let sawFd3 = false
|
|
514
|
+
let sawStdout = false
|
|
515
|
+
let sawStderr = false
|
|
478
516
|
|
|
479
517
|
// Line-buffer stdout so that large result JSON lines split across multiple
|
|
480
518
|
// data events are reassembled before processing.
|
|
481
519
|
const WORKFLOW_RESULT_PREFIX = '__ELASTICDASH_RESULT__:'
|
|
482
520
|
let stdoutBuf = ''
|
|
483
521
|
child.stdout.on('data', (chunk) => {
|
|
522
|
+
if (!sawStdout) {
|
|
523
|
+
sawStdout = true
|
|
524
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=first-stdout pid=${pid} elapsedMs=${elapsed()}`)
|
|
525
|
+
}
|
|
484
526
|
stdoutBuf += chunk.toString()
|
|
485
527
|
const lines = stdoutBuf.split('\n')
|
|
486
528
|
stdoutBuf = lines.pop() ?? '' // keep last (possibly incomplete) line
|
|
@@ -494,15 +536,27 @@ function runWorkflowInSubprocess(
|
|
|
494
536
|
}
|
|
495
537
|
})
|
|
496
538
|
child.stderr.on('data', (chunk) => {
|
|
539
|
+
if (!sawStderr) {
|
|
540
|
+
sawStderr = true
|
|
541
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=first-stderr pid=${pid} elapsedMs=${elapsed()}`)
|
|
542
|
+
}
|
|
497
543
|
stderr += chunk.toString()
|
|
498
544
|
process.stderr.write(chunk)
|
|
499
545
|
})
|
|
500
546
|
const fd3 = child.stdio[3] as import('stream').Readable | null
|
|
501
547
|
fd3?.on('data', (chunk: Buffer | string) => {
|
|
548
|
+
if (!sawFd3) {
|
|
549
|
+
sawFd3 = true
|
|
550
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=first-fd3 pid=${pid} elapsedMs=${elapsed()}`)
|
|
551
|
+
}
|
|
502
552
|
fd3Data += chunk.toString()
|
|
503
553
|
})
|
|
504
554
|
|
|
505
|
-
child.on('close', () => {
|
|
555
|
+
child.on('close', (code, signal) => {
|
|
556
|
+
cleanup()
|
|
557
|
+
const elapsedMs = elapsed()
|
|
558
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=closed pid=${pid} code=${code} signal=${signal ?? 'none'} elapsedMs=${elapsedMs} stderrBytes=${stderr.length} fd3Bytes=${fd3Data.length}`)
|
|
559
|
+
|
|
506
560
|
// Flush any remaining buffered stdout line (e.g. result with no trailing newline)
|
|
507
561
|
if (stdoutBuf.startsWith(WORKFLOW_RESULT_PREFIX)) {
|
|
508
562
|
fd3Data += stdoutBuf.slice(WORKFLOW_RESULT_PREFIX.length)
|
|
@@ -514,12 +568,25 @@ function runWorkflowInSubprocess(
|
|
|
514
568
|
try {
|
|
515
569
|
resolve(JSON.parse(fd3Data))
|
|
516
570
|
return
|
|
517
|
-
} catch {
|
|
571
|
+
} catch (parseErr) {
|
|
572
|
+
const detail = `[exit=${code} signal=${signal ?? 'none'} elapsedMs=${elapsedMs} pid=${pid}] fd3 payload failed to parse: ${(parseErr as Error).message}`
|
|
573
|
+
resolve({ ok: false, error: detail })
|
|
574
|
+
return
|
|
575
|
+
}
|
|
518
576
|
}
|
|
519
|
-
|
|
577
|
+
const stderrExcerpt = stderr.length > 1024 ? `…${stderr.slice(-1024)}` : stderr
|
|
578
|
+
const detail = `[exit=${code} signal=${signal ?? 'none'} elapsedMs=${elapsedMs} pid=${pid} stderrBytes=${stderr.length}]`
|
|
579
|
+
const baseError = timedOut
|
|
580
|
+
? `Workflow subprocess timed out after ${timeoutMs}ms`
|
|
581
|
+
: (stderr.trim() || 'Workflow subprocess produced no output.')
|
|
582
|
+
const errorMsg = stderr.trim()
|
|
583
|
+
? `${baseError} ${detail}`
|
|
584
|
+
: `${baseError} ${detail}${stderrExcerpt ? `\nLast stderr: ${stderrExcerpt}` : ''}`
|
|
585
|
+
resolve({ ok: false, error: errorMsg })
|
|
520
586
|
})
|
|
521
587
|
|
|
522
588
|
child.on('error', (err) => {
|
|
589
|
+
cleanup()
|
|
523
590
|
const hint = denoProject && (err as NodeJS.ErrnoException).code === 'ENOENT'
|
|
524
591
|
? ' (Deno project detected — ensure "deno" is installed and available in PATH)'
|
|
525
592
|
: ''
|
|
@@ -544,6 +611,7 @@ function runWorkflowInSubprocess(
|
|
|
544
611
|
})
|
|
545
612
|
child.stdin.write(payload)
|
|
546
613
|
child.stdin.end() // Always close stdin to avoid subprocess hang
|
|
614
|
+
debug(`[elasticdash dashboard] workflow subprocess stage=payload-written pid=${pid} elapsedMs=${elapsed()} payloadBytes=${payload.length}`)
|
|
547
615
|
})
|
|
548
616
|
}
|
|
549
617
|
|
|
@@ -118,6 +118,7 @@ export function runToolInSubprocess(
|
|
|
118
118
|
return new Promise((resolve) => {
|
|
119
119
|
debugLog('[elasticdash portal] Spawning tool subprocess', { toolsModulePath, toolName, args, frozenEventsCount: frozenEvents?.length ?? 0 })
|
|
120
120
|
const startMs = Date.now()
|
|
121
|
+
const elapsed = () => Date.now() - startMs
|
|
121
122
|
const workerScript = resolveWorkerScript('../tool-runner-worker.js')
|
|
122
123
|
const projectDir = path.dirname(toolsModulePath)
|
|
123
124
|
const denoProject = isDenoProject(projectDir)
|
|
@@ -136,15 +137,50 @@ export function runToolInSubprocess(
|
|
|
136
137
|
cwd: projectDir,
|
|
137
138
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
138
139
|
})
|
|
140
|
+
const pid = child.pid ?? -1
|
|
141
|
+
debugLog(`[elasticdash portal] tool subprocess stage=spawned pid=${pid} elapsedMs=${elapsed()} tool=${toolName}`)
|
|
142
|
+
|
|
143
|
+
// Heartbeat: prove the parent is still waiting on a live child. 0 disables.
|
|
144
|
+
const heartbeatMs = Number(process.env.ELASTICDASH_HEARTBEAT_MS ?? 5000)
|
|
145
|
+
const heartbeat = heartbeatMs > 0
|
|
146
|
+
? setInterval(() => {
|
|
147
|
+
debugLog(`[elasticdash portal] tool subprocess heartbeat pid=${pid} elapsedMs=${elapsed()} tool=${toolName}`)
|
|
148
|
+
}, heartbeatMs)
|
|
149
|
+
: null
|
|
150
|
+
|
|
151
|
+
// Optional kill switch. Default unset = no timeout (preserves prior behavior).
|
|
152
|
+
let timedOut = false
|
|
153
|
+
const timeoutMs = Number(process.env.ELASTICDASH_TOOL_TIMEOUT_MS ?? 0)
|
|
154
|
+
const timeout = timeoutMs > 0
|
|
155
|
+
? setTimeout(() => {
|
|
156
|
+
timedOut = true
|
|
157
|
+
debugLog(`[elasticdash portal] tool subprocess TIMEOUT pid=${pid} after ${timeoutMs}ms — sending SIGTERM`)
|
|
158
|
+
try { child.kill('SIGTERM') } catch { /* already dead */ }
|
|
159
|
+
setTimeout(() => {
|
|
160
|
+
try { child.kill('SIGKILL') } catch { /* already dead */ }
|
|
161
|
+
}, 2000)
|
|
162
|
+
}, timeoutMs)
|
|
163
|
+
: null
|
|
164
|
+
|
|
165
|
+
const cleanup = () => {
|
|
166
|
+
if (heartbeat) clearInterval(heartbeat)
|
|
167
|
+
if (timeout) clearTimeout(timeout)
|
|
168
|
+
}
|
|
139
169
|
|
|
140
170
|
const RESULT_PREFIX = '__ELASTICDASH_RESULT__:'
|
|
141
171
|
let resultLine = ''
|
|
142
172
|
let stderr = ''
|
|
173
|
+
let sawStdout = false
|
|
174
|
+
let sawStderr = false
|
|
143
175
|
|
|
144
176
|
// Line-buffer stdout so that large result JSON lines split across multiple
|
|
145
177
|
// data events are reassembled before processing.
|
|
146
178
|
let stdoutBuf = ''
|
|
147
179
|
child.stdout.on('data', (chunk: Buffer) => {
|
|
180
|
+
if (!sawStdout) {
|
|
181
|
+
sawStdout = true
|
|
182
|
+
debugLog(`[elasticdash portal] tool subprocess stage=first-stdout pid=${pid} elapsedMs=${elapsed()}`)
|
|
183
|
+
}
|
|
148
184
|
stdoutBuf += chunk.toString()
|
|
149
185
|
const lines = stdoutBuf.split('\n')
|
|
150
186
|
stdoutBuf = lines.pop() ?? '' // keep last (possibly incomplete) line
|
|
@@ -157,12 +193,18 @@ export function runToolInSubprocess(
|
|
|
157
193
|
}
|
|
158
194
|
})
|
|
159
195
|
child.stderr.on('data', (chunk: Buffer) => {
|
|
196
|
+
if (!sawStderr) {
|
|
197
|
+
sawStderr = true
|
|
198
|
+
debugLog(`[elasticdash portal] tool subprocess stage=first-stderr pid=${pid} elapsedMs=${elapsed()}`)
|
|
199
|
+
}
|
|
160
200
|
stderr += chunk.toString()
|
|
161
201
|
process.stderr.write(chunk)
|
|
162
202
|
})
|
|
163
203
|
|
|
164
|
-
child.on('close', () => {
|
|
165
|
-
|
|
204
|
+
child.on('close', (code, signal) => {
|
|
205
|
+
cleanup()
|
|
206
|
+
const currentDurationMs = elapsed()
|
|
207
|
+
debugLog(`[elasticdash portal] tool subprocess stage=closed pid=${pid} code=${code} signal=${signal ?? 'none'} elapsedMs=${currentDurationMs} stderrBytes=${stderr.length}`)
|
|
166
208
|
|
|
167
209
|
// Flush any remaining buffered stdout line (e.g. result with no trailing newline)
|
|
168
210
|
if (stdoutBuf.startsWith(RESULT_PREFIX)) {
|
|
@@ -175,17 +217,31 @@ export function runToolInSubprocess(
|
|
|
175
217
|
try {
|
|
176
218
|
resolve({ ...JSON.parse(resultLine), currentDurationMs })
|
|
177
219
|
return
|
|
178
|
-
} catch {
|
|
220
|
+
} catch (parseErr) {
|
|
221
|
+
const detail = `[exit=${code} signal=${signal ?? 'none'} elapsedMs=${currentDurationMs} pid=${pid}] resultLine failed to parse: ${(parseErr as Error).message}`
|
|
222
|
+
resolve({ ok: false, error: detail, currentDurationMs })
|
|
223
|
+
return
|
|
224
|
+
}
|
|
179
225
|
}
|
|
180
|
-
|
|
226
|
+
|
|
227
|
+
const stderrExcerpt = stderr.length > 1024 ? `…${stderr.slice(-1024)}` : stderr
|
|
228
|
+
const detail = `[exit=${code} signal=${signal ?? 'none'} elapsedMs=${currentDurationMs} pid=${pid} stderrBytes=${stderr.length}]`
|
|
229
|
+
const baseError = timedOut
|
|
230
|
+
? `Tool subprocess timed out after ${timeoutMs}ms`
|
|
231
|
+
: (stderr.trim() || 'Tool subprocess produced no output.')
|
|
232
|
+
const errorMsg = stderr.trim()
|
|
233
|
+
? `${baseError} ${detail}`
|
|
234
|
+
: `${baseError} ${detail}${stderrExcerpt ? `\nLast stderr: ${stderrExcerpt}` : ''}`
|
|
235
|
+
resolve({ ok: false, error: errorMsg, currentDurationMs })
|
|
181
236
|
})
|
|
182
237
|
|
|
183
238
|
child.on('error', (err) => {
|
|
239
|
+
cleanup()
|
|
184
240
|
const hint = denoProject && (err as NodeJS.ErrnoException).code === 'ENOENT'
|
|
185
241
|
? ' (Deno project detected — ensure "deno" is installed and available in PATH)'
|
|
186
242
|
: ''
|
|
187
243
|
debugLog(`[elasticdash portal] Failed to spawn tool subprocess: ${err.message}${hint}`)
|
|
188
|
-
resolve({ ok: false, error: `Failed to spawn tool subprocess: ${err.message}${hint}`, currentDurationMs:
|
|
244
|
+
resolve({ ok: false, error: `Failed to spawn tool subprocess: ${err.message}${hint}`, currentDurationMs: elapsed() })
|
|
189
245
|
})
|
|
190
246
|
|
|
191
247
|
const payload = JSON.stringify({
|
|
@@ -196,6 +252,7 @@ export function runToolInSubprocess(
|
|
|
196
252
|
})
|
|
197
253
|
child.stdin.write(payload)
|
|
198
254
|
child.stdin.end()
|
|
255
|
+
debugLog(`[elasticdash portal] tool subprocess stage=payload-written pid=${pid} elapsedMs=${elapsed()} payloadBytes=${payload.length}`)
|
|
199
256
|
})
|
|
200
257
|
}
|
|
201
258
|
|
|
@@ -23,6 +23,13 @@ import { pathToFileURL } from 'node:url'
|
|
|
23
23
|
|
|
24
24
|
const RESULT_PREFIX = '__ELASTICDASH_RESULT__:'
|
|
25
25
|
|
|
26
|
+
const WORKER_START_MS = Date.now()
|
|
27
|
+
function stage(name: string, extra?: Record<string, unknown>): void {
|
|
28
|
+
if (process.env.ELASTICDASH_DEBUG !== '1') return
|
|
29
|
+
const tail = extra ? ' ' + Object.entries(extra).map(([k, v]) => `${k}=${typeof v === 'string' ? v : JSON.stringify(v)}`).join(' ') : ''
|
|
30
|
+
process.stderr.write(`[elasticdash-worker tool] stage=${name} pid=${process.pid} elapsedMs=${Date.now() - WORKER_START_MS}${tail}\n`)
|
|
31
|
+
}
|
|
32
|
+
|
|
26
33
|
function writeResult(result: unknown): Promise<void> {
|
|
27
34
|
return new Promise((resolve, reject) => {
|
|
28
35
|
process.stdout.write(RESULT_PREFIX + JSON.stringify(result) + '\n', (err) =>
|
|
@@ -158,6 +165,7 @@ function installFrozenFetchFallback(frozenEvents: FrozenEvent[]): void {
|
|
|
158
165
|
}
|
|
159
166
|
|
|
160
167
|
async function main() {
|
|
168
|
+
stage('boot')
|
|
161
169
|
const originalExit = process.exit.bind(process)
|
|
162
170
|
|
|
163
171
|
// Prevent the SDK's tryAutoInitHttpContext from triggering full observability
|
|
@@ -175,6 +183,7 @@ async function main() {
|
|
|
175
183
|
for await (const chunk of process.stdin) {
|
|
176
184
|
raw += chunk
|
|
177
185
|
}
|
|
186
|
+
stage('stdin-eof', { bytes: raw.length })
|
|
178
187
|
|
|
179
188
|
let payload: { toolsModulePath: string; toolName: string; args: unknown[]; frozenEvents?: FrozenEvent[] }
|
|
180
189
|
try {
|
|
@@ -184,6 +193,7 @@ async function main() {
|
|
|
184
193
|
originalExit(1)
|
|
185
194
|
return
|
|
186
195
|
}
|
|
196
|
+
stage('payload-parsed')
|
|
187
197
|
|
|
188
198
|
const { toolsModulePath, toolName, args, frozenEvents } = payload
|
|
189
199
|
|
|
@@ -193,12 +203,16 @@ async function main() {
|
|
|
193
203
|
const hasFrozen = frozenEvents && frozenEvents.length > 0
|
|
194
204
|
if (hasFrozen) {
|
|
195
205
|
await setupFrozenContext(frozenEvents)
|
|
206
|
+
stage('frozen-context-ready', { count: frozenEvents.length })
|
|
207
|
+
} else {
|
|
208
|
+
stage('frozen-context-skipped')
|
|
196
209
|
}
|
|
197
210
|
|
|
198
211
|
try {
|
|
199
212
|
let mod: any
|
|
200
213
|
try {
|
|
201
214
|
mod = await import(pathToFileURL(toolsModulePath).href)
|
|
215
|
+
stage('tool-module-imported')
|
|
202
216
|
} catch (importErr) {
|
|
203
217
|
const ie = importErr as Error
|
|
204
218
|
await writeResult({ ok: false, error: `Failed to import tool module: ${ie.stack || ie.message}` })
|
|
@@ -210,31 +224,37 @@ async function main() {
|
|
|
210
224
|
// as long as their containing module is reachable from toolsModulePath's
|
|
211
225
|
// import graph. Falls back to ed_tools-style module export lookup.
|
|
212
226
|
let fn: ((...a: unknown[]) => unknown) | undefined
|
|
227
|
+
let resolvedVia = 'none'
|
|
213
228
|
try {
|
|
214
229
|
const reg = await import('./tool-registry.js')
|
|
215
230
|
const registered = reg.getRegisteredTool(toolName)
|
|
216
|
-
if (registered) fn = registered.wrapped
|
|
231
|
+
if (registered) { fn = registered.wrapped; resolvedVia = 'registry' }
|
|
217
232
|
} catch {
|
|
218
233
|
// Registry module not available (older SDK build); fall through to export lookup.
|
|
219
234
|
}
|
|
220
235
|
if (!fn) {
|
|
221
236
|
const exported = mod[toolName]
|
|
222
|
-
if (typeof exported === 'function') fn = exported
|
|
237
|
+
if (typeof exported === 'function') { fn = exported; resolvedVia = 'module-export' }
|
|
223
238
|
}
|
|
224
239
|
if (typeof fn !== 'function') {
|
|
225
240
|
await writeResult({ ok: false, error: `"${toolName}" not found via edTool() registry or as an exported function in the module.` })
|
|
226
241
|
originalExit(1)
|
|
227
242
|
return
|
|
228
243
|
}
|
|
244
|
+
stage('tool-resolved', { tool: toolName, via: resolvedVia })
|
|
229
245
|
|
|
246
|
+
stage('tool-call-start', { tool: toolName })
|
|
230
247
|
const currentOutput = await fn(...args)
|
|
248
|
+
stage('tool-call-end', { tool: toolName })
|
|
231
249
|
await writeResult({ ok: true, currentOutput })
|
|
250
|
+
stage('result-written')
|
|
232
251
|
originalExit(0)
|
|
233
252
|
} catch (e) {
|
|
234
253
|
const err = e as Error
|
|
235
254
|
const errorMsg = err.stack || err.message || String(e)
|
|
236
255
|
process.stderr.write(`[elasticdash-worker] Tool execution failed:\n${errorMsg}\n`)
|
|
237
256
|
await writeResult({ ok: false, error: errorMsg })
|
|
257
|
+
stage('result-written', { ok: false })
|
|
238
258
|
originalExit(1)
|
|
239
259
|
} finally {
|
|
240
260
|
if (hasFrozen) restoreFrozenFetch()
|
package/src/trigger-executor.ts
CHANGED
|
@@ -101,6 +101,8 @@ export async function executeTrigger(
|
|
|
101
101
|
const runs: StepRunResult[] = []
|
|
102
102
|
|
|
103
103
|
for (let i = 0; i < trigger.runCount; i++) {
|
|
104
|
+
const runStart = Date.now()
|
|
105
|
+
debugLog(`[elasticdash] Trigger ${trigger.triggerId} step=${stepIndex + 1}/${totalSteps} name=${step.eventName} run=${i + 1}/${trigger.runCount} phase=start`)
|
|
104
106
|
const result = await executePortalTask(
|
|
105
107
|
{
|
|
106
108
|
taskId: `trigger-${trigger.triggerId}-${step.eventName}-${i}`,
|
|
@@ -130,7 +132,7 @@ export async function executeTrigger(
|
|
|
130
132
|
usageTotalTokens: result.usage?.totalTokens,
|
|
131
133
|
})
|
|
132
134
|
|
|
133
|
-
debugLog(`[elasticdash] Trigger ${trigger.triggerId} step=${step.eventName} run=${i} ok=${result.ok}`)
|
|
135
|
+
debugLog(`[elasticdash] Trigger ${trigger.triggerId} step=${stepIndex + 1}/${totalSteps} name=${step.eventName} run=${i + 1}/${trigger.runCount} phase=done ok=${result.ok} elapsedMs=${Date.now() - runStart}`)
|
|
134
136
|
}
|
|
135
137
|
|
|
136
138
|
stepResult = {
|