nodebench-mcp 2.28.0 → 2.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/dist/db.js +69 -0
- package/dist/db.js.map +1 -1
- package/dist/engine/conformance.d.ts +31 -0
- package/dist/engine/conformance.js +81 -0
- package/dist/engine/conformance.js.map +1 -0
- package/dist/engine/contextBridge.d.ts +67 -0
- package/dist/engine/contextBridge.js +392 -0
- package/dist/engine/contextBridge.js.map +1 -0
- package/dist/engine/server.d.ts +23 -0
- package/dist/engine/server.js +481 -0
- package/dist/engine/server.js.map +1 -0
- package/dist/engine/session.d.ts +55 -0
- package/dist/engine/session.js +139 -0
- package/dist/engine/session.js.map +1 -0
- package/dist/index.js +113 -11
- package/dist/index.js.map +1 -1
- package/dist/sandboxApi.d.ts +20 -0
- package/dist/sandboxApi.js +99 -0
- package/dist/sandboxApi.js.map +1 -0
- package/dist/tools/contextSandboxTools.d.ts +15 -0
- package/dist/tools/contextSandboxTools.js +469 -0
- package/dist/tools/contextSandboxTools.js.map +1 -0
- package/dist/tools/contextTools.d.ts +11 -0
- package/dist/tools/contextTools.js +175 -0
- package/dist/tools/contextTools.js.map +1 -0
- package/dist/tools/designGovernanceTools.d.ts +20 -0
- package/dist/tools/designGovernanceTools.js +872 -0
- package/dist/tools/designGovernanceTools.js.map +1 -0
- package/dist/tools/openclawTools.d.ts +1 -0
- package/dist/tools/openclawTools.js +780 -0
- package/dist/tools/openclawTools.js.map +1 -1
- package/dist/tools/progressiveDiscoveryTools.js +3 -3
- package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
- package/dist/tools/researchOptimizerTools.d.ts +17 -0
- package/dist/tools/researchOptimizerTools.js +454 -0
- package/dist/tools/researchOptimizerTools.js.map +1 -0
- package/dist/tools/scraplingTools.d.ts +15 -0
- package/dist/tools/scraplingTools.js +278 -0
- package/dist/tools/scraplingTools.js.map +1 -0
- package/dist/tools/thompsonProtocolTools.d.ts +58 -0
- package/dist/tools/thompsonProtocolTools.js +864 -0
- package/dist/tools/thompsonProtocolTools.js.map +1 -0
- package/dist/tools/toolRegistry.js +625 -0
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/toolsetRegistry.js +14 -0
- package/dist/toolsetRegistry.js.map +1 -1
- package/package.json +6 -2
package/README.md
CHANGED
|
@@ -194,6 +194,92 @@ All analytics data is stored locally in `~/.nodebench/analytics.db` and never le
|
|
|
194
194
|
|
|
195
195
|
---
|
|
196
196
|
|
|
197
|
+
## Headless Engine API (v2.30.0)
|
|
198
|
+
|
|
199
|
+
NodeBench now ships a **headless, API-first Agentic Engine** — plug it into any client workflow and sell results, not software seats.
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# Start MCP server with engine API on port 6276
|
|
203
|
+
npx nodebench-mcp --engine
|
|
204
|
+
|
|
205
|
+
# With auth token
|
|
206
|
+
npx nodebench-mcp --engine --engine-secret "your-token"
|
|
207
|
+
# or: ENGINE_SECRET=your-token npx nodebench-mcp --engine
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### API Endpoints
|
|
211
|
+
|
|
212
|
+
| Method | Path | Purpose |
|
|
213
|
+
|--------|------|---------|
|
|
214
|
+
| GET | `/` | Engine status, tool count, uptime |
|
|
215
|
+
| GET | `/api/health` | Health check |
|
|
216
|
+
| GET | `/api/tools` | List all available tools |
|
|
217
|
+
| POST | `/api/tools/:name` | Execute a single tool |
|
|
218
|
+
| GET | `/api/workflows` | List all 32 workflow chains |
|
|
219
|
+
| POST | `/api/workflows/:name` | Execute a workflow (with SSE streaming) |
|
|
220
|
+
| POST | `/api/sessions` | Create an isolated session |
|
|
221
|
+
| GET | `/api/sessions/:id` | Session status + call history |
|
|
222
|
+
| GET | `/api/sessions/:id/trace` | Full disclosure trace |
|
|
223
|
+
| GET | `/api/sessions/:id/report` | Conformance report |
|
|
224
|
+
| DELETE | `/api/sessions/:id` | End session |
|
|
225
|
+
| GET | `/api/presets` | List presets with tool counts |
|
|
226
|
+
|
|
227
|
+
### Quick Examples
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
# Execute a single tool
|
|
231
|
+
curl -X POST http://127.0.0.1:6276/api/tools/discover_tools \
|
|
232
|
+
-H "Content-Type: application/json" \
|
|
233
|
+
-d '{"args": {"query": "security audit"}, "preset": "full"}'
|
|
234
|
+
|
|
235
|
+
# Run a workflow with streaming
|
|
236
|
+
curl -N -X POST http://127.0.0.1:6276/api/workflows/fix_bug \
|
|
237
|
+
-H "Content-Type: application/json" \
|
|
238
|
+
-d '{"preset": "web_dev", "streaming": true}'
|
|
239
|
+
|
|
240
|
+
# Create a session, execute tools, get conformance report
|
|
241
|
+
SESSION=$(curl -s -X POST http://127.0.0.1:6276/api/sessions \
|
|
242
|
+
-H "Content-Type: application/json" \
|
|
243
|
+
-d '{"preset": "web_dev"}' | jq -r .sessionId)
|
|
244
|
+
|
|
245
|
+
curl -X POST "http://127.0.0.1:6276/api/tools/run_recon" \
|
|
246
|
+
-H "Content-Type: application/json" \
|
|
247
|
+
-d "{\"args\": {\"focusArea\": \"web\"}, \"sessionId\": \"$SESSION\"}"
|
|
248
|
+
|
|
249
|
+
curl "http://127.0.0.1:6276/api/sessions/$SESSION/report"
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### Conformance Reports
|
|
253
|
+
|
|
254
|
+
Every workflow execution produces a conformance report. The score is the percentage of checks passed; the checks include:
|
|
255
|
+
- **Step completeness** — did all required tools execute?
|
|
256
|
+
- **Quality gate** — did the quality gate pass?
|
|
257
|
+
- **Test layers** — were unit/integration/e2e results logged?
|
|
258
|
+
- **Flywheel** — was the methodology completed?
|
|
259
|
+
- **Learnings** — were findings banked for next time?
|
|
260
|
+
|
|
261
|
+
Grades: A (90+) / B (75+) / C (60+) / D (40+) / F (<40). Sell these reports as "Zero-bug deployment certificates" or "Automated WebMCP Conformance Reports."
|
|
262
|
+
|
|
263
|
+
### SSE Streaming
|
|
264
|
+
|
|
265
|
+
Workflow execution supports Server-Sent Events for real-time progress:
|
|
266
|
+
|
|
267
|
+
```
|
|
268
|
+
event: start
|
|
269
|
+
data: {"workflow":"fix_bug","totalSteps":7,"sessionId":"eng_..."}
|
|
270
|
+
|
|
271
|
+
event: step
|
|
272
|
+
data: {"stepIndex":0,"tool":"search_all_knowledge","status":"running"}
|
|
273
|
+
|
|
274
|
+
event: step
|
|
275
|
+
data: {"stepIndex":0,"tool":"search_all_knowledge","status":"complete","durationMs":42}
|
|
276
|
+
|
|
277
|
+
event: complete
|
|
278
|
+
data: {"totalSteps":7,"totalDurationMs":340,"conformanceScore":88,"grade":"B"}
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
197
283
|
## What You Get — The AI Flywheel
|
|
198
284
|
|
|
199
285
|
The default setup (no `--preset` flag) gives you **50 tools** that implement the complete [AI Flywheel](https://github.com/HomenShum/nodebench-ai/blob/main/AI_FLYWHEEL.md) methodology — two interlocking loops that compound quality over time:
|
package/dist/db.js
CHANGED
|
@@ -652,6 +652,75 @@ CREATE TABLE IF NOT EXISTS skill_sync_history (
|
|
|
652
652
|
CREATE INDEX IF NOT EXISTS idx_skills_skill_id ON skills(skill_id);
|
|
653
653
|
CREATE INDEX IF NOT EXISTS idx_skills_status ON skills(status);
|
|
654
654
|
CREATE INDEX IF NOT EXISTS idx_skill_sync_history_skill ON skill_sync_history(skill_id);
|
|
655
|
+
|
|
656
|
+
-- ═══════════════════════════════════════════
|
|
657
|
+
-- ENGINE CONTEXT PERSISTENCE
|
|
658
|
+
-- Conformance reports, workflow runs, content archive
|
|
659
|
+
-- ═══════════════════════════════════════════
|
|
660
|
+
|
|
661
|
+
CREATE TABLE IF NOT EXISTS engine_reports (
|
|
662
|
+
id TEXT PRIMARY KEY,
|
|
663
|
+
session_id TEXT NOT NULL,
|
|
664
|
+
workflow TEXT NOT NULL,
|
|
665
|
+
preset TEXT NOT NULL,
|
|
666
|
+
score REAL NOT NULL,
|
|
667
|
+
grade TEXT NOT NULL,
|
|
668
|
+
breakdown TEXT NOT NULL,
|
|
669
|
+
summary TEXT NOT NULL,
|
|
670
|
+
total_steps INTEGER NOT NULL,
|
|
671
|
+
successful_steps INTEGER NOT NULL,
|
|
672
|
+
failed_steps INTEGER NOT NULL,
|
|
673
|
+
total_duration_ms INTEGER NOT NULL,
|
|
674
|
+
generated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
675
|
+
);
|
|
676
|
+
|
|
677
|
+
CREATE INDEX IF NOT EXISTS idx_engine_reports_workflow ON engine_reports(workflow);
|
|
678
|
+
CREATE INDEX IF NOT EXISTS idx_engine_reports_generated ON engine_reports(generated_at);
|
|
679
|
+
|
|
680
|
+
CREATE TABLE IF NOT EXISTS engine_workflow_runs (
|
|
681
|
+
id TEXT PRIMARY KEY,
|
|
682
|
+
session_id TEXT NOT NULL,
|
|
683
|
+
workflow TEXT NOT NULL,
|
|
684
|
+
preset TEXT NOT NULL,
|
|
685
|
+
step_count INTEGER NOT NULL,
|
|
686
|
+
success_count INTEGER NOT NULL DEFAULT 0,
|
|
687
|
+
failed_count INTEGER NOT NULL DEFAULT 0,
|
|
688
|
+
duration_ms INTEGER NOT NULL DEFAULT 0,
|
|
689
|
+
context_loaded TEXT,
|
|
690
|
+
outcome_summary TEXT,
|
|
691
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
692
|
+
);
|
|
693
|
+
|
|
694
|
+
CREATE INDEX IF NOT EXISTS idx_engine_runs_workflow ON engine_workflow_runs(workflow);
|
|
695
|
+
CREATE INDEX IF NOT EXISTS idx_engine_runs_created ON engine_workflow_runs(created_at);
|
|
696
|
+
|
|
697
|
+
CREATE TABLE IF NOT EXISTS content_archive (
|
|
698
|
+
id TEXT PRIMARY KEY,
|
|
699
|
+
title TEXT NOT NULL,
|
|
700
|
+
content_type TEXT NOT NULL,
|
|
701
|
+
digest TEXT,
|
|
702
|
+
full_content TEXT,
|
|
703
|
+
themes TEXT,
|
|
704
|
+
workflow TEXT,
|
|
705
|
+
published_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
706
|
+
engagement TEXT
|
|
707
|
+
);
|
|
708
|
+
|
|
709
|
+
CREATE INDEX IF NOT EXISTS idx_content_archive_type ON content_archive(content_type);
|
|
710
|
+
CREATE INDEX IF NOT EXISTS idx_content_archive_published ON content_archive(published_at);
|
|
711
|
+
|
|
712
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS content_archive_fts USING fts5(
|
|
713
|
+
title,
|
|
714
|
+
digest,
|
|
715
|
+
themes,
|
|
716
|
+
content='content_archive',
|
|
717
|
+
content_rowid='rowid'
|
|
718
|
+
);
|
|
719
|
+
|
|
720
|
+
CREATE TRIGGER IF NOT EXISTS content_archive_fts_insert AFTER INSERT ON content_archive BEGIN
|
|
721
|
+
INSERT INTO content_archive_fts(rowid, title, digest, themes)
|
|
722
|
+
VALUES (new.rowid, new.title, COALESCE(new.digest, ''), COALESCE(new.themes, ''));
|
|
723
|
+
END;
|
|
655
724
|
`;
|
|
656
725
|
export function getDb() {
|
|
657
726
|
if (_db)
|
package/dist/db.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"db.js","sourceRoot":"","sources":["../src/db.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,IAAI,GAAG,GAA6B,IAAI,CAAC;AAEzC,MAAM,UAAU,GAAG
|
|
1
|
+
{"version":3,"file":"db.js","sourceRoot":"","sources":["../src/db.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,IAAI,GAAG,GAA6B,IAAI,CAAC;AAEzC,MAAM,UAAU,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8sBlB,CAAC;AAEF,MAAM,UAAU,KAAK;IACnB,IAAI,GAAG;QAAE,OAAO,GAAG,CAAC;IACpB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,YAAY,CAAC,CAAC;IAC1C,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACpC,GAAG,GAAG,IAAI,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC,CAAC;IAC9C,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAErB,gFAAgF;IAChF,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,GAAG,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,EAAS,CAAC;QAChF,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,GAAG,EAAS,CAAC;QACvF,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC9B,GAAG,CAAC,IAAI,CAAC,sEAAsE,CAAC,CAAC;QACnF,CAAC;QACD,MAAM,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,gCAAgC,CAAC,CAAC,GAAG,EAAS,CAAC;QACrE,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC,GAAG,EAAS,CAAC;QAC5E,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5B,GAAG,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAC,sEAAsE,CAAC,CAAC;IAElF,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,UAAU;I
ACxB,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;IACnB,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,2CAA2C,CAAC,CAAC,GAAG,EAAS,CAAC;IACjF,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,MAAc;IAClC,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;AAC9E,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine Conformance Scoring
|
|
3
|
+
*
|
|
4
|
+
* Computes a deterministic conformance score from a session's tool call history.
|
|
5
|
+
* Used to generate "Conformance Reports" — the sellable output of the engine.
|
|
6
|
+
*/
|
|
7
|
+
import type { EngineSession } from "./session.js";
|
|
8
|
+
/**
 * Pass/fail result of each individual conformance check.
 *
 * Every field carries equal weight when the overall score is computed.
 */
export interface ConformanceBreakdown {
    /**
     * Enough successful calls were made: at least `expectedSteps` when that
     * argument is given, otherwise at least one.
     */
    stepsCompleted: boolean;
    /** A `run_quality_gate` call finished with status "success". */
    qualityGatePassed: boolean;
    /** A `log_test_result` call finished with status "success". */
    testLayersLogged: boolean;
    /**
     * A `start_flywheel`, `log_flywheel_step` or `run_quality_gate` call
     * finished with status "success".
     */
    flywheelCompleted: boolean;
    /**
     * A `log_learning`, `save_session_note` or `search_all_knowledge` call
     * finished with status "success".
     */
    learningsRecorded: boolean;
    /** A `run_recon` or `log_recon_finding` call finished with status "success". */
    reconPerformed: boolean;
    /**
     * A `start_verification_cycle` or `log_verification_step` call finished
     * with status "success".
     */
    verificationCycleStarted: boolean;
    /** No call in the history has status "error". */
    noErrors: boolean;
}
|
|
18
|
+
/**
 * Conformance report produced for one engine session.
 */
export interface ConformanceReport {
    /** Identifier of the session that was scored. */
    sessionId: string;
    /** Preset the session was running under. */
    preset: string;
    /** Percentage of passed checks, rounded to the nearest integer (0-100). */
    score: number;
    /** Letter grade: A (>= 90), B (>= 75), C (>= 60), D (>= 40), otherwise F. */
    grade: "A" | "B" | "C" | "D" | "F";
    /** Result of each individual check. */
    breakdown: ConformanceBreakdown;
    /** One-line human-readable summary, listing any failed checks. */
    summary: string;
    /** Number of tool calls recorded in the session history. */
    totalSteps: number;
    /** Number of calls with status "success". */
    successfulSteps: number;
    /** Number of calls with status "error". */
    failedSteps: number;
    /** Sum of the recorded duration of every call, in milliseconds. */
    totalDurationMs: number;
    /** `Date.now()` value captured when the report was built (epoch milliseconds). */
    generatedAt: number;
}
|
|
31
|
+
/**
 * Computes the conformance report for a session from its tool call history.
 *
 * @param session - Session to score.
 * @param expectedSteps - Optional number of steps the run was expected to complete.
 * @returns The conformance report for the session.
 */
export declare function computeConformance(session: EngineSession, expectedSteps?: number): ConformanceReport;
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine Conformance Scoring
|
|
3
|
+
*
|
|
4
|
+
* Computes a deterministic conformance score from a session's tool call history.
|
|
5
|
+
* Used to generate "Conformance Reports" — the sellable output of the engine.
|
|
6
|
+
*/
|
|
7
|
+
// Tool-name groups consulted by computeConformance. A check passes when any
// tool of its group appears in the session history with status "success".

/** Tools that satisfy the `flywheelCompleted` check. */
const FLYWHEEL_TOOLS = ["start_flywheel", "log_flywheel_step", "run_quality_gate"];

/** Tools that satisfy the `testLayersLogged` check. */
const TEST_TOOLS = ["log_test_result"];

/** Tools that satisfy the `learningsRecorded` check. */
const LEARNING_TOOLS = ["log_learning", "save_session_note", "search_all_knowledge"];

/** Tools that satisfy the `reconPerformed` check. */
const RECON_TOOLS = ["run_recon", "log_recon_finding"];

/** Tools that satisfy the `verificationCycleStarted` check. */
const VERIFICATION_TOOLS = ["start_verification_cycle", "log_verification_step"];

/** Tools that satisfy the `qualityGatePassed` check. */
const QUALITY_GATE_TOOLS = ["run_quality_gate"];
|
|
31
|
+
/**
 * Reports whether the history contains at least one successful call to any
 * of the given tools.
 *
 * @param history - Tool call records; each exposes `toolName` and `status`.
 * @param toolNames - Names of the tools that satisfy the check.
 * @returns `true` as soon as a record with a listed `toolName` and status
 *   "success" is found; `false` when there is none.
 */
function hasToolCalled(history, toolNames) {
    for (const record of history) {
        if (!toolNames.includes(record.toolName)) {
            continue;
        }
        if (record.status === "success") {
            return true;
        }
    }
    return false;
}
|
|
34
|
+
/**
 * Counts the records whose `status` is strictly equal to the given value.
 *
 * @param history - Tool call records; each exposes `status`.
 * @param status - Status value to count.
 * @returns Number of matching records (0 for an empty history).
 */
function countByStatus(history, status) {
    let matches = 0;
    for (const record of history) {
        if (record.status === status) {
            matches += 1;
        }
    }
    return matches;
}
|
|
37
|
+
/**
 * Scores a session's tool call history against eight equally weighted checks.
 *
 * The score is the percentage of passed checks rounded to the nearest
 * integer, then mapped to a grade: A (>= 90), B (>= 75), C (>= 60),
 * D (>= 40), otherwise F.
 *
 * @param session - Session whose `callHistory`, `id` and `preset` are read.
 * @param expectedSteps - Optional number of successful calls required for the
 *   `stepsCompleted` check. When it is omitted or falsy (including 0), one
 *   successful call is enough.
 * @returns The conformance report, with `generatedAt` set from `Date.now()`.
 */
export function computeConformance(session, expectedSteps) {
    const { callHistory } = session;
    const successfulSteps = countByStatus(callHistory, "success");
    const failedSteps = countByStatus(callHistory, "error");
    const totalSteps = callHistory.length;

    let totalDurationMs = 0;
    for (const record of callHistory) {
        totalDurationMs = totalDurationMs + record.durationMs;
    }

    let stepsCompleted;
    if (expectedSteps) {
        stepsCompleted = successfulSteps >= expectedSteps;
    }
    else {
        stepsCompleted = successfulSteps > 0;
    }

    // Key order matters: it fixes the order of the "Missing: ..." list below.
    const breakdown = {
        stepsCompleted,
        qualityGatePassed: hasToolCalled(callHistory, QUALITY_GATE_TOOLS),
        testLayersLogged: hasToolCalled(callHistory, TEST_TOOLS),
        flywheelCompleted: hasToolCalled(callHistory, FLYWHEEL_TOOLS),
        learningsRecorded: hasToolCalled(callHistory, LEARNING_TOOLS),
        reconPerformed: hasToolCalled(callHistory, RECON_TOOLS),
        verificationCycleStarted: hasToolCalled(callHistory, VERIFICATION_TOOLS),
        noErrors: failedSteps === 0,
    };

    // One pass over the checks: count the passes and collect the failures as
    // lower-case, space-separated labels (e.g. "quality gate passed").
    const checkEntries = Object.entries(breakdown);
    const failedChecks = [];
    let passedCount = 0;
    for (const [checkName, ok] of checkEntries) {
        if (ok) {
            passedCount += 1;
        }
        else {
            failedChecks.push(checkName.replace(/([A-Z])/g, " $1").toLowerCase().trim());
        }
    }

    // Each check is worth 12.5 points (8 checks × 12.5 = 100).
    const score = Math.round((passedCount / checkEntries.length) * 100);

    let grade;
    if (score >= 90) {
        grade = "A";
    }
    else if (score >= 75) {
        grade = "B";
    }
    else if (score >= 60) {
        grade = "C";
    }
    else if (score >= 40) {
        grade = "D";
    }
    else {
        grade = "F";
    }

    let summary;
    if (score === 100) {
        summary = `All conformance checks passed. ${successfulSteps}/${totalSteps} tool calls succeeded in ${totalDurationMs}ms.`;
    }
    else {
        summary = `Score ${score}/100 (${grade}). Missing: ${failedChecks.join(", ")}. ${successfulSteps}/${totalSteps} calls succeeded.`;
    }

    return {
        sessionId: session.id,
        preset: session.preset,
        score,
        grade,
        breakdown,
        summary,
        totalSteps,
        successfulSteps,
        failedSteps,
        totalDurationMs,
        generatedAt: Date.now(),
    };
}
|
|
81
|
+
//# sourceMappingURL=conformance.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conformance.js","sourceRoot":"","sources":["../../src/engine/conformance.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA6BH,MAAM,cAAc,GAAG;IACrB,gBAAgB;IAChB,mBAAmB;IACnB,kBAAkB;CACnB,CAAC;AAEF,MAAM,UAAU,GAAG;IACjB,iBAAiB;CAClB,CAAC;AAEF,MAAM,cAAc,GAAG;IACrB,cAAc;IACd,mBAAmB;IACnB,sBAAsB;CACvB,CAAC;AAEF,MAAM,WAAW,GAAG;IAClB,WAAW;IACX,mBAAmB;CACpB,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,0BAA0B;IAC1B,uBAAuB;CACxB,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,kBAAkB;CACnB,CAAC;AAEF,SAAS,aAAa,CAAC,OAAyB,EAAE,SAAmB;IACnE,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;AACvF,CAAC;AAED,SAAS,aAAa,CAAC,OAAyB,EAAE,MAA2B;IAC3E,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,OAAsB,EACtB,aAAsB;IAEtB,MAAM,OAAO,GAAG,OAAO,CAAC,WAAW,CAAC;IACpC,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IACrD,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/C,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IAC7B,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IAE1E,MAAM,SAAS,GAAyB;QACtC,cAAc,EAAE,aAAa,CAAC,CAAC,CAAC,UAAU,IAAI,aAAa,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC;QAC5E,iBAAiB,EAAE,aAAa,CAAC,OAAO,EAAE,kBAAkB,CAAC;QAC7D,gBAAgB,EAAE,aAAa,CAAC,OAAO,EAAE,UAAU,CAAC;QACpD,iBAAiB,EAAE,aAAa,CAAC,OAAO,EAAE,cAAc,CAAC;QACzD,iBAAiB,EAAE,aAAa,CAAC,OAAO,EAAE,cAAc,CAAC;QACzD,cAAc,EAAE,aAAa,CAAC,OAAO,EAAE,WAAW,CAAC;QACnD,wBAAwB,EAAE,aAAa,CAAC,OAAO,EAAE,kBAAkB,CAAC;QACpE,QAAQ,EAAE,MAAM,KAAK,CAAC;KACvB,CAAC;IAEF,iEAAiE;IACjE,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IACxC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC;IAEzD,MAAM,KAAK,GACT,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACnB,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACnB,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,C
AAC;gBACnB,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE1B,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC;SAC3C,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;SACrB,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC;IAEnE,MAAM,OAAO,GAAG,KAAK,KAAK,GAAG;QAC3B,CAAC,CAAC,kCAAkC,UAAU,IAAI,KAAK,4BAA4B,eAAe,KAAK;QACvG,CAAC,CAAC,SAAS,KAAK,SAAS,KAAK,eAAe,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,UAAU,IAAI,KAAK,mBAAmB,CAAC;IAElH,OAAO;QACL,SAAS,EAAE,OAAO,CAAC,EAAE;QACrB,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,KAAK;QACL,KAAK;QACL,SAAS;QACT,OAAO;QACP,UAAU,EAAE,KAAK;QACjB,eAAe,EAAE,UAAU;QAC3B,WAAW,EAAE,MAAM;QACnB,eAAe;QACf,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;KACxB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine Context Bridge
|
|
3
|
+
*
|
|
4
|
+
* Connects the engine's ephemeral sessions to SQLite persistent storage.
|
|
5
|
+
* Loads accumulated context before workflow execution and persists outcomes after.
|
|
6
|
+
*/
|
|
7
|
+
import type { ConformanceReport } from "./conformance.js";
|
|
8
|
+
import type { ToolCallRecord } from "./session.js";
|
|
9
|
+
/**
 * Accumulated context returned by `loadSessionContext`.
 *
 * NOTE(review): field meanings below are read off the names and types only;
 * the implementation is not visible here — confirm against contextBridge.js.
 */
export interface SessionContext {
    /** Earlier runs, each with its score, grade, duration and creation time. */
    recentRuns: {
        workflow: string;
        score: number;
        grade: string;
        durationMs: number;
        createdAt: string;
    }[];
    /** Stored learnings, each with a key, content and category. */
    relevantLearnings: {
        key: string;
        content: string;
        category: string;
    }[];
    /** Direction of the conformance score, with the average and run count. */
    conformanceTrend: {
        direction: "improving" | "stable" | "regressing" | "insufficient_data";
        avgScore: number;
        runCount: number;
    };
    /** Theme labels; presumably taken from recently archived content — confirm. */
    recentContentThemes: string[];
    /** Presumably the number of unresolved gaps — confirm what counts as a gap. */
    openGapCount: number;
}
|
|
30
|
+
/**
 * Summary of the persisted engine context, returned by `getContextHealth`.
 *
 * NOTE(review): field meanings below are read off the names and types only;
 * the implementation is not visible here — confirm against contextBridge.js.
 */
export interface ContextHealth {
    /** Number of stored learnings. */
    learningsCount: number;
    /** Scores of recent runs; ordering is not visible from this declaration. */
    recentRunScores: number[];
    /** Trend label; presumably one of the `conformanceTrend.direction` values — confirm. */
    trendDirection: string;
    /** Number of entries in the content archive. */
    contentArchiveSize: number;
    /** Days since the latest learning; `null` presumably means none exist — confirm. */
    daysSinceLastLearning: number | null;
    /** A number per workflow name; presumably a run count — confirm. */
    workflowCoverage: Record<string, number>;
}
|
|
38
|
+
/**
 * Loads the accumulated context for a workflow.
 *
 * @param workflow - Name of the workflow about to run.
 * @param _preset - Preset name. NOTE(review): the underscore prefix suggests
 *   this argument is currently unused — confirm in the implementation.
 * @returns The session context for the workflow.
 */
export declare function loadSessionContext(workflow: string, _preset: string): SessionContext;
|
|
39
|
+
/**
 * Persists the outcome of a session.
 *
 * @param sessionId - Identifier of the session.
 * @param report - Conformance report for the session.
 * @param workflow - Name of the workflow that ran.
 * @param preset - Preset the session ran under.
 * @param callHistory - Optional tool call records of the session.
 *   NOTE(review): how these are stored is not visible here — confirm.
 */
export declare function persistSessionOutcome(sessionId: string, report: ConformanceReport, workflow: string, preset: string, callHistory?: ToolCallRecord[]): void;
|
|
40
|
+
/**
 * Returns a health summary of the persisted engine context.
 *
 * @returns The current context health figures.
 */
export declare function getContextHealth(): ContextHealth;
|
|
41
|
+
/**
 * Archives a piece of content.
 *
 * @param title - Title of the content.
 * @param contentType - Type label of the content.
 * @param digest - Digest text. NOTE(review): presumably a short summary
 *   rather than a hash — confirm.
 * @param themes - Theme labels attached to the content.
 * @param workflow - Optional name of the workflow that produced the content.
 * @param fullContent - Optional full text of the content.
 */
export declare function archiveContent(title: string, contentType: string, digest: string, themes: string[], workflow?: string, fullContent?: string): void;
|
|
42
|
+
/**
 * Searches the content archive.
 *
 * @param query - Search text. NOTE(review): the query syntax accepted is not
 *   visible from this declaration — confirm in the implementation.
 * @param contentType - Optional content type to restrict the search to.
 * @param limit - Optional maximum number of results; the default is not
 *   visible from this declaration.
 * @returns The matching archive entries.
 */
export declare function searchContentArchive(query: string, contentType?: string, limit?: number): {
    id: string;
    title: string;
    contentType: string;
    digest: string;
    themes: string[];
    publishedAt: string;
}[];
|
|
50
|
+
/**
 * Returns the recorded runs of a workflow.
 *
 * @param workflow - Name of the workflow.
 * @param limit - Optional maximum number of runs; the default and the
 *   ordering are not visible from this declaration.
 * @returns One entry per run, with its score, grade, duration and step counts.
 */
export declare function getWorkflowHistory(workflow: string, limit?: number): {
    sessionId: string;
    workflow: string;
    preset: string;
    score: number;
    grade: string;
    durationMs: number;
    stepCount: number;
    successCount: number;
    failedCount: number;
    createdAt: string;
}[];
|
|
62
|
+
/**
 * Searches the stored learnings.
 *
 * @param query - Search text. NOTE(review): the query syntax accepted is not
 *   visible from this declaration — confirm in the implementation.
 * @param limit - Optional maximum number of results; the default is not
 *   visible from this declaration.
 * @returns The matching learnings.
 */
export declare function searchLearnings(query: string, limit?: number): {
    key: string;
    content: string;
    category: string;
    createdAt: string;
}[];
|