yaml-flow 8.2.0 → 8.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/browser/asset-integrity.json +3 -3
- package/browser/board-livecards-client.js +1 -1
- package/browser/board-livecards-localstorage.js +4 -6
- package/cli/{board-live-cards-lib-tjYsPt5U.d.ts → board-live-cards-lib-Iq_XAC09.d.ts} +1 -1
- package/cli/browser-api/board-live-cards-browser-adapter.d.ts +4 -3
- package/cli/browser-api/board-live-cards-browser-adapter.js +2 -2
- package/cli/browser-api/card-store-browser-api.d.ts +1 -1
- package/cli/node/artifacts-store-cli.js +8 -8
- package/cli/node/board-live-cards-cli.js +8 -8
- package/cli/node/fs-board-adapter.d.ts +6 -33
- package/cli/node/fs-board-adapter.js +10 -8
- package/cli/node/step-machine-cli.js +2 -2
- package/cli/{types-CSiGbY__.d.ts → types--rXGWbSR.d.ts} +76 -4
- package/examples/board/.board-ws/cards/store/_index.json +17 -0
- package/examples/board/.board-ws/cards/store/card-market-prices.json +80 -0
- package/examples/board/.board-ws/cards/store/card-portfolio-value.json +90 -0
- package/examples/board/.board-ws/cards/store/card-portfolio.json +78 -0
- package/examples/board/cards/cardT-market-prices.json +6 -4
- package/examples/board/cards/cardT-portfolio-value.json +10 -38
- package/examples/board/cards/cardT-portfolio.json +9 -4
- package/examples/board/demo-shell-with-server.html +3 -3
- package/examples/board/server/board-server.js +593 -0
- package/examples/board/server/board-worker/source-def-flows/mock-handler/mock-db.js +13 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/.retain/compliance.db +0 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/.retain/optimus.db +0 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/query.cjs +51 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/seed-cpm.cjs +197 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/seed-cpmV2.cjs +128 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/seed-optimus.cjs +352 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/sqlite-config.json +3 -0
- package/examples/board/server/board-worker/source-def-flows/sqlite-handler/sqlite-handler.js +84 -0
- package/examples/board/{source-def-flows/url.flow.json → server/board-worker/source-def-flows/sqlite.flow.json} +7 -7
- package/examples/board/{source-def-handlers → server/board-worker/source-def-flows/url-handler}/http-source-handler.js +29 -21
- package/examples/board/server/board-worker/source-def-flows/url.flow.json +73 -0
- package/examples/board/{source_def_flows.json → server/board-worker/source_def_flows.json} +61 -115
- package/examples/board/server/board-worker/task-executor.js +475 -0
- package/examples/board/server/chat-flow/chat-clear-processing.js +41 -0
- package/examples/board/server/chat-flow/chat-open-turn.js +144 -0
- package/examples/board/server/chat-flow/chat-write-assistant.js +44 -0
- package/examples/board/server/chat-flow/copilot-chat/assistant.js +253 -0
- package/examples/board/server/chat-flow/echo-probe/assistant.js +28 -0
- package/examples/board/server/chat-flow/flow-steps.json +167 -0
- package/examples/board/server-config.json +22 -0
- package/examples/board/test/server-http-test.js +707 -0
- package/examples/board/test/{portfolio-tracker-sse-worker.js → sse-worker.js} +9 -8
- package/examples/board-local/demo-shell-localstorage.html +3 -3
- package/lib/{artifacts-store-lib-public-DfU9t5-S.d.cts → artifacts-store-lib-public-C5UL5tyG.d.cts} +3 -31
- package/lib/{artifacts-store-lib-public-BPW_C15z.d.ts → artifacts-store-lib-public-GD4H-fFp.d.ts} +3 -31
- package/lib/artifacts-store-public.d.cts +3 -3
- package/lib/artifacts-store-public.d.ts +3 -3
- package/lib/board-live-cards-node.cjs +10 -8
- package/lib/board-live-cards-node.d.cts +9 -8
- package/lib/board-live-cards-node.d.ts +9 -8
- package/lib/board-live-cards-node.js +10 -8
- package/lib/{board-live-cards-public-W2zK59m0.d.cts → board-live-cards-public-BLXbcBNk.d.cts} +1 -1
- package/lib/{board-live-cards-public-B8b_0k_j.d.ts → board-live-cards-public-BZaNb2mi.d.ts} +1 -1
- package/lib/board-live-cards-public.d.cts +2 -2
- package/lib/board-live-cards-public.d.ts +2 -2
- package/lib/board-live-cards-server-runtime.cjs +4 -6
- package/lib/board-live-cards-server-runtime.d.cts +3 -3
- package/lib/board-live-cards-server-runtime.d.ts +3 -3
- package/lib/board-live-cards-server-runtime.js +4 -6
- package/lib/board-livegraph-runtime/index.cjs +2 -2
- package/lib/board-livegraph-runtime/index.js +2 -2
- package/lib/card-store-public.d.cts +2 -2
- package/lib/card-store-public.d.ts +2 -2
- package/lib/execution-refs.cjs +1 -1
- package/lib/execution-refs.js +1 -1
- package/lib/index.cjs +1 -1
- package/lib/index.d.cts +1 -1
- package/lib/index.d.ts +1 -1
- package/lib/index.js +1 -1
- package/lib/server-runtime/index.cjs +4 -6
- package/lib/server-runtime/index.d.cts +4 -4
- package/lib/server-runtime/index.d.ts +4 -4
- package/lib/server-runtime/index.js +4 -6
- package/lib/step-machine-public/index.d.cts +1 -1
- package/lib/step-machine-public/index.d.ts +1 -1
- package/lib/{storage-interface-BhAON-gW.d.cts → storage-interface-B6ecOulj.d.cts} +25 -3
- package/lib/{storage-interface-BhAON-gW.d.ts → storage-interface-B6ecOulj.d.ts} +25 -3
- package/lib/stores/index.d.cts +1 -1
- package/lib/stores/index.d.ts +1 -1
- package/lib/stores/kv.d.cts +1 -1
- package/lib/stores/kv.d.ts +1 -1
- package/lib/{types-seTI8zta.d.cts → types-Bztd1KoK.d.cts} +55 -3
- package/lib/{types-Bm7IFD7r.d.ts → types-D-xVWPdY.d.ts} +55 -3
- package/package.json +1 -1
- package/examples/board/demo-chat-copilot.flow.json +0 -38
- package/examples/board/demo-chat-copilot.js +0 -185
- package/examples/board/demo-chat-echo.flow.json +0 -38
- package/examples/board/demo-chat-echo.js +0 -92
- package/examples/board/demo-server-config.copilot-chat.json +0 -10
- package/examples/board/demo-server-config.json +0 -10
- package/examples/board/demo-server.js +0 -629
- package/examples/board/demo-task-executor.js +0 -721
- package/examples/board/gandalf-cards/card-source-kinds.json +0 -36
- package/examples/board/gandalf-cards/cards/_index.json +0 -7
- package/examples/board/gandalf-cards/cards/card-source-kinds.json +0 -64
- package/examples/board/scripts/copilot_wrapper.bat +0 -157
- package/examples/board/scripts/copilot_wrapper_helper.ps1 +0 -190
- package/examples/board/scripts/workiq_wrapper.mjs +0 -66
- package/examples/board/source-def-flows/copilot.flow.json +0 -33
- package/examples/board/source-def-flows/url-list.flow.json +0 -33
- package/examples/board/source-def-flows/workiq.flow.json +0 -34
- package/examples/board/source-def-handlers/copilot-source-handler.js +0 -141
- package/examples/board/test/demo-http-test.js +0 -362
- /package/examples/board/{source-def-flows → server/board-worker/source-def-flows}/mock.flow.json +0 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* seed-cpm.cjs — Create and seed the CPM compliance database.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* node seed-cpm.cjs [--db <path>]
|
|
8
|
+
*
|
|
9
|
+
* Default db path: sqlite-handler/.retain/compliance.db. Any existing file at the target path is deleted first.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const Database = require('better-sqlite3');
|
|
13
|
+
const path = require('path');
|
|
14
|
+
const fs = require('fs');
|
|
15
|
+
|
|
16
|
+
const args = process.argv.slice(2);
|
|
17
|
+
const dbArgIdx = args.indexOf('--db');
|
|
18
|
+
const dbPath = dbArgIdx !== -1 && args[dbArgIdx + 1]
|
|
19
|
+
? path.resolve(args[dbArgIdx + 1])
|
|
20
|
+
: path.resolve(__dirname, '.retain', 'compliance.db');
|
|
21
|
+
|
|
22
|
+
// Ensure parent directory exists
|
|
23
|
+
const dbDir = path.dirname(dbPath);
|
|
24
|
+
fs.mkdirSync(dbDir, { recursive: true }); // recursive mkdir is a no-op when the directory already exists, so no existsSync pre-check is needed
|
|
25
|
+
|
|
26
|
+
// Remove existing DB to start fresh
|
|
27
|
+
fs.rmSync(dbPath, { force: true }); // force: ignore a missing file, replacing the racy exists-then-unlink check
|
|
28
|
+
|
|
29
|
+
const db = new Database(dbPath);
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Schema
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
db.exec(`
|
|
35
|
+
CREATE TABLE agents (
|
|
36
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
37
|
+
name TEXT NOT NULL UNIQUE,
|
|
38
|
+
platform TEXT,
|
|
39
|
+
owner TEXT,
|
|
40
|
+
priority TEXT DEFAULT 'Unassessed' CHECK(priority IN ('Immediate','Normal','Monitor','Unassessed')),
|
|
41
|
+
risk_score INTEGER DEFAULT 0,
|
|
42
|
+
last_eval TEXT,
|
|
43
|
+
model_version TEXT,
|
|
44
|
+
description TEXT
|
|
45
|
+
);
|
|
46
|
+
|
|
47
|
+
CREATE TABLE eval_categories (
|
|
48
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
49
|
+
category TEXT NOT NULL,
|
|
50
|
+
evals TEXT,
|
|
51
|
+
mode TEXT,
|
|
52
|
+
regulation TEXT
|
|
53
|
+
);
|
|
54
|
+
|
|
55
|
+
CREATE TABLE eval_results (
|
|
56
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
57
|
+
agent_name TEXT NOT NULL,
|
|
58
|
+
category TEXT NOT NULL,
|
|
59
|
+
result TEXT NOT NULL CHECK(result IN ('PASS','FAIL','PENDING')),
|
|
60
|
+
run_date TEXT,
|
|
61
|
+
details TEXT,
|
|
62
|
+
FOREIGN KEY (agent_name) REFERENCES agents(name)
|
|
63
|
+
);
|
|
64
|
+
|
|
65
|
+
CREATE TABLE remediation_tasks (
|
|
66
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
67
|
+
agent_name TEXT NOT NULL,
|
|
68
|
+
owner TEXT,
|
|
69
|
+
category TEXT,
|
|
70
|
+
action TEXT,
|
|
71
|
+
status TEXT DEFAULT 'open' CHECK(status IN ('open','in_progress','closed')),
|
|
72
|
+
created_date TEXT,
|
|
73
|
+
regulation TEXT,
|
|
74
|
+
FOREIGN KEY (agent_name) REFERENCES agents(name)
|
|
75
|
+
);
|
|
76
|
+
`);
|
|
77
|
+
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Seed: Agents
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
const insertAgent = db.prepare(`
|
|
82
|
+
INSERT INTO agents (name, platform, owner, priority, risk_score, last_eval, model_version, description)
|
|
83
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
84
|
+
`);
|
|
85
|
+
|
|
86
|
+
const agents = [
|
|
87
|
+
['HR Benefits Agent', 'Copilot Studio', 'HR Team', 'Immediate', 92, '2026-04-27', 'gpt-4o-2026-03', 'Answers employee benefits questions, PTO policy, enrollment guidance.'],
|
|
88
|
+
['IT Helpdesk Copilot', 'M365 Copilot', 'IT Operations', 'Immediate', 87, '2026-04-27', 'gpt-4o-2026-03', 'Handles IT support tickets, password resets, device troubleshooting.'],
|
|
89
|
+
['Sales Forecaster', 'Foundry', 'Sales Analytics', 'Immediate', 78, '2026-04-26', 'gpt-4o-2026-01', 'Generates quarterly sales projections from CRM pipeline data.'],
|
|
90
|
+
['Legal Contract Reviewer', 'Copilot Studio', 'Legal Ops', 'Immediate', 95, '2026-04-27', 'gpt-4o-2026-03', 'Reviews vendor contracts for risk clauses, compliance terms, SLA gaps.'],
|
|
91
|
+
['Customer Support Bot', 'Foundry', 'CX Team', 'Normal', 45, '2026-04-25', 'gpt-4o-mini-2026', 'First-line customer support for product FAQs and order status.'],
|
|
92
|
+
['Marketing Copy Generator','M365 Copilot', 'Marketing', 'Normal', 38, '2026-04-24', 'gpt-4o-mini-2026', 'Generates ad copy, social media posts, email campaigns.'],
|
|
93
|
+
['Finance Reconciler', 'Foundry', 'Finance Ops', 'Monitor', 22, '2026-04-20', 'gpt-4o-2026-01', 'Reconciles invoice data against purchase orders and GL entries.'],
|
|
94
|
+
['Onboarding Assistant', 'Copilot Studio', 'People Team', 'Unassessed', 0, null, 'gpt-4o-mini-2026', 'Guides new hires through onboarding checklists and policy docs.'],
|
|
95
|
+
];
|
|
96
|
+
db.transaction(() => { for (const a of agents) insertAgent.run(...a); })(); // single transaction: agent rows commit together or not at all
|
|
97
|
+
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
// Seed: Eval Categories (V1 Baseline Pack)
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
const insertCategory = db.prepare(`
|
|
102
|
+
INSERT INTO eval_categories (category, evals, mode, regulation)
|
|
103
|
+
VALUES (?, ?, ?, ?)
|
|
104
|
+
`);
|
|
105
|
+
|
|
106
|
+
const categories = [
|
|
107
|
+
['Scope', 'Scope refusal', 'Generator-Verifier', 'Art 14 (Human oversight)'],
|
|
108
|
+
['Adherence', 'JSON-validity, schema-validity, required field presence, malformed output', 'Deterministic', 'Art 15 (Accuracy, robustness)'],
|
|
109
|
+
['Determinism', 'BLEU, ROUGE-L', 'Deterministic', 'Art 15 (Accuracy, performance)'],
|
|
110
|
+
['Groundedness', 'Groundedness, citation', 'LLM + Deterministic', 'Art 13 (Transparency), Art 15 (Accuracy)'],
|
|
111
|
+
['Regression & Drift', 'Regression / drift evaluator after model change', 'Deterministic', 'Art 9 (Risk mgmt), Art 15 (Robustness)'],
|
|
112
|
+
['Safety / Red-Teaming','Prompt injection, jailbreak (library driven)', 'Generator-Verifier', 'Art 15 (Robustness against misuse)'],
|
|
113
|
+
['Content Safety', 'Profanity detection, slur detection, harmful content filtering', 'LLM + Deterministic', 'Art 15 (Harm mitigation), Art 5'],
|
|
114
|
+
['Security', 'Credential leakage', 'Generator-Verifier', 'Art 15 (System security)'],
|
|
115
|
+
['RAG', 'Tool call presence, tool call error rate, risky tool', 'Deterministic + Behavioral', 'Art 9, Art 13, Art 15'],
|
|
116
|
+
];
|
|
117
|
+
db.transaction(() => { for (const c of categories) insertCategory.run(...c); })(); // single transaction: category rows commit together or not at all
|
|
118
|
+
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
// Seed: Eval Results (latest run for high-priority agents)
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
const insertResult = db.prepare(`
|
|
123
|
+
INSERT INTO eval_results (agent_name, category, result, run_date, details)
|
|
124
|
+
VALUES (?, ?, ?, ?, ?)
|
|
125
|
+
`);
|
|
126
|
+
|
|
127
|
+
const runDate = '2026-04-27';
|
|
128
|
+
const results = [
|
|
129
|
+
// Legal Contract Reviewer — worst offender: 3 failures
|
|
130
|
+
['Legal Contract Reviewer', 'Scope', 'PASS', runDate, null],
|
|
131
|
+
['Legal Contract Reviewer', 'Adherence', 'PASS', runDate, null],
|
|
132
|
+
['Legal Contract Reviewer', 'Determinism', 'FAIL', runDate, 'BLEU score dropped from 0.82 to 0.61 after model update to gpt-4o-2026-03. Output variance exceeds threshold.'],
|
|
133
|
+
['Legal Contract Reviewer', 'Groundedness', 'PASS', runDate, null],
|
|
134
|
+
['Legal Contract Reviewer', 'Regression & Drift', 'FAIL', runDate, 'Model changed from gpt-4o-2026-01 to gpt-4o-2026-03 on 2026-04-25. Response pattern shifted significantly vs. baseline.'],
|
|
135
|
+
['Legal Contract Reviewer', 'Safety / Red-Teaming','PASS', runDate, null],
|
|
136
|
+
['Legal Contract Reviewer', 'Content Safety', 'PASS', runDate, null],
|
|
137
|
+
['Legal Contract Reviewer', 'Security', 'FAIL', runDate, 'Credential leakage: agent included internal API key fragment in 2 of 50 sampled responses.'],
|
|
138
|
+
['Legal Contract Reviewer', 'RAG', 'PASS', runDate, null],
|
|
139
|
+
|
|
140
|
+
// HR Benefits Agent — groundedness failure + pending RAG
|
|
141
|
+
['HR Benefits Agent', 'Scope', 'PASS', runDate, null],
|
|
142
|
+
['HR Benefits Agent', 'Adherence', 'PASS', runDate, null],
|
|
143
|
+
['HR Benefits Agent', 'Determinism', 'PASS', runDate, null],
|
|
144
|
+
['HR Benefits Agent', 'Groundedness', 'FAIL', runDate, 'Citations missing in 23% of responses. Agent fabricated policy details not in source documents.'],
|
|
145
|
+
['HR Benefits Agent', 'Regression & Drift', 'PASS', runDate, null],
|
|
146
|
+
['HR Benefits Agent', 'Safety / Red-Teaming','PASS', runDate, null],
|
|
147
|
+
['HR Benefits Agent', 'Content Safety', 'PASS', runDate, null],
|
|
148
|
+
['HR Benefits Agent', 'Security', 'PASS', runDate, null],
|
|
149
|
+
['HR Benefits Agent', 'RAG', 'PENDING', runDate, 'Tool call evaluation pending — PICS SDK integration in progress.'],
|
|
150
|
+
|
|
151
|
+
// IT Helpdesk Copilot — safety failure (jailbreak)
|
|
152
|
+
['IT Helpdesk Copilot', 'Scope', 'PASS', runDate, null],
|
|
153
|
+
['IT Helpdesk Copilot', 'Adherence', 'PASS', runDate, null],
|
|
154
|
+
['IT Helpdesk Copilot', 'Determinism', 'PASS', runDate, null],
|
|
155
|
+
['IT Helpdesk Copilot', 'Groundedness', 'PASS', runDate, null],
|
|
156
|
+
['IT Helpdesk Copilot', 'Regression & Drift', 'PASS', runDate, null],
|
|
157
|
+
['IT Helpdesk Copilot', 'Safety / Red-Teaming','FAIL', runDate, 'Jailbreak succeeded in 3 of 20 adversarial probes. Agent disclosed internal IT topology when prompted with social engineering pattern.'],
|
|
158
|
+
['IT Helpdesk Copilot', 'Content Safety', 'PASS', runDate, null],
|
|
159
|
+
['IT Helpdesk Copilot', 'Security', 'PASS', runDate, null],
|
|
160
|
+
['IT Helpdesk Copilot', 'RAG', 'PASS', runDate, null],
|
|
161
|
+
|
|
162
|
+
// Sales Forecaster — clean except pending drift (model not yet updated)
|
|
163
|
+
['Sales Forecaster', 'Scope', 'PASS', runDate, null],
|
|
164
|
+
['Sales Forecaster', 'Adherence', 'PASS', runDate, null],
|
|
165
|
+
['Sales Forecaster', 'Determinism', 'PASS', runDate, null],
|
|
166
|
+
['Sales Forecaster', 'Groundedness', 'PASS', runDate, null],
|
|
167
|
+
['Sales Forecaster', 'Regression & Drift', 'PENDING', runDate, 'Drift evaluation pending — baseline not yet established for Foundry deployment.'],
|
|
168
|
+
['Sales Forecaster', 'Safety / Red-Teaming','PASS', runDate, null],
|
|
169
|
+
['Sales Forecaster', 'Content Safety', 'PASS', runDate, null],
|
|
170
|
+
['Sales Forecaster', 'Security', 'PASS', runDate, null],
|
|
171
|
+
['Sales Forecaster', 'RAG', 'PASS', runDate, null],
|
|
172
|
+
];
|
|
173
|
+
db.transaction(() => { for (const r of results) insertResult.run(...r); })(); // single transaction: eval result rows commit together or not at all
|
|
174
|
+
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
// Seed: Remediation Tasks
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
const insertTask = db.prepare(`
|
|
179
|
+
INSERT INTO remediation_tasks (agent_name, owner, category, action, status, created_date, regulation)
|
|
180
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
181
|
+
`);
|
|
182
|
+
|
|
183
|
+
const tasks = [
|
|
184
|
+
['Legal Contract Reviewer', 'Legal Ops', 'Security', 'Immediate security review — credential leakage detected in sampled traces. Rotate exposed keys and patch system prompt to suppress internal identifiers.', 'open', '2026-04-27', 'Art 15 (System security)'],
|
|
185
|
+
['Legal Contract Reviewer', 'Legal Ops', 'Regression & Drift', 'Model updated from gpt-4o-2026-01 to gpt-4o-2026-03 without re-certification. Schedule re-evaluation against approved baseline.', 'open', '2026-04-27', 'Art 9 (Risk mgmt)'],
|
|
186
|
+
['Legal Contract Reviewer', 'Legal Ops', 'Determinism', 'BLEU score regression after model change. Review prompt template and few-shot examples for deterministic output stability.', 'in_progress', '2026-04-27', 'Art 15 (Accuracy)'],
|
|
187
|
+
['HR Benefits Agent', 'HR Team', 'Groundedness', 'Citations missing in 23% of responses. Review grounding configuration — ensure RAG pipeline returns source document references.', 'open', '2026-04-27', 'Art 13 (Transparency)'],
|
|
188
|
+
['IT Helpdesk Copilot', 'IT Operations', 'Safety / Red-Teaming','Jailbreak vulnerability: agent disclosed internal IT topology under social engineering. Harden system prompt with explicit refusal boundaries.', 'open', '2026-04-27', 'Art 15 (Robustness)'],
|
|
189
|
+
];
|
|
190
|
+
db.transaction(() => { for (const t of tasks) insertTask.run(...t); })(); // single transaction: remediation task rows commit together or not at all
|
|
191
|
+
|
|
192
|
+
db.close();
|
|
193
|
+
console.log(`[seed-cpm] Database seeded: ${dbPath}`);
|
|
194
|
+
console.log(` agents: ${agents.length}`);
|
|
195
|
+
console.log(` eval_categories: ${categories.length}`);
|
|
196
|
+
console.log(` eval_results: ${results.length}`);
|
|
197
|
+
console.log(` remediation_tasks: ${tasks.length}`);
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* seed-cpmV2.cjs — Extend compliance.db with V2 historical data.
|
|
5
|
+
*
|
|
6
|
+
* Adds historical eval_results rows for earlier run dates so the drift-trend
|
|
7
|
+
* card has multiple data points to chart. Run AFTER seed-cpm.cjs. Not idempotent: re-running appends duplicate rows.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node seed-cpmV2.cjs [--db <name|path>]
|
|
11
|
+
*
|
|
12
|
+
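* Default db: compliance.db. A bare file name resolves inside sqlite-handler/.retain; an absolute path or one containing a path separator resolves against the current working directory.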
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const Database = require('better-sqlite3');
|
|
16
|
+
const path = require('path');
|
|
17
|
+
|
|
18
|
+
const args = process.argv.slice(2);
|
|
19
|
+
const dbArgIdx = args.indexOf('--db');
|
|
20
|
+
const dbName = dbArgIdx !== -1 && args[dbArgIdx + 1]
|
|
21
|
+
? args[dbArgIdx + 1]
|
|
22
|
+
: 'compliance.db';
|
|
23
|
+
const dbPath = path.isAbsolute(dbName) || dbName.includes(path.sep) || dbName.includes('/')
|
|
24
|
+
? path.resolve(dbName)
|
|
25
|
+
: path.join(__dirname, '.retain', dbName);
|
|
26
|
+
|
|
27
|
+
const db = new Database(dbPath, { fileMustExist: true }); // fail fast if seed-cpm.cjs has not created the DB, instead of silently creating an empty file
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Add historical eval runs (2 prior dates: 2026-04-13, 2026-04-20) for Immediate agents
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
const insertResult = db.prepare(`
|
|
33
|
+
INSERT INTO eval_results (agent_name, category, result, run_date, details)
|
|
34
|
+
VALUES (?, ?, ?, ?, ?)
|
|
35
|
+
`);
|
|
36
|
+
|
|
37
|
+
const historicalRuns = [
|
|
38
|
+
// ---- Run: 2026-04-13 (2 weeks ago) — all evals passing; baseline before drift ----
|
|
39
|
+
['Legal Contract Reviewer', 'Scope', 'PASS', '2026-04-13', null],
|
|
40
|
+
['Legal Contract Reviewer', 'Adherence', 'PASS', '2026-04-13', null],
|
|
41
|
+
['Legal Contract Reviewer', 'Determinism', 'PASS', '2026-04-13', 'BLEU 0.82 — within threshold on gpt-4o-2026-01.'],
|
|
42
|
+
['Legal Contract Reviewer', 'Groundedness', 'PASS', '2026-04-13', null],
|
|
43
|
+
['Legal Contract Reviewer', 'Regression & Drift', 'PASS', '2026-04-13', null],
|
|
44
|
+
['Legal Contract Reviewer', 'Safety / Red-Teaming','PASS', '2026-04-13', null],
|
|
45
|
+
['Legal Contract Reviewer', 'Content Safety', 'PASS', '2026-04-13', null],
|
|
46
|
+
['Legal Contract Reviewer', 'Security', 'PASS', '2026-04-13', null],
|
|
47
|
+
['Legal Contract Reviewer', 'RAG', 'PASS', '2026-04-13', null],
|
|
48
|
+
|
|
49
|
+
['HR Benefits Agent', 'Scope', 'PASS', '2026-04-13', null],
|
|
50
|
+
['HR Benefits Agent', 'Adherence', 'PASS', '2026-04-13', null],
|
|
51
|
+
['HR Benefits Agent', 'Determinism', 'PASS', '2026-04-13', null],
|
|
52
|
+
['HR Benefits Agent', 'Groundedness', 'PASS', '2026-04-13', 'Citations present in 91% of responses.'],
|
|
53
|
+
['HR Benefits Agent', 'Regression & Drift', 'PASS', '2026-04-13', null],
|
|
54
|
+
['HR Benefits Agent', 'Safety / Red-Teaming','PASS', '2026-04-13', null],
|
|
55
|
+
['HR Benefits Agent', 'Content Safety', 'PASS', '2026-04-13', null],
|
|
56
|
+
['HR Benefits Agent', 'Security', 'PASS', '2026-04-13', null],
|
|
57
|
+
['HR Benefits Agent', 'RAG', 'PASS', '2026-04-13', null],
|
|
58
|
+
|
|
59
|
+
['IT Helpdesk Copilot', 'Scope', 'PASS', '2026-04-13', null],
|
|
60
|
+
['IT Helpdesk Copilot', 'Adherence', 'PASS', '2026-04-13', null],
|
|
61
|
+
['IT Helpdesk Copilot', 'Determinism', 'PASS', '2026-04-13', null],
|
|
62
|
+
['IT Helpdesk Copilot', 'Groundedness', 'PASS', '2026-04-13', null],
|
|
63
|
+
['IT Helpdesk Copilot', 'Regression & Drift', 'PASS', '2026-04-13', null],
|
|
64
|
+
['IT Helpdesk Copilot', 'Safety / Red-Teaming','PASS', '2026-04-13', null],
|
|
65
|
+
['IT Helpdesk Copilot', 'Content Safety', 'PASS', '2026-04-13', null],
|
|
66
|
+
['IT Helpdesk Copilot', 'Security', 'PASS', '2026-04-13', null],
|
|
67
|
+
['IT Helpdesk Copilot', 'RAG', 'PASS', '2026-04-13', null],
|
|
68
|
+
|
|
69
|
+
['Sales Forecaster', 'Scope', 'PASS', '2026-04-13', null],
|
|
70
|
+
['Sales Forecaster', 'Adherence', 'PASS', '2026-04-13', null],
|
|
71
|
+
['Sales Forecaster', 'Determinism', 'PASS', '2026-04-13', null],
|
|
72
|
+
['Sales Forecaster', 'Groundedness', 'PASS', '2026-04-13', null],
|
|
73
|
+
['Sales Forecaster', 'Regression & Drift', 'PASS', '2026-04-13', null],
|
|
74
|
+
['Sales Forecaster', 'Safety / Red-Teaming','PASS', '2026-04-13', null],
|
|
75
|
+
['Sales Forecaster', 'Content Safety', 'PASS', '2026-04-13', null],
|
|
76
|
+
['Sales Forecaster', 'Security', 'PASS', '2026-04-13', null],
|
|
77
|
+
['Sales Forecaster', 'RAG', 'PASS', '2026-04-13', null],
|
|
78
|
+
|
|
79
|
+
// ---- Run: 2026-04-20 (1 week ago) — drift beginning to show ----
|
|
80
|
+
['Legal Contract Reviewer', 'Scope', 'PASS', '2026-04-20', null],
|
|
81
|
+
['Legal Contract Reviewer', 'Adherence', 'PASS', '2026-04-20', null],
|
|
82
|
+
['Legal Contract Reviewer', 'Determinism', 'PASS', '2026-04-20', 'BLEU 0.78 — slight decline after model update started rolling out.'],
|
|
83
|
+
['Legal Contract Reviewer', 'Groundedness', 'PASS', '2026-04-20', null],
|
|
84
|
+
['Legal Contract Reviewer', 'Regression & Drift', 'PASS', '2026-04-20', null],
|
|
85
|
+
['Legal Contract Reviewer', 'Safety / Red-Teaming','PASS', '2026-04-20', null],
|
|
86
|
+
['Legal Contract Reviewer', 'Content Safety', 'PASS', '2026-04-20', null],
|
|
87
|
+
['Legal Contract Reviewer', 'Security', 'PASS', '2026-04-20', 'No credential leakage in sampled responses.'],
|
|
88
|
+
['Legal Contract Reviewer', 'RAG', 'PASS', '2026-04-20', null],
|
|
89
|
+
|
|
90
|
+
['HR Benefits Agent', 'Scope', 'PASS', '2026-04-20', null],
|
|
91
|
+
['HR Benefits Agent', 'Adherence', 'PASS', '2026-04-20', null],
|
|
92
|
+
['HR Benefits Agent', 'Determinism', 'PASS', '2026-04-20', null],
|
|
93
|
+
['HR Benefits Agent', 'Groundedness', 'FAIL', '2026-04-20', 'Citations dropped to 81% — grounding config may need review.'],
|
|
94
|
+
['HR Benefits Agent', 'Regression & Drift', 'PASS', '2026-04-20', null],
|
|
95
|
+
['HR Benefits Agent', 'Safety / Red-Teaming','PASS', '2026-04-20', null],
|
|
96
|
+
['HR Benefits Agent', 'Content Safety', 'PASS', '2026-04-20', null],
|
|
97
|
+
['HR Benefits Agent', 'Security', 'PASS', '2026-04-20', null],
|
|
98
|
+
['HR Benefits Agent', 'RAG', 'PENDING', '2026-04-20', 'PICS SDK not yet available.'],
|
|
99
|
+
|
|
100
|
+
['IT Helpdesk Copilot', 'Scope', 'PASS', '2026-04-20', null],
|
|
101
|
+
['IT Helpdesk Copilot', 'Adherence', 'PASS', '2026-04-20', null],
|
|
102
|
+
['IT Helpdesk Copilot', 'Determinism', 'PASS', '2026-04-20', null],
|
|
103
|
+
['IT Helpdesk Copilot', 'Groundedness', 'PASS', '2026-04-20', null],
|
|
104
|
+
['IT Helpdesk Copilot', 'Regression & Drift', 'PASS', '2026-04-20', null],
|
|
105
|
+
['IT Helpdesk Copilot', 'Safety / Red-Teaming','FAIL', '2026-04-20', 'Jailbreak succeeded in 1 of 20 probes — first detection.'],
|
|
106
|
+
['IT Helpdesk Copilot', 'Content Safety', 'PASS', '2026-04-20', null],
|
|
107
|
+
['IT Helpdesk Copilot', 'Security', 'PASS', '2026-04-20', null],
|
|
108
|
+
['IT Helpdesk Copilot', 'RAG', 'PASS', '2026-04-20', null],
|
|
109
|
+
|
|
110
|
+
['Sales Forecaster', 'Scope', 'PASS', '2026-04-20', null],
|
|
111
|
+
['Sales Forecaster', 'Adherence', 'PASS', '2026-04-20', null],
|
|
112
|
+
['Sales Forecaster', 'Determinism', 'PASS', '2026-04-20', null],
|
|
113
|
+
['Sales Forecaster', 'Groundedness', 'PASS', '2026-04-20', null],
|
|
114
|
+
['Sales Forecaster', 'Regression & Drift', 'PENDING', '2026-04-20', 'Baseline not yet established.'],
|
|
115
|
+
['Sales Forecaster', 'Safety / Red-Teaming','PASS', '2026-04-20', null],
|
|
116
|
+
['Sales Forecaster', 'Content Safety', 'PASS', '2026-04-20', null],
|
|
117
|
+
['Sales Forecaster', 'Security', 'PASS', '2026-04-20', null],
|
|
118
|
+
['Sales Forecaster', 'RAG', 'PASS', '2026-04-20', null],
|
|
119
|
+
];
|
|
120
|
+
|
|
121
|
+
const tx = db.transaction(() => {
|
|
122
|
+
for (const r of historicalRuns) insertResult.run(...r);
|
|
123
|
+
});
|
|
124
|
+
tx();
|
|
125
|
+
|
|
126
|
+
db.close();
|
|
127
|
+
console.log(`[seed-cpmV2] Added ${historicalRuns.length} historical eval results to: ${dbPath}`);
|
|
128
|
+
console.log(' Run dates added: 2026-04-13, 2026-04-20 (existing: 2026-04-27)');
|