qualia-framework 6.2.10 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +1 -0
- package/CLAUDE.md +1 -0
- package/README.md +16 -23
- package/bin/cli.js +49 -2
- package/bin/command-surface.js +71 -0
- package/bin/harness-eval.js +296 -0
- package/bin/install.js +17 -20
- package/bin/knowledge-flush.js +21 -10
- package/bin/knowledge.js +1 -1
- package/bin/project-snapshot.js +20 -0
- package/bin/report-payload.js +18 -0
- package/bin/runtime-manifest.js +3 -0
- package/bin/state.js +31 -0
- package/bin/trust-score.js +3 -11
- package/bin/work-packet.js +228 -0
- package/docs/erp-contract.md +81 -1
- package/docs/onboarding.html +0 -11
- package/guide.md +14 -15
- package/hooks/fawzi-approval-guard.js +143 -0
- package/hooks/pre-deploy-gate.js +74 -1
- package/hooks/session-start.js +29 -1
- package/package.json +1 -1
- package/qualia-design/frontend.md +2 -2
- package/rules/codex-goal.md +1 -1
- package/rules/one-opinion.md +2 -2
- package/rules/speed.md +0 -1
- package/skills/qualia/SKILL.md +4 -4
- package/skills/qualia-feature/SKILL.md +1 -1
- package/skills/qualia-fix/SKILL.md +4 -4
- package/skills/qualia-learn/SKILL.md +1 -1
- package/skills/qualia-polish/REFERENCE.md +1 -1
- package/skills/qualia-polish/SKILL.md +19 -4
- package/skills/{qualia-vibe/scripts/extract.mjs → qualia-polish/scripts/vibe-extract.mjs} +4 -4
- package/skills/{qualia-vibe/scripts/tokens.mjs → qualia-polish/scripts/vibe-tokens.mjs} +6 -6
- package/skills/qualia-road/SKILL.md +15 -20
- package/skills/qualia-ship/SKILL.md +12 -5
- package/skills/qualia-verify/SKILL.md +9 -1
- package/templates/help.html +1 -12
- package/tests/bin.test.sh +144 -72
- package/tests/hooks.test.sh +81 -1
- package/tests/install-smoke.test.sh +13 -3
- package/tests/lib.test.sh +145 -3
- package/tests/published-install-smoke.test.sh +4 -3
- package/tests/refs.test.sh +9 -4
- package/tests/runner.js +29 -28
- package/tests/state.test.sh +68 -0
- package/skills/qualia-debug/SKILL.md +0 -193
- package/skills/qualia-flush/SKILL.md +0 -198
- package/skills/qualia-help/SKILL.md +0 -74
- package/skills/qualia-hook-gen/SKILL.md +0 -206
- package/skills/qualia-idk/SKILL.md +0 -166
- package/skills/qualia-issues/SKILL.md +0 -151
- package/skills/qualia-pause/SKILL.md +0 -68
- package/skills/qualia-resume/SKILL.md +0 -52
- package/skills/qualia-skill-new/SKILL.md +0 -173
- package/skills/qualia-triage/SKILL.md +0 -152
- package/skills/qualia-vibe/SKILL.md +0 -229
- package/skills/qualia-zoom/SKILL.md +0 -51
package/AGENTS.md
CHANGED
|
@@ -11,6 +11,7 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
|
|
|
11
11
|
- Feature branches only — never push to main/master
|
|
12
12
|
- MVP first — build only what's asked
|
|
13
13
|
- Root cause on failures — no band-aids
|
|
14
|
+
- No proxy approval — employees cannot claim Fawzi approved; OWNER-only overrides require OWNER config
|
|
14
15
|
|
|
15
16
|
## Discoverable substrate (load on demand, not always)
|
|
16
17
|
- `/qualia-road` — workflow map, every command, when to use it
|
package/CLAUDE.md
CHANGED
|
@@ -11,6 +11,7 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
|
|
|
11
11
|
- Feature branches only — never push to main/master
|
|
12
12
|
- MVP first — build only what's asked
|
|
13
13
|
- Root cause on failures — no band-aids
|
|
14
|
+
- No proxy approval — employees cannot claim Fawzi approved; OWNER-only overrides require OWNER config
|
|
14
15
|
|
|
15
16
|
## Discoverable substrate (load on demand, not always)
|
|
16
17
|
- `/qualia-road` — workflow map, every command, when to use it
|
package/README.md
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
# Qualia Framework v6.
|
|
1
|
+
# Qualia Framework v6.3.0
|
|
2
2
|
|
|
3
3
|
A harness engineering framework for Claude Code and OpenAI Codex. It installs into `~/.claude/` and/or `~/.codex/` and wraps your AI-assisted development workflow with structured planning, execution, verification, and deployment gates.
|
|
4
4
|
|
|
5
5
|
It is not an application framework like Rails or Next.js. It doesn't generate code, run servers, or process data. It's an opinionated workflow layer that tells Claude how to plan, build, and verify your projects end-to-end, from "tell me what you want to make" to "here's the handoff doc for your client."
|
|
6
6
|
|
|
7
|
+
**v6.3.0** — Harness hardening pass. Default install surface drops to 23 active skills, retired helper command sources are removed and pruned from older installs, `/qualia-polish --vibe` absorbs the separate vibe command, `harness-eval.js` writes scored eval artifacts, ERP reports/snapshots carry the latest eval score, and `state.js` refuses PASS when machine contract evidence is missing/failing or the verification report contains `INSUFFICIENT EVIDENCE`.
|
|
8
|
+
**v6.2.11** — Owner approval integrity. Fawzi's install code is now `QS-FAWZI-11`; employees cannot use `QUALIA_SHIP_FORCE=1`; deploy refusals say why and what to run next; and employee "Fawzi said OK" proxy-approval claims are silently counted for ERP policy review.
|
|
7
9
|
**v6.2.10** — Codex status line is now a publish-blocking install contract. Installer guarantees `[tui].status_line` in `~/.codex/config.toml`, `/qualia-doctor` verifies the native bottom line, and package smoke tests assert the Codex TUI segments are present.
|
|
8
10
|
**v6.2.9** — Codex hook noise + status line. Conditional PreToolUse hooks no longer status-message on every Bash call (Codex was printing 8 "Running hook…" lines on every command). Self-filtering added to `pre-deploy-gate.js` and `pre-push.js` so they never trip on unrelated commands (Claude's substring matcher was firing them on for-loop arguments). Installer now writes `[tui] status_line = [...]` to Codex's `config.toml` for the rich native bottom status line.
|
|
9
11
|
|
|
@@ -12,10 +14,10 @@ It is not an application framework like Rails or Next.js. It doesn't generate co
|
|
|
12
14
|
**v6.2.7** — Codex runtime compatibility. The installer now writes Codex-native hooks, TOML agents, bin scripts, rules, skills, templates, knowledge, guide, and role config under `~/.codex/`, not just `AGENTS.md`.
|
|
13
15
|
|
|
14
16
|
**The v5 line (preserved):**
|
|
15
|
-
- **v5.0**, alignment discipline. CONTEXT.md domain glossary, decisions/ ADRs,
|
|
17
|
+
- **v5.0**, alignment discipline. CONTEXT.md domain glossary, decisions/ ADRs, zoom/queue helper experiments, slim CLAUDE.md per Matt Pocock's instruction-budget rule, insights-driven hooks.
|
|
16
18
|
- **v5.1**, autonomous visual-polish loop. Screenshots a URL at three viewports, scores design dimensions with vision, fixes top issues, loops until pass or kill-switch. Multi-target installer (Claude Code + Codex AGENTS.md + Both).
|
|
17
19
|
- **v5.2**, polish-loop reliability. `--reduced-motion` capture flag, `--routes URL1,URL2` multi-route mode, first supervised end-to-end run.
|
|
18
|
-
- **v5.3**, Matt Pocock gaps closed.
|
|
20
|
+
- **v5.3**, Matt Pocock gaps closed. Hook-generation utility experiment, `/qualia-optimize --deepen` Step 5b parallel-interface design (3 fan-out agents producing radically different interfaces).
|
|
19
21
|
- **v5.4-5.5**, token-discipline and plan-discipline. Cache-aware spawn ordering, scope-reduction prohibition, decision-coverage audit, requirement-coverage check.
|
|
20
22
|
- **v5.6**, Demo vs Full Project gate at kickoff. Mandatory discovery interview via `/qualia-discuss` in PROJECT MODE (8 questions for demos, 14 for full projects). Demo-extension branch in `/qualia-milestone` for client-signs-after-demo conversion.
|
|
21
23
|
- **v5.7**, `/qualia-feature` consolidates `/qualia-quick` + `/qualia-task` into one auto-scoped command.
|
|
@@ -24,7 +26,7 @@ It is not an application framework like Rails or Next.js. It doesn't generate co
|
|
|
24
26
|
- **v5.9.1**, kickoff UX fix. `/qualia-new` now opens with the Demo/Full/Quick gate as Step 1 (`AskUserQuestion`), then exactly one free-text pitch question, then mandatory hand-off to `/qualia-discuss` — no ad-hoc clarification questioning between them. The shape gate drives the whole downstream interview, so it must come first.
|
|
25
27
|
- **v5.9.2**, hook ordering + ERP payload fixes. `pre-push.js` self-gates against `branch-guard.js` so a blocked-push no longer leaves an orphan bot commit in local history. `qualia-report` ERP payload omits empty ISO datetime fields (`session_started_at`, `last_pushed_at`) instead of sending `''`, which the ERP validator rejected as 422.
|
|
26
28
|
- **v6.0.0**, audit + cleanup pass. See CHANGELOG for the full list. Highlights: uninstall/migrate manifests fixed, silent hook `catch{}` blocks now traced, phantom `rules/frontend.md` references replaced, `/qualia-learn` and `/qualia-map` declare their actually-used tools, `/qualia-plan` revision-cycle contradiction reconciled (max 2), `agents/planner.md` and `agents/qa-browser.md` MCP tools declared in frontmatter, `rules/trust-boundary.md` extracted, hardcoded `/tmp` paths replaced with `mktemp`, fail-collect test runner, pre-v4 CHANGELOG archived.
|
|
27
|
-
- **v6.1.0**, `/qualia-vibe` adds a fast layout-preserving design pivot path and strengthens design-surface guards.
|
|
29
|
+
- **v6.1.0**, `/qualia-polish --vibe` adds a fast layout-preserving design pivot path and strengthens design-surface guards.
|
|
28
30
|
- **v6.2.0**, removes hook-created bot commits. The ERP/report contract is `/qualia-report` POSTs, not passive git scraping of `tracking.json`.
|
|
29
31
|
- **v6.2.1**, active-surface drift guard. README, guide, onboarding, ERP contract, road, milestone, polish, verify, and roadmapper wording now align with v6.2 behavior; refs tests fail on the stale claims.
|
|
30
32
|
- **v6.2.2**, Framework/Memory/ERP clarity. ERP can hand a work packet into Framework sessions, reports can carry ERP-native IDs, and public npm install proof is a first-class release smoke.
|
|
@@ -104,37 +106,27 @@ Two human gates per project. One halt case (gap-cycle limit exceeded on a failin
|
|
|
104
106
|
|
|
105
107
|
```
|
|
106
108
|
/qualia # Mechanical state router — "what's my next command?"
|
|
107
|
-
/qualia
|
|
108
|
-
/qualia-
|
|
109
|
-
/qualia-resume # Pick up where you left off
|
|
109
|
+
/qualia # Also handles "resume", "pause", and "I don't know what's going on" diagnostics
|
|
110
|
+
/qualia-road # View and navigate the project road (journey/milestone/phase status)
|
|
110
111
|
```
|
|
111
112
|
|
|
112
113
|
### Quality & shortcuts
|
|
113
114
|
|
|
114
115
|
```
|
|
115
|
-
/qualia-debug # Structured debugging
|
|
116
116
|
/qualia-fix # Repair broken existing behavior (root cause -> patch -> verify -> report)
|
|
117
117
|
/qualia-review # Production audit (scored diagnostics)
|
|
118
118
|
/qualia-optimize # Deep optimization pass (parallel specialist agents, --deepen mode with parallel-interface design)
|
|
119
119
|
/qualia-feature # Auto-scoped new feature build (inline for trivia, fresh spawn for 1-5 files)
|
|
120
120
|
/qualia-test # Generate or run tests (--tdd mode for test-first workflow)
|
|
121
|
-
/qualia-zoom # Focus on a single file or function with full context
|
|
122
|
-
/qualia-issues # Break a phase plan into vertical-slice GitHub issues
|
|
123
|
-
/qualia-triage # Triage open issues through the ready-for-agent state machine
|
|
124
|
-
/qualia-road # View and navigate the project road (journey/milestone/phase status)
|
|
125
121
|
/qualia-polish --loop # Autonomous visual-polish loop: screenshot, vision-eval, fix, repeat
|
|
126
|
-
/qualia-vibe # Fast aesthetic pivot (~3 min): swap design tokens, keep layout. Supports --extract URL (reverse-engineer DESIGN.md) and --sync (code → DESIGN.md back-sync)
|
|
127
|
-
/qualia-hook-gen # Convert a CLAUDE.md/rules instruction into a deterministic hook
|
|
122
|
+
/qualia-polish --vibe # Fast aesthetic pivot (~3 min): swap design tokens, keep layout. Supports --extract URL (reverse-engineer DESIGN.md) and --sync (code → DESIGN.md back-sync)
|
|
128
123
|
```
|
|
129
124
|
|
|
130
125
|
### Knowledge & meta
|
|
131
126
|
|
|
132
127
|
```
|
|
133
128
|
/qualia-learn # Save a pattern, fix, or client pref to the active install home's knowledge/
|
|
134
|
-
/qualia-flush # Promote daily-log raw entries into curated knowledge concepts
|
|
135
129
|
/qualia-postmortem # Self-heal — when verification fails, propose rule/skill deltas
|
|
136
|
-
/qualia-skill-new # Author a new Qualia skill or agent
|
|
137
|
-
/qualia-help # Open the framework reference in your browser
|
|
138
130
|
```
|
|
139
131
|
|
|
140
132
|
### Team-specific
|
|
@@ -165,11 +157,11 @@ Project
|
|
|
165
157
|
|
|
166
158
|
**Why it matters:** non-technical team members can follow the ladder from any entry point. `/qualia` and `/qualia-milestone` render JOURNEY.md as a visual ladder with current position highlighted. In the ERP, the primary operational dates are project deadline, milestone deadline, and employee shift submission date; framework tasks stay internal to agent execution.
|
|
167
159
|
|
|
168
|
-
## What's Inside (v6.
|
|
160
|
+
## What's Inside (v6.3.0)
|
|
169
161
|
|
|
170
|
-
- **
|
|
162
|
+
- **23 installed skills**, focused into Road (new / plan / build / verify / milestone / polish / ship / handoff / report), depth (discuss, research, map), navigation (qualia router + road), quality (fix, review, optimize with `--deepen` parallel-interface design, feature, test), design (`qualia-polish --loop` and `--vibe`), health/reporting (doctor, learn, postmortem), and Zoho workflow support. Retired helper commands are pruned on install rather than exposed as default slash commands.
|
|
171
163
|
- **9 agents** (each runs in fresh context): planner, builder, verifier, qa-browser, researcher, research-synthesizer, roadmapper, plan-checker, visual-evaluator
|
|
172
|
-
- **
|
|
164
|
+
- **12 hooks** (pure Node.js, cross-platform): session-start, auto-update, git-guardrails, branch-guard, pre-push tracking stamp, migration-guard, pre-deploy-gate, stop-session-log, fawzi-approval-guard, vercel-account-guard, env-empty-guard, supabase-destructive-guard
|
|
173
165
|
- **10 installed rules** (`rules/`): grounding, security, infrastructure, deployment, speed, architecture, trust-boundary, codex-goal, one-opinion, and always-on command-output transparency.
|
|
174
166
|
- **7 lazy-loaded design files** (`qualia-design/`): design-laws, design-brand, design-product, design-rubric, design-reference, frontend, graphics — `Read` on demand by design-aware skills/agents only.
|
|
175
167
|
- **25 template files**: project.md, journey.md, plan.md (story-file format), state.md, DESIGN.md, CONTEXT.md (domain glossary), work-packet.md (ERP-approved session context), decisions/ADR-template.md, tracking.json (with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), knowledge templates, help.html
|
|
@@ -210,9 +202,10 @@ Splitting planner, builder, and verifier into separate agents with separate cont
|
|
|
210
202
|
|
|
211
203
|
### Production-Grade Hooks
|
|
212
204
|
|
|
213
|
-
All
|
|
205
|
+
All 12 hooks are real ops engineering, not theoretical:
|
|
214
206
|
|
|
215
207
|
- **Pre-deploy gate** — TypeScript, lint, tests, build, and `service_role` leak scan before `vercel --prod`
|
|
208
|
+
- **Fawzi approval guard** — Silently counts employee proxy-approval claims for ERP review
|
|
216
209
|
- **Session start** — Shows project state, next command, update notices, and health warnings at session start
|
|
217
210
|
- **Auto-update** — Daily update check with cached failures so offline/npm issues do not slow every command
|
|
218
211
|
- **Git guardrails** — Blocks destructive git operations like force-push to main/master, `git clean -fd`, and `rm -rf .git`
|
|
@@ -234,7 +227,7 @@ Plans are grouped into waves for parallel execution. No fancy DAG solver — the
|
|
|
234
227
|
|
|
235
228
|
### Diagnostic Intelligence
|
|
236
229
|
|
|
237
|
-
`/qualia
|
|
230
|
+
`/qualia` is a real diagnostician (not a router alias). When the user's confusion is about *understanding the situation*, it spawns two isolated scans in parallel — one reads only `.planning/`, the other reads only source code — then synthesizes a plain-language "What I see / What I think is happening / What to do next" diagnosis. Catches plan↔code drift that a state-only router can't see.
|
|
238
231
|
|
|
239
232
|
## Architecture
|
|
240
233
|
|
|
@@ -243,7 +236,7 @@ npx qualia-framework@latest install
|
|
|
243
236
|
|
|
|
244
237
|
v
|
|
245
238
|
~/.claude/ and/or ~/.codex/
|
|
246
|
-
├── skills/
|
|
239
|
+
├── skills/ 23 installed skills (each may ship SKILL.md + REFERENCE.md + scripts/ + fixtures/)
|
|
247
240
|
├── agents/ 9 agent definitions (Claude .md, Codex .toml)
|
|
248
241
|
├── hooks/ 12 Node.js hooks — cross-platform (no bash dependency)
|
|
249
242
|
├── bin/ state.js + qualia-ui.js + statusline.js + knowledge.js + knowledge-flush.js + slop-detect.mjs + planning-hygiene.js + plan-contract.js + agent-runs.js + ERP/report helpers
|
package/bin/cli.js
CHANGED
|
@@ -205,6 +205,7 @@ const QUALIA_HOOK_FILES = [
|
|
|
205
205
|
"pre-deploy-gate.js",
|
|
206
206
|
"git-guardrails.js",
|
|
207
207
|
"stop-session-log.js",
|
|
208
|
+
"fawzi-approval-guard.js",
|
|
208
209
|
"env-empty-guard.js",
|
|
209
210
|
"supabase-destructive-guard.js",
|
|
210
211
|
"vercel-account-guard.js",
|
|
@@ -499,7 +500,7 @@ async function cmdUninstall() {
|
|
|
499
500
|
|
|
500
501
|
function getDefaultTeam() {
|
|
501
502
|
return {
|
|
502
|
-
"QS-FAWZI-
|
|
503
|
+
"QS-FAWZI-11": { name: "Fawzi Goussous", role: "OWNER", description: "Company owner. Full access. Can push to main, approve deploys, edit secrets." },
|
|
503
504
|
"QS-HASAN-02": { name: "Hasan", role: "EMPLOYEE", description: "Developer. Feature branches only. Cannot push to main or edit .env files." },
|
|
504
505
|
"QS-MOAYAD-03": { name: "Moayad", role: "EMPLOYEE", description: "Developer. Feature branches only. Cannot push to main or edit .env files." },
|
|
505
506
|
"QS-RAMA-04": { name: "Rama", role: "EMPLOYEE", description: "Developer. Feature branches only. Cannot push to main or edit .env files." },
|
|
@@ -1206,6 +1207,23 @@ function cmdProjectSnapshot() {
|
|
|
1206
1207
|
process.exit(typeof r.status === "number" ? r.status : 1);
|
|
1207
1208
|
}
|
|
1208
1209
|
|
|
1210
|
+
/**
 * `work-packet` / `packet` subcommand.
 *
 * Resolves work-packet.js (the copy under the primary install home is
 * preferred, the copy next to this CLI is the fallback), runs it in a child
 * Node process with every argument after the subcommand forwarded, and exits
 * with the child's status. Exits 1 when no script is found or when the child
 * produced no numeric status.
 */
function cmdWorkPacket() {
  const candidates = [
    path.join(primaryInstallHome(), "bin", "work-packet.js"),
    path.join(__dirname, "work-packet.js"),
  ];
  const script = candidates.find((candidate) => fs.existsSync(candidate));

  if (script === undefined) {
    console.log(` ${RED}✗${RESET} work-packet.js not available`);
    console.log(` ${DIM}Run: npx qualia-framework@latest install${RESET}`);
    process.exit(1);
  }

  // argv[2] is the subcommand itself; everything after it belongs to the script.
  const forwarded = process.argv.slice(3);
  const child = spawnSync(process.execPath, [script, ...forwarded], {
    stdio: "inherit",
    shell: false,
  });
  const exitCode = typeof child.status === "number" ? child.status : 1;
  process.exit(exitCode);
}
|
|
1226
|
+
|
|
1209
1227
|
function cmdPlanningHygiene() {
|
|
1210
1228
|
const installedScript = path.join(primaryInstallHome(), "bin", "planning-hygiene.js");
|
|
1211
1229
|
const localScript = path.join(__dirname, "planning-hygiene.js");
|
|
@@ -1240,6 +1258,23 @@ function cmdTrust() {
|
|
|
1240
1258
|
process.exit(typeof r.status === "number" ? r.status : 1);
|
|
1241
1259
|
}
|
|
1242
1260
|
|
|
1261
|
+
/**
 * `eval` / `harness-eval` subcommand.
 *
 * Resolves harness-eval.js (installed copy first, then the copy beside this
 * CLI), runs it in a child Node process with the remaining CLI arguments, and
 * exits with the child's status. Exits 1 when no script is found or when the
 * child produced no numeric status.
 */
function cmdHarnessEval() {
  const candidates = [
    path.join(primaryInstallHome(), "bin", "harness-eval.js"),
    path.join(__dirname, "harness-eval.js"),
  ];
  const script = candidates.find((candidate) => fs.existsSync(candidate));

  if (script === undefined) {
    console.log(` ${RED}✗${RESET} harness-eval.js not available`);
    console.log(` ${DIM}Run: npx qualia-framework@latest install${RESET}`);
    process.exit(1);
  }

  // argv[2] is the subcommand itself; everything after it belongs to the script.
  const forwarded = process.argv.slice(3);
  const child = spawnSync(process.execPath, [script, ...forwarded], {
    stdio: "inherit",
    shell: false,
  });
  const exitCode = typeof child.status === "number" ? child.status : 1;
  process.exit(exitCode);
}
|
|
1277
|
+
|
|
1243
1278
|
function cmdFlush() {
|
|
1244
1279
|
const flushScript = path.join(primaryInstallHome(), "bin", "knowledge-flush.js");
|
|
1245
1280
|
if (!fs.existsSync(flushScript)) {
|
|
@@ -1282,12 +1317,14 @@ function cmdDoctor() {
|
|
|
1282
1317
|
"bin/state.js",
|
|
1283
1318
|
"bin/qualia-ui.js",
|
|
1284
1319
|
"bin/statusline.js",
|
|
1320
|
+
"bin/command-surface.js",
|
|
1285
1321
|
"bin/knowledge.js",
|
|
1286
1322
|
"bin/knowledge-flush.js",
|
|
1287
1323
|
"bin/state-ledger.js",
|
|
1288
1324
|
"bin/plan-contract.js",
|
|
1289
1325
|
"bin/contract-runner.js",
|
|
1290
1326
|
"bin/trust-score.js",
|
|
1327
|
+
"bin/harness-eval.js",
|
|
1291
1328
|
"bin/erp-retry.js",
|
|
1292
1329
|
"bin/report-payload.js",
|
|
1293
1330
|
"bin/project-snapshot.js",
|
|
@@ -1518,10 +1555,12 @@ function cmdHelp() {
|
|
|
1518
1555
|
console.log(` qualia-framework ${TEAL}set-erp-key${RESET} Save/enable the ERP API key`);
|
|
1519
1556
|
console.log(` qualia-framework ${TEAL}erp-ping${RESET} Verify ERP connectivity + API key`);
|
|
1520
1557
|
console.log(` qualia-framework ${TEAL}erp-flush${RESET} Retry queued ERP report uploads (${DIM}show|clear${RESET})`);
|
|
1558
|
+
console.log(` qualia-framework ${TEAL}work-packet${RESET} Pull/read ERP mission packet (${DIM}pull --project UUID${RESET})`);
|
|
1521
1559
|
console.log(` qualia-framework ${TEAL}project-snapshot${RESET} Export/upload ERP admin project progress snapshot (${DIM}--write|--upload${RESET})`);
|
|
1522
1560
|
console.log(` qualia-framework ${TEAL}planning-hygiene${RESET} Scan/organize .planning artifacts (${DIM}scan|organize --write${RESET})`);
|
|
1523
1561
|
console.log(` qualia-framework ${TEAL}doctor${RESET} Health-check the install (files, hooks, settings)`);
|
|
1524
1562
|
console.log(` qualia-framework ${TEAL}trust${RESET} Score install, state, contracts, memory, ERP (${DIM}--json${RESET})`);
|
|
1563
|
+
console.log(` qualia-framework ${TEAL}eval${RESET} Write/run project harness eval scoring (${DIM}--run --write --json${RESET})`);
|
|
1525
1564
|
console.log(` qualia-framework ${TEAL}flush${RESET} Promote daily-log → curated knowledge (memory layer)`);
|
|
1526
1565
|
console.log("");
|
|
1527
1566
|
console.log(` ${WHITE}After install:${RESET}`);
|
|
@@ -1532,7 +1571,7 @@ function cmdHelp() {
|
|
|
1532
1571
|
console.log(` ${TG}/qualia-build${RESET} Build it (parallel tasks)`);
|
|
1533
1572
|
console.log(` ${TG}/qualia-verify${RESET} Verify it works`);
|
|
1534
1573
|
console.log(` ${TG}/qualia-polish${RESET} Design pass — any scope (component, route, app, redesign)`);
|
|
1535
|
-
console.log(` ${TG}/qualia-
|
|
1574
|
+
console.log(` ${TG}/qualia-fix${RESET} Root-cause broken behavior, patch, verify`);
|
|
1536
1575
|
console.log(` ${TG}/qualia-review${RESET} Production audit`);
|
|
1537
1576
|
console.log(` ${TG}/qualia-ship${RESET} Deploy to production`);
|
|
1538
1577
|
console.log(` ${TG}/qualia-report${RESET} Log your work`);
|
|
@@ -1595,6 +1634,10 @@ switch (cmd) {
|
|
|
1595
1634
|
case "snapshot":
|
|
1596
1635
|
cmdProjectSnapshot();
|
|
1597
1636
|
break;
|
|
1637
|
+
case "work-packet":
|
|
1638
|
+
case "packet":
|
|
1639
|
+
cmdWorkPacket();
|
|
1640
|
+
break;
|
|
1598
1641
|
case "planning-hygiene":
|
|
1599
1642
|
case "planning":
|
|
1600
1643
|
cmdPlanningHygiene();
|
|
@@ -1608,6 +1651,10 @@ switch (cmd) {
|
|
|
1608
1651
|
case "score":
|
|
1609
1652
|
cmdTrust();
|
|
1610
1653
|
break;
|
|
1654
|
+
case "eval":
|
|
1655
|
+
case "harness-eval":
|
|
1656
|
+
cmdHarnessEval();
|
|
1657
|
+
break;
|
|
1611
1658
|
case "flush":
|
|
1612
1659
|
case "knowledge-flush":
|
|
1613
1660
|
cmdFlush();
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Canonical Qualia command surface.
|
|
3
|
+
//
|
|
4
|
+
// The repo may keep retired skills for migration/history, but installs should
|
|
5
|
+
// expose the small active surface below. This gives users fewer commands while
|
|
6
|
+
// preserving compatibility cleanup for older installs.
|
|
7
|
+
|
|
8
|
+
// Skills that make up the active command surface, in their canonical order.
const ACTIVE_SKILLS = [
  "qualia", "qualia-new", "qualia-discuss", "qualia-map", "qualia-research",
  "qualia-plan", "qualia-build", "qualia-verify",
  "qualia-fix", "qualia-feature", "qualia-review", "qualia-optimize",
  "qualia-polish", "qualia-test",
  "qualia-milestone", "qualia-ship", "qualia-handoff", "qualia-report",
  "qualia-doctor", "qualia-road", "qualia-learn", "qualia-postmortem",
  "zoho-workflow",
];
|
|
33
|
+
|
|
34
|
+
// Skills no longer part of the active surface. The notes record where each
// one's behavior went.
const RETIRED_SKILLS = [
  // Historical folds.
  "qualia-task", "qualia-quick", "qualia-polish-loop", "qualia-design", "qualia-prd",

  // v6.3 surface reduction: keep the behavior under sharper active commands.
  "qualia-debug",     // -> qualia-fix (actionable repairs)
  "qualia-vibe",      // -> qualia-polish modes/documentation
  "qualia-help",      // guide/help files stay installed; slash command dropped
  "qualia-idk",       // -> qualia router, diagnostic branch
  "qualia-pause",     // -> qualia router, handoff branch
  "qualia-resume",    // -> qualia router, handoff branch
  "qualia-zoom",      // -> qualia-map / qualia-review as an analysis mode
  "qualia-issues",    // GitHub queue externalization is not default workflow
  "qualia-triage",    // GitHub queue routing is not default workflow
  "qualia-hook-gen",  // framework-authoring utility, not employee default
  "qualia-skill-new", // framework-authoring utility, not employee default
  "qualia-flush",     // available as `qualia-framework flush` / automation
];
|
|
56
|
+
|
|
57
|
+
/**
 * @returns {string[]} a fresh shallow copy of ACTIVE_SKILLS, so callers can
 *   mutate the result without touching the module-level list.
 */
function activeSkills() {
  return ACTIVE_SKILLS.slice();
}
|
|
60
|
+
|
|
61
|
+
/**
 * @returns {string[]} a fresh shallow copy of RETIRED_SKILLS, so callers can
 *   mutate the result without touching the module-level list.
 */
function retiredSkills() {
  return RETIRED_SKILLS.slice();
}
|
|
64
|
+
|
|
65
|
+
module.exports = {
|
|
66
|
+
ACTIVE_SKILLS,
|
|
67
|
+
RETIRED_SKILLS,
|
|
68
|
+
activeSkills,
|
|
69
|
+
retiredSkills,
|
|
70
|
+
};
|
|
71
|
+
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Qualia harness eval — deterministic project scoring + evidence artifact.
|
|
3
|
+
//
|
|
4
|
+
// This is the mechanical layer above trust-score and contract-runner. It writes
|
|
5
|
+
// one JSON/Markdown artifact that can be attached to reports, snapshots, and
|
|
6
|
+
// state transitions instead of relying on a prose "looks good" verdict.
|
|
7
|
+
|
|
8
|
+
const fs = require("fs");
|
|
9
|
+
const path = require("path");
|
|
10
|
+
const { spawnSync } = require("child_process");
|
|
11
|
+
const pc = require("./plan-contract.js");
|
|
12
|
+
const contractRunner = require("./contract-runner.js");
|
|
13
|
+
const trust = require("./trust-score.js");
|
|
14
|
+
|
|
15
|
+
/**
 * Parse harness-eval command-line flags.
 *
 * Recognized: --json, --run, --write, --no-write (booleans),
 * --phase N / --phase=N (converted with Number), --cwd DIR / --cwd=DIR.
 * Unrecognized tokens are ignored.
 *
 * @param {string[]} argv full process argv; scanning starts at index 2.
 * @returns {object} parsed options; `cwd` defaults to process.cwd().
 */
function parseArgs(argv) {
  const parsed = { cwd: process.cwd() };
  let idx = 2;
  while (idx < argv.length) {
    const token = argv[idx];
    switch (token) {
      case "--json":
        parsed.json = true;
        break;
      case "--run":
        parsed.run = true;
        break;
      case "--write":
        parsed.write = true;
        break;
      case "--no-write":
        parsed.no_write = true;
        break;
      case "--phase":
        // Space-separated form: the value is the next token.
        idx += 1;
        parsed.phase = Number(argv[idx]);
        break;
      case "--cwd":
        idx += 1;
        parsed.cwd = argv[idx];
        break;
      default:
        if (token.startsWith("--phase=")) {
          parsed.phase = Number(token.slice("--phase=".length));
        } else if (token.startsWith("--cwd=")) {
          parsed.cwd = token.slice("--cwd=".length);
        }
    }
    idx += 1;
  }
  return parsed;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} file path to read.
 * @param {*} [fallback=null] returned when reading or parsing throws.
 * @returns {*} the parsed value, or `fallback`.
 */
function readJson(file, fallback = null) {
  try {
    const raw = fs.readFileSync(file, "utf8");
    return JSON.parse(raw);
  } catch {
    // Any read or parse error collapses to the fallback.
    return fallback;
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * Read a file as UTF-8 text.
 *
 * @param {string} file path to read.
 * @param {string} [fallback=""] returned when the read throws.
 * @returns {string} file contents, or `fallback`.
 */
function readText(file, fallback = "") {
  let contents;
  try {
    contents = fs.readFileSync(file, "utf8");
  } catch {
    return fallback;
  }
  return contents;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Path of `file` relative to `root`, with every backslash turned into a
 * forward slash.
 *
 * @param {string} root base directory.
 * @param {string} file target path.
 * @returns {string} relative path using "/" separators.
 */
function rel(root, file) {
  const relative = path.relative(root, file);
  return relative.split("\\").join("/");
}
|
|
42
|
+
|
|
43
|
+
/**
 * Run `state.js check` (the copy beside this script) in `cwd` and capture
 * its result.
 *
 * @param {string} cwd working directory for the child process.
 * @returns {{ok: *, status: (number|null), stdout: string, stderr: string, parsed: *}}
 *   `parsed` is stdout parsed as JSON (null when it is not valid JSON); `ok`
 *   is truthy only when the child exited 0 and `parsed.ok === true`.
 */
function stateCheck(cwd) {
  const scriptPath = path.join(__dirname, "state.js");
  const child = spawnSync(process.execPath, [scriptPath, "check"], {
    cwd,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "pipe"],
    timeout: 5000, // milliseconds
  });
  const { status, stdout, stderr } = child;
  const parsed = readJsonFromString(stdout);
  const ok = status === 0 && parsed && parsed.ok === true;
  return { ok, status, stdout, stderr, parsed };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Parse a JSON string without throwing.
 *
 * @param {string} text candidate JSON.
 * @returns {*} the parsed value, or null when JSON.parse throws.
 */
function readJsonFromString(text) {
  let value = null;
  try {
    value = JSON.parse(text);
  } catch {
    value = null;
  }
  return value;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Load the last `harness-eval-*.json` artifact under `<cwd>/.planning/evals`.
 *
 * "Last" means last by default string sort of the file name.
 *
 * @param {string} cwd project root.
 * @returns {*} the parsed artifact, or null when the directory cannot be
 *   listed, holds no matching file, or the chosen file cannot be parsed.
 */
function latestEval(cwd) {
  const dir = path.join(cwd, ".planning", "evals");
  let names;
  try {
    names = fs.readdirSync(dir);
  } catch {
    return null;
  }
  const matching = names.filter((name) => /^harness-eval-.*\.json$/.test(name)).sort();
  const newest = matching.pop();
  if (newest === undefined) return null;
  return readJson(path.join(dir, newest), null);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Append one scored check record to `checks`.
 *
 * Scoring: "pass" and "not_applicable" earn the full weight, "warn" earns
 * half (rounded down), any other status earns 0.
 *
 * @param {object[]} checks list to append to (mutated).
 * @param {string} name check identifier.
 * @param {number} weight maximum points for this check.
 * @param {string} status check outcome.
 * @param {string} evidence human-readable justification.
 * @param {object} [data] extra fields merged into the record last, so they
 *   can override the standard ones; non-object values are ignored.
 */
function addCheck(checks, name, weight, status, evidence, data) {
  let score = 0;
  if (status === "pass" || status === "not_applicable") {
    score = weight;
  } else if (status === "warn") {
    score = Math.floor(weight / 2);
  }
  const extra = data && typeof data === "object" ? data : {};
  checks.push({ name, weight, status, score, evidence, ...extra });
}
|
|
88
|
+
|
|
89
|
+
/**
 * Extract a pass/fail verdict from verification report text.
 *
 * Rules are tried in order and the first match wins:
 *   1. a whole line of the form `result: PASS` or `result: FAIL`;
 *   2. the first PASS/FAIL word following the word "Verdict" on the same line;
 *   3. the phrases "PHASE PASS" / "ALL CRITERIA PASSED";
 *   4. the phrases "PHASE FAIL" / "FAILED CRITERIA" / "GAPS FOUND".
 * All matching is case-insensitive.
 *
 * @param {string} text report contents.
 * @returns {string} "pass", "fail", or "" when no verdict is found.
 */
function verificationVerdict(text) {
  if (!text) return "";
  const explicit = text.match(/^result:\s*(PASS|FAIL)$/im);
  if (explicit) return explicit[1].toLowerCase();
  // Lazy `.*?` so the verdict is the FIRST PASS/FAIL after "Verdict". A greedy
  // `.*` captures the last one on the line and would read
  // "Verdict: FAIL — 2 criteria did not PASS" as a pass.
  const md = text.match(/\bVerdict\b.*?\b(PASS|FAIL)\b/i);
  if (md) return md[1].toLowerCase();
  if (/PHASE\s+PASS|ALL\s+CRITERIA\s+PASSED/i.test(text)) return "pass";
  if (/PHASE\s+FAIL|FAILED\s+CRITERIA|GAPS\s+FOUND/i.test(text)) return "fail";
  return "";
}
|
|
99
|
+
|
|
100
|
+
/**
 * Write the eval result as a JSON artifact and a Markdown summary under
 * `<cwd>/.planning/evals`, creating the directory if needed.
 *
 * File names are `harness-eval-<stamp>.json|.md`, where the stamp is
 * `result.generated_at` with ":" and "." replaced by "-". `result.artifacts`
 * is set to the two relative paths before the JSON is serialized, so the JSON
 * file records its own location.
 *
 * @param {string} cwd project root.
 * @param {object} result eval result; reads `generated_at`, `phase`, `status`,
 *   `score`, and `checks`; mutated to add `artifacts`.
 * @returns {{json: string, markdown: string}} artifact paths relative to `cwd`.
 */
function writeArtifacts(cwd, result) {
  const dir = path.join(cwd, ".planning", "evals");
  fs.mkdirSync(dir, { recursive: true });
  const stamp = result.generated_at.replace(/[:.]/g, "-");
  const jsonPath = path.join(dir, `harness-eval-${stamp}.json`);
  const mdPath = path.join(dir, `harness-eval-${stamp}.md`);

  // A table cell must stay on one line and must not contain a bare pipe.
  const cell = (value) => String(value || "").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");

  const lines = [
    `# Harness Eval`,
    ``,
    `**Generated:** ${result.generated_at}`,
    `**Phase:** ${result.phase || "n/a"}`,
    `**Status:** ${result.status}`,
    `**Score:** ${result.score}/100`,
    ``,
    `## Checks`,
    ``,
    `| Check | Status | Score | Evidence |`,
    `|---|---:|---:|---|`,
    ...result.checks.map((c) => `| ${c.name} | ${c.status} | ${c.score}/${c.weight} | ${cell(c.evidence)} |`),
    ``,
  ];
  fs.writeFileSync(mdPath, lines.join("\n"));

  result.artifacts = {
    json: rel(cwd, jsonPath),
    markdown: rel(cwd, mdPath),
  };
  // Single JSON write, done after `artifacts` is attached; an earlier write
  // without it would only be overwritten.
  fs.writeFileSync(jsonPath, JSON.stringify(result, null, 2) + "\n");
  return result.artifacts;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Evaluate the project's harness health and return a scored report.
 *
 * Checks recorded, with their weights (100 in total):
 *   planning_state (15), state_ledger (10), plan_contract (20),
 *   machine_evidence (20), verification_report (15),
 *   framework_trust_score (10), erp_linkage (10).
 *
 * When `.planning` does not exist, only a failing `planning_state` check is
 * recorded and the report is finalized with a forced "FAIL" status.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Project root; defaults to `process.cwd()`.
 * @param {string} [options.now] - Timestamp to record instead of the current time.
 * @param {number|string} [options.phase] - Phase to evaluate; falls back to the
 *   phase from the state check, then `tracking.json`, then 0.
 * @param {boolean} [options.run] - Execute the contract through `contractRunner`
 *   instead of relying on the stored evidence file.
 * @returns {object} The report produced by `finalize` (which also receives
 *   `options`).
 */
function buildHarnessEval(options = {}) {
  const cwd = path.resolve(options.cwd || process.cwd());
  const planning = path.join(cwd, ".planning");
  const generatedAt = options.now || new Date().toISOString();
  const checks = [];

  // First truthy candidate that converts to a finite number wins. A
  // non-numeric candidate is skipped rather than turned into NaN, which would
  // produce "phase-NaN-*.md" lookups and serialize as `null` in the JSON.
  const resolvePhase = (...candidates) => {
    for (const candidate of candidates) {
      if (!candidate) continue;
      const numeric = Number(candidate);
      if (Number.isFinite(numeric)) return numeric;
    }
    return 0;
  };

  const state = stateCheck(cwd);
  if (!fs.existsSync(planning)) {
    addCheck(checks, "planning_state", 15, "fail", "No .planning directory; run /qualia-new");
    return finalize({ cwd, generatedAt, phase: resolvePhase(options.phase), checks, statusOverride: "FAIL" }, options);
  }

  // `|| {}` guards against a tracking.json whose content parses to null.
  const tracking = readJson(path.join(planning, "tracking.json"), {}) || {};
  const phase = resolvePhase(options.phase, state.parsed?.phase, tracking.phase);
  addCheck(
    checks,
    "planning_state",
    15,
    state.ok ? "pass" : "fail",
    state.ok ? "state.js check returned ok" : "state.js check failed",
    { state: state.parsed || null }
  );

  // A ledger module that cannot be loaded or that throws is reported as a
  // failed check instead of aborting the whole evaluation.
  const ledgerResult = (() => {
    try {
      const stateLedger = require("./state-ledger.js");
      return stateLedger.validate(cwd);
    } catch (e) {
      return { ok: false, errors: [e.message] };
    }
  })();
  addCheck(
    checks,
    "state_ledger",
    10,
    ledgerResult.ok ? "pass" : "fail",
    ledgerResult.ok ? `${ledgerResult.count || 0} hash-chained event(s)` : (ledgerResult.errors || []).join("; ")
  );

  const planPath = path.join(planning, `phase-${phase}-plan.md`);
  const contractPath = path.join(planning, `phase-${phase}-contract.json`);
  const hasPlan = fs.existsSync(planPath);
  const hasContract = fs.existsSync(contractPath);
  let loadedContract = null;

  if (!hasPlan && !hasContract) {
    addCheck(checks, "plan_contract", 20, "not_applicable", `No current phase plan/contract for phase ${phase}`);
  } else if (!hasContract) {
    addCheck(checks, "plan_contract", 20, "fail", `Missing ${rel(cwd, contractPath)}`);
  } else {
    const loaded = pc.readContractFile(contractPath);
    loadedContract = loaded.ok ? loaded.contract : null;
    const errors = loaded.ok ? pc.validate(loaded.contract) : [loaded.message || loaded.error];
    const drift = hasPlan ? pc.checkDrift(contractPath, planPath) : { ok: true, drift: false };
    // Drift only fails the check when the drift comparison itself succeeded.
    const ok = loaded.ok && errors.length === 0 && !(drift.ok && drift.drift);
    addCheck(
      checks,
      "plan_contract",
      20,
      ok ? "pass" : "fail",
      ok ? `${rel(cwd, contractPath)} valid and in sync` : [...errors, drift.drift ? "contract drifted from plan" : ""].filter(Boolean).join("; "),
      { contract: rel(cwd, contractPath) }
    );
  }

  const evidencePath = path.join(planning, "evidence", `phase-${phase}-contract-run.json`);
  let evidence = readJson(evidencePath, null);
  if (loadedContract && options.run) {
    // A live run replaces whatever evidence was read from disk.
    evidence = contractRunner.runContract(loadedContract, { cwd });
  }
  if (!loadedContract) {
    addCheck(checks, "machine_evidence", 20, "not_applicable", "No contract to execute");
  } else if (evidence && evidence.ok === true) {
    addCheck(checks, "machine_evidence", 20, "pass", `${rel(cwd, evidencePath)} passed ${evidence.checked || 0} check(s)`);
  } else {
    addCheck(
      checks,
      "machine_evidence",
      20,
      "fail",
      evidence ? `${evidence.failed || "unknown"} failing machine check(s)` : `Missing ${rel(cwd, evidencePath)}`
    );
  }

  const verificationPath = path.join(planning, `phase-${phase}-verification.md`);
  const verification = readText(verificationPath, "");
  const ieCount = (verification.match(/INSUFFICIENT EVIDENCE/g) || []).length;
  const verdict = verificationVerdict(verification);
  if (!verification) {
    addCheck(checks, "verification_report", 15, "fail", `Missing ${rel(cwd, verificationPath)}`);
  } else if (ieCount > 0) {
    addCheck(checks, "verification_report", 15, "fail", `${ieCount} insufficient-evidence marker(s)`);
  } else if (verdict === "pass" || verdict === "fail") {
    // A "fail" verdict is recorded as a warning here, not as a failed check.
    addCheck(checks, "verification_report", 15, verdict === "pass" ? "pass" : "warn", `${rel(cwd, verificationPath)} verdict=${verdict}`);
  } else {
    addCheck(checks, "verification_report", 15, "warn", `${rel(cwd, verificationPath)} has no machine-readable verdict`);
  }

  const trustScore = trust.buildTrustScore(cwd);
  addCheck(
    checks,
    "framework_trust_score",
    10,
    trustScore.status === "FAIL" ? "fail" : trustScore.status === "DEGRADED" ? "warn" : "pass",
    `trust-score=${trustScore.score}/100 status=${trustScore.status}`,
    { trust_score: trustScore.score }
  );

  // erp_project_id must look like a canonical 8-4-4-4-12 hexadecimal UUID.
  const hasErpId = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(tracking.erp_project_id || "");
  const hasProjectKey = !!(tracking.project_id || tracking.project);
  addCheck(
    checks,
    "erp_linkage",
    10,
    hasErpId ? "pass" : hasProjectKey ? "warn" : "fail",
    hasErpId ? "tracking.json has erp_project_id UUID" : hasProjectKey ? "project key exists, ERP UUID missing" : "missing project/project_id for ERP correlation"
  );

  return finalize({ cwd, generatedAt, phase, checks }, options);
}
|
|
251
|
+
|
|
252
|
+
/**
 * Fold the collected checks into the final report object and, when asked to,
 * write it to disk.
 *
 * Status rules: `base.statusOverride` wins when set; otherwise any failed
 * check gives "FAIL", a score of 85 or more gives "PASS", a lower score with
 * at least one warning gives "WARN", and anything else gives "PASS".
 *
 * @param {object} base - `{ cwd, generatedAt, phase, checks, statusOverride? }`.
 * @param {object} options - Artifacts are written only when `options.write`
 *   is truthy, `options.no_write` is falsy and `<cwd>/.planning` exists.
 * @returns {object} `{ ok, status, score, phase, generated_at, checks }`;
 *   `ok` is false only for a "FAIL" status.
 */
function finalize(base, options) {
  let total = 0;
  let failed = 0;
  let warned = 0;
  for (const check of base.checks) {
    total += check.score || 0;
    if (check.status === "fail") {
      failed += 1;
    } else if (check.status === "warn") {
      warned += 1;
    }
  }
  // Clamp the summed score into the 0..100 range.
  const score = Math.max(0, Math.min(100, total));

  let status = base.statusOverride;
  if (!status) {
    if (failed) {
      status = "FAIL";
    } else if (score >= 85) {
      status = "PASS";
    } else {
      status = warned ? "WARN" : "PASS";
    }
  }

  const result = {
    ok: status !== "FAIL",
    status,
    score,
    phase: base.phase,
    generated_at: base.generatedAt,
    checks: base.checks,
  };

  const writeRequested = Boolean(options.write) && !options.no_write;
  if (writeRequested && fs.existsSync(path.join(base.cwd, ".planning"))) {
    writeArtifacts(base.cwd, result);
  }
  return result;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Print a plain-text summary of an eval result to stdout: a headline with the
 * score and status, one line per check, and the artifact paths when present.
 *
 * @param {object} result - Reads `score`, `status`, `checks` and the optional
 *   `artifacts` (`json`, `markdown`).
 * @returns {void}
 */
function printHuman(result) {
  console.log(`Harness eval: ${result.score}/100 (${result.status})`);

  for (const { name, status, score, weight, evidence } of result.checks) {
    console.log(`${name}: ${status} (${score}/${weight}) — ${evidence}`);
  }

  const { artifacts } = result;
  if (!artifacts) return;
  console.log(`Artifacts: ${artifacts.json}, ${artifacts.markdown}`);
}
|
|
280
|
+
|
|
281
|
+
/**
 * Command-line entry point: parse the arguments, run the evaluation and print
 * the report either as JSON (when `args.json` is set) or as readable text.
 *
 * @param {string[]} argv - Argument vector handed to `parseArgs`.
 * @returns {number} Exit code: 0 when `result.ok` is truthy, otherwise 1.
 */
function main(argv) {
  const args = parseArgs(argv);
  const result = buildHarnessEval(args);

  if (args.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    printHuman(result);
  }

  if (result.ok) {
    return 0;
  }
  return 1;
}
|
|
288
|
+
|
|
289
|
+
module.exports = {
|
|
290
|
+
buildHarnessEval,
|
|
291
|
+
latestEval,
|
|
292
|
+
verificationVerdict,
|
|
293
|
+
};
|
|
294
|
+
|
|
295
|
+
if (require.main === module) process.exit(main(process.argv));
|
|
296
|
+
|