avorelo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +56 -0
- package/bin/avorelo +9 -0
- package/package.json +135 -0
- package/scripts/README.md +40 -0
- package/scripts/cco-dashboard.js +252 -0
- package/scripts/cco-status.js +430 -0
- package/scripts/lib/activation/account-state.js +37 -0
- package/scripts/lib/activation/activation-runner.js +546 -0
- package/scripts/lib/activation/activation-self-healing.js +480 -0
- package/scripts/lib/activation/activation-state.js +83 -0
- package/scripts/lib/activation/activation-summary.js +191 -0
- package/scripts/lib/activation/adapters/claude-code.js +77 -0
- package/scripts/lib/activation/adapters/codex-cli.js +52 -0
- package/scripts/lib/activation/adapters/cursor.js +37 -0
- package/scripts/lib/activation/adapters/github-agent.js +39 -0
- package/scripts/lib/activation/adapters/terminal.js +42 -0
- package/scripts/lib/activation/adapters/vscode.js +39 -0
- package/scripts/lib/activation/adapters/windsurf.js +37 -0
- package/scripts/lib/activation/ai-surface-detector.js +151 -0
- package/scripts/lib/activation/connect-account.js +145 -0
- package/scripts/lib/activation/detect-environment.js +75 -0
- package/scripts/lib/activation/detect-hosts.js +62 -0
- package/scripts/lib/activation/format-activation-output.js +109 -0
- package/scripts/lib/activation/next-action.js +43 -0
- package/scripts/lib/activation/repair-engine.js +219 -0
- package/scripts/lib/activation-distribution-readiness.js +507 -0
- package/scripts/lib/adapter-conformance.js +176 -0
- package/scripts/lib/adapter-readiness.js +417 -0
- package/scripts/lib/adapter-safety-boundaries.js +335 -0
- package/scripts/lib/adapter-technical-readiness-gate.js +205 -0
- package/scripts/lib/agent-access-governance.js +455 -0
- package/scripts/lib/agent-enforcement.js +765 -0
- package/scripts/lib/agent-policy-profile.js +210 -0
- package/scripts/lib/agent-security/action-evaluator.js +507 -0
- package/scripts/lib/agent-security/adapter-registry.js +98 -0
- package/scripts/lib/agent-security/auto-policy.js +139 -0
- package/scripts/lib/agent-security/bounded-scan.js +93 -0
- package/scripts/lib/agent-security/enforcement-adapter.js +174 -0
- package/scripts/lib/agent-security/enforcement-engine.js +1129 -0
- package/scripts/lib/agent-security/file-write-adapter.js +183 -0
- package/scripts/lib/agent-security/file-write-rules.js +178 -0
- package/scripts/lib/agent-security/index.js +3342 -0
- package/scripts/lib/agent-security/instruction-risk.js +181 -0
- package/scripts/lib/agent-security/mcp-action-adapter.js +185 -0
- package/scripts/lib/agent-security/mcp-action-rules.js +184 -0
- package/scripts/lib/agent-security/package-action-adapter.js +175 -0
- package/scripts/lib/agent-security/package-action-rules.js +233 -0
- package/scripts/lib/agent-security/performance.js +148 -0
- package/scripts/lib/agent-security/permission-minimizer.js +403 -0
- package/scripts/lib/agent-security/scan-cache.js +74 -0
- package/scripts/lib/agent-security/source-trust.js +146 -0
- package/scripts/lib/ai-install-prompt.js +288 -0
- package/scripts/lib/ai-workspace-hygiene.js +1499 -0
- package/scripts/lib/alpha-activation.js +520 -0
- package/scripts/lib/alpha-feedback.js +263 -0
- package/scripts/lib/alpha-readiness-gate.js +332 -0
- package/scripts/lib/anti-gaming.js +169 -0
- package/scripts/lib/artifact-health.js +431 -0
- package/scripts/lib/attribution.js +180 -0
- package/scripts/lib/audit.js +289 -0
- package/scripts/lib/avorelo-skill-registry.js +810 -0
- package/scripts/lib/batch-jobs.js +71 -0
- package/scripts/lib/brain-pack.js +578 -0
- package/scripts/lib/brand-boundary.js +424 -0
- package/scripts/lib/brand.js +74 -0
- package/scripts/lib/browser-capability.js +1048 -0
- package/scripts/lib/browser-proof-preflight.js +321 -0
- package/scripts/lib/cache-readiness.js +187 -0
- package/scripts/lib/canonical-reentry.js +162 -0
- package/scripts/lib/capability-packs.js +314 -0
- package/scripts/lib/capability-recommender.js +512 -0
- package/scripts/lib/capability-registry.js +1059 -0
- package/scripts/lib/carry-forward-surfacing.js +194 -0
- package/scripts/lib/ccusage-adapter.js +188 -0
- package/scripts/lib/company-loop.js +1149 -0
- package/scripts/lib/config.js +637 -0
- package/scripts/lib/context-acquisition-plan.js +287 -0
- package/scripts/lib/context-budget-guard.js +170 -0
- package/scripts/lib/context-budget-scanner.js +257 -0
- package/scripts/lib/context-optimizer.js +715 -0
- package/scripts/lib/context-reduction-plan.js +178 -0
- package/scripts/lib/context-safety.js +88 -0
- package/scripts/lib/context-savings-engine.js +158 -0
- package/scripts/lib/cost-evidence.js +254 -0
- package/scripts/lib/cross-host-install-plan.js +308 -0
- package/scripts/lib/cross-host-install-readiness.js +237 -0
- package/scripts/lib/cross-host-value-flow.js +268 -0
- package/scripts/lib/dashboard.js +900 -0
- package/scripts/lib/design-partner-feedback.js +346 -0
- package/scripts/lib/entitlements.js +100 -0
- package/scripts/lib/execution-packet.js +559 -0
- package/scripts/lib/experimentation-events.js +547 -0
- package/scripts/lib/external-capability-compliance.js +107 -0
- package/scripts/lib/external-user-simulation.js +166 -0
- package/scripts/lib/failure-recovery-readiness.js +81 -0
- package/scripts/lib/failure-recovery.js +419 -0
- package/scripts/lib/feedback-intelligence.js +537 -0
- package/scripts/lib/feedback-signals.js +205 -0
- package/scripts/lib/file-integrity.js +68 -0
- package/scripts/lib/fsx.js +127 -0
- package/scripts/lib/full-readiness-gate.js +451 -0
- package/scripts/lib/guidance-builder.js +174 -0
- package/scripts/lib/hook-apply.js +1019 -0
- package/scripts/lib/hook-baseline.js +310 -0
- package/scripts/lib/hook-config-preview.js +275 -0
- package/scripts/lib/hook-contracts.js +290 -0
- package/scripts/lib/hook-safety-boundary-readiness.js +80 -0
- package/scripts/lib/host-capability-matrix.js +351 -0
- package/scripts/lib/host-support-context.js +254 -0
- package/scripts/lib/http-hook-action.js +538 -0
- package/scripts/lib/install-ai-readiness.js +84 -0
- package/scripts/lib/install-intake-risk.js +1037 -0
- package/scripts/lib/install-journey-intelligence.js +329 -0
- package/scripts/lib/intervention-guidance.js +57 -0
- package/scripts/lib/known-limitations.js +115 -0
- package/scripts/lib/l8-path-truth.js +146 -0
- package/scripts/lib/launch-hardening-gate.js +436 -0
- package/scripts/lib/launch-readiness.js +628 -0
- package/scripts/lib/learning-memory.js +686 -0
- package/scripts/lib/lifecycle-hooks.js +802 -0
- package/scripts/lib/local-package-smoke.js +423 -0
- package/scripts/lib/local-pricing.js +299 -0
- package/scripts/lib/mcp-enforcement.js +311 -0
- package/scripts/lib/mcp-least-privilege-policy.js +303 -0
- package/scripts/lib/mcp-tool-inventory.js +388 -0
- package/scripts/lib/mcp-tool-risk.js +0 -0
- package/scripts/lib/memory.js +335 -0
- package/scripts/lib/metrics.js +699 -0
- package/scripts/lib/micro-proof.js +133 -0
- package/scripts/lib/next-run-context.js +436 -0
- package/scripts/lib/operating-value.js +1648 -0
- package/scripts/lib/optimization-v3.js +122 -0
- package/scripts/lib/orchestration/adapters/_shared.js +49 -0
- package/scripts/lib/orchestration/adapters/aider.js +18 -0
- package/scripts/lib/orchestration/adapters/claude-code.js +35 -0
- package/scripts/lib/orchestration/adapters/codex.js +35 -0
- package/scripts/lib/orchestration/adapters/gemini-cli.js +18 -0
- package/scripts/lib/orchestration/adapters/git.js +25 -0
- package/scripts/lib/orchestration/adapters/index.js +31 -0
- package/scripts/lib/orchestration/adapters/lm-studio.js +18 -0
- package/scripts/lib/orchestration/adapters/ollama.js +18 -0
- package/scripts/lib/orchestration/adapters/opencode.js +18 -0
- package/scripts/lib/orchestration/adapters/openrouter.js +18 -0
- package/scripts/lib/orchestration/adapters/test-runner.js +25 -0
- package/scripts/lib/orchestration/cli.js +438 -0
- package/scripts/lib/orchestration/execution-manager.js +279 -0
- package/scripts/lib/orchestration/handoff.js +314 -0
- package/scripts/lib/orchestration/index.js +456 -0
- package/scripts/lib/orchestration/inventory.js +47 -0
- package/scripts/lib/orchestration/model-discovery.js +498 -0
- package/scripts/lib/orchestration/model-profiler.js +170 -0
- package/scripts/lib/orchestration/model-profiles.js +252 -0
- package/scripts/lib/orchestration/model-refresh-policy.js +72 -0
- package/scripts/lib/orchestration/proof-writer.js +349 -0
- package/scripts/lib/orchestration/provider-discovery/aider.js +49 -0
- package/scripts/lib/orchestration/provider-discovery/claude-code.js +56 -0
- package/scripts/lib/orchestration/provider-discovery/codex.js +49 -0
- package/scripts/lib/orchestration/provider-discovery/common.js +186 -0
- package/scripts/lib/orchestration/provider-discovery/gemini.js +106 -0
- package/scripts/lib/orchestration/provider-discovery/lm-studio.js +118 -0
- package/scripts/lib/orchestration/provider-discovery/models-dev.js +12 -0
- package/scripts/lib/orchestration/provider-discovery/ollama.js +100 -0
- package/scripts/lib/orchestration/provider-discovery/opencode.js +47 -0
- package/scripts/lib/orchestration/provider-discovery/openrouter.js +44 -0
- package/scripts/lib/orchestration/risk-classifier.js +130 -0
- package/scripts/lib/orchestration/routing-policy.js +486 -0
- package/scripts/lib/orchestration/settings.js +112 -0
- package/scripts/lib/orchestration/state.js +165 -0
- package/scripts/lib/orchestration/verification-manager.js +138 -0
- package/scripts/lib/output-profiles.js +146 -0
- package/scripts/lib/package-content-audit.js +368 -0
- package/scripts/lib/package-runtime.js +278 -0
- package/scripts/lib/plan-surface.js +53 -0
- package/scripts/lib/plans.js +2318 -0
- package/scripts/lib/policy-provider.js +27 -0
- package/scripts/lib/prelaunch-activation-readiness.js +409 -0
- package/scripts/lib/prelaunch-evidence-store.js +816 -0
- package/scripts/lib/prelaunch-intelligence.js +869 -0
- package/scripts/lib/pricing-experiment.js +118 -0
- package/scripts/lib/pro-moment-events.js +77 -0
- package/scripts/lib/pro-moment-state.js +227 -0
- package/scripts/lib/pro-moments.js +1216 -0
- package/scripts/lib/product-learning-events.js +629 -0
- package/scripts/lib/project-profile.js +555 -0
- package/scripts/lib/prompt-compiler.js +280 -0
- package/scripts/lib/prompt-lint.js +32 -0
- package/scripts/lib/prompt-suggestions.js +52 -0
- package/scripts/lib/proof-canonical.js +398 -0
- package/scripts/lib/proof-drilldown.js +383 -0
- package/scripts/lib/proof-events.js +342 -0
- package/scripts/lib/proof-history.js +243 -0
- package/scripts/lib/proof-metrics.js +296 -0
- package/scripts/lib/proof-outcome-evidence.js +134 -0
- package/scripts/lib/proof-receipt.js +335 -0
- package/scripts/lib/proof-record.js +461 -0
- package/scripts/lib/public-activation-distribution-gate.js +258 -0
- package/scripts/lib/public-cli.js +3891 -0
- package/scripts/lib/public-distribution-truth.js +211 -0
- package/scripts/lib/public-install-claim-checker.js +294 -0
- package/scripts/lib/publish-provenance-readiness.js +283 -0
- package/scripts/lib/readiness-delta.js +218 -0
- package/scripts/lib/readiness-evidence-closure.js +196 -0
- package/scripts/lib/reentry-memory-capture.js +241 -0
- package/scripts/lib/reentry-memory-retrieval.js +302 -0
- package/scripts/lib/reentry-memory-status.js +146 -0
- package/scripts/lib/reentry-memory-store.js +178 -0
- package/scripts/lib/reentry-state.js +66 -0
- package/scripts/lib/release-candidate-bundle.js +166 -0
- package/scripts/lib/remediation.js +81 -0
- package/scripts/lib/repo-map.js +391 -0
- package/scripts/lib/run-improvements-lifecycle.js +330 -0
- package/scripts/lib/run-improvements.js +789 -0
- package/scripts/lib/runtime-decision-policy.js +387 -0
- package/scripts/lib/safe-path-engine.js +705 -0
- package/scripts/lib/safe-run-controller.js +887 -0
- package/scripts/lib/score.js +262 -0
- package/scripts/lib/seamless-enforcement.js +329 -0
- package/scripts/lib/seamless-outcome.js +689 -0
- package/scripts/lib/seamless-reality-gate.js +5043 -0
- package/scripts/lib/security-risk-classifier.js +511 -0
- package/scripts/lib/security-scan.js +384 -0
- package/scripts/lib/session-context-optimizer.js +1211 -0
- package/scripts/lib/session-timing.js +315 -0
- package/scripts/lib/skill-hygiene.js +805 -0
- package/scripts/lib/skill-packs.js +161 -0
- package/scripts/lib/skills-operating-layer.js +580 -0
- package/scripts/lib/smart-work-routing.js +768 -0
- package/scripts/lib/source-catalog.js +700 -0
- package/scripts/lib/status-value-summary.js +32 -0
- package/scripts/lib/support-bundle.js +578 -0
- package/scripts/lib/task-continuation.js +440 -0
- package/scripts/lib/test-helpers.js +15 -0
- package/scripts/lib/tier.js +38 -0
- package/scripts/lib/token-context-quality-gate.js +370 -0
- package/scripts/lib/token-cost-capture.js +187 -0
- package/scripts/lib/token-cost-intelligence.js +358 -0
- package/scripts/lib/token-efficiency-evidence.js +213 -0
- package/scripts/lib/token-evidence.js +699 -0
- package/scripts/lib/tokenish.js +17 -0
- package/scripts/lib/tool-output-sandbox.js +304 -0
- package/scripts/lib/trust-audit.js +136 -0
- package/scripts/lib/unified-events.js +396 -0
- package/scripts/lib/upgrade-interruption-recovery.js +407 -0
- package/scripts/lib/usage-ledger.js +201 -0
- package/scripts/lib/value-ledger.js +130 -0
- package/scripts/lib/value-proof-calibration.js +531 -0
- package/scripts/lib/visual-qa.js +231 -0
- package/scripts/lib/voice-alpha.js +29 -0
- package/scripts/lib/work-aware-orchestration.js +976 -0
- package/scripts/lib/work-control-receipts.js +577 -0
- package/scripts/lib/work-ledger.js +1123 -0
- package/scripts/lib/work-panel-preview.js +352 -0
- package/scripts/lib/workflow-discipline.js +280 -0
- package/scripts/lib/workflow-signals.js +419 -0
- package/scripts/lib/workspace-map.js +281 -0
- package/scripts/lib/workspace-registry.js +1367 -0
- package/scripts/lib/workspace-resolver.js +480 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Proof Metrics - 12 named effectiveness metric definitions.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { readAllMetrics } = require("./metrics");
|
|
8
|
+
const { composeProofRecord } = require("./proof-record");
|
|
9
|
+
|
|
10
|
+
// Catalogue of the named effectiveness metrics, keyed by metric id.
// Each entry carries its own `id`, so the lookup key is derived from the entry
// instead of being written twice. Insertion order is preserved and is the order
// in which metrics are enumerated by consumers of this object.
const METRIC_DEFINITIONS = Object.fromEntries(
  [
    // --- outcome ---
    {
      id: "time_to_first_useful_result",
      name: "Time to First Useful Result",
      description: "Time from run start to the first truly usable output.",
      unit: "seconds",
      dimension: "outcome",
      derivation: "started_at to first TASK_COMPLETED or meaningful artifact event",
    },
    {
      id: "completion_rate",
      name: "Completion Rate",
      description: "Percentage of runs ending in completed task or safe stop with bounded next action.",
      unit: "percent",
      dimension: "outcome",
      derivation:
        "sessions with canonical completion truth when present, otherwise TASK_COMPLETED or STOP_SNAPSHOT / total sessions",
    },
    {
      id: "evidence_backed_completion_rate",
      name: "Evidence-Backed Completion Rate",
      description:
        "Of completions, how many have related security/trust evidence artifacts in the same session. This is a proxy for evidence presence, not a verification of task correctness.",
      unit: "percent",
      dimension: "outcome",
      derivation: "canonical proof-aware completions with related evidence / total completions",
    },
    // --- efficiency ---
    {
      id: "retries_per_task",
      name: "Retries Per Task",
      description: "Average retries, reruns, or recovery loops per task.",
      unit: "count",
      dimension: "efficiency",
      derivation: "proof-record retries_count / sessions",
    },
    {
      id: "dead_end_rate",
      name: "Dead-End Rate",
      description: "Runs ending without usable output, recovery path, or safe stop.",
      unit: "percent",
      dimension: "efficiency",
      derivation: "sessions without completion, safe stop, or recovery / total sessions",
    },
    {
      id: "large_outputs_managed",
      name: "Large Outputs Managed",
      description:
        "Count of large tool outputs stored out-of-band to reduce context pressure. This is a count, not a precise byte-level reduction measurement.",
      unit: "count",
      dimension: "efficiency",
      derivation: "canonical material steps or CTX_LARGE_OUTPUT counts across sessions",
    },
    {
      id: "unnecessary_tool_load_avoided",
      name: "Unnecessary Tool Load Avoided",
      description: "Count of tool loads or scans intentionally skipped because unnecessary.",
      unit: "count",
      dimension: "efficiency",
      derivation: "proof-record tools_skipped",
    },
    // --- control ---
    {
      id: "approval_interruptions_avoided",
      name: "Approval Interruptions Avoided",
      description: "Count of policy-relevant interruptions avoided because execution stayed within policy.",
      unit: "count",
      dimension: "control",
      derivation: "proof-record approval_requests_avoided",
    },
    {
      id: "risky_actions_blocked_or_downgraded",
      name: "Risky Actions Blocked or Downgraded",
      description: "Count of risky actions blocked, downgraded, or deferred.",
      unit: "count",
      dimension: "control",
      derivation: "proof-record risky action counts, canonical proof first",
    },
    // --- trust ---
    {
      id: "honest_fallback_rate",
      name: "Honest Fallback Rate",
      description: "Rate at which Wuz stopped or limited claims honestly instead of fabricating certainty.",
      unit: "percent",
      dimension: "trust",
      derivation: "safe-stop sessions / total sessions",
    },
    {
      id: "recovery_success_rate",
      name: "Recovery Success Rate",
      description: "Of runs that encountered blocks or failures, how many reached successful completion.",
      unit: "percent",
      dimension: "trust",
      derivation: "sessions with retries that still reach completion / retry sessions",
    },
    {
      id: "evidence_coverage",
      name: "Evidence Coverage",
      description: "Percentage of material sessions with evidence or traceable support.",
      unit: "percent",
      dimension: "trust",
      derivation: "proof-record sessions with related evidence / total sessions",
    },
  ].map((definition) => [definition.id, definition])
);
|
|
110
|
+
|
|
111
|
+
/**
 * Aggregate recent metric events into the named proof metrics.
 *
 * Metric events returned by `readAllMetrics(cwd)` are filtered to the last
 * `options.days` days (default 7), grouped by `sessionId` (events without one
 * are grouped under "unknown"), and each group is turned into a proof record
 * via `composeProofRecord`. The per-session records are then rolled up into
 * rates and counts.
 *
 * @param {string} cwd - Passed through to `readAllMetrics`.
 * @param {{ days?: number } | null} [options] - `days` is the look-back window;
 *   a falsy value (including 0) falls back to 7.
 * @returns {object} `period_days`, `total_sessions`, and one entry per metric id
 *   (definition fields plus `value` and supporting numerators/denominators).
 *   When no session falls inside the window the result of `emptyMetrics()` is
 *   returned instead.
 */
function computeProofMetrics(cwd, options = {}) {
  // The default parameter only covers `undefined`; tolerate an explicit `null`.
  const days = Number(options?.days || 7);
  const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
  const allMetrics = readAllMetrics(cwd);
  const recent = allMetrics.filter((metric) => {
    const ts = Date.parse(metric.ts || 0);
    return Number.isFinite(ts) && ts >= cutoff;
  });

  const bySession = new Map();
  recent.forEach((metric) => {
    const id = metric.sessionId || "unknown";
    if (!bySession.has(id)) bySession.set(id, []);
    bySession.get(id).push(metric);
  });

  const totalSessions = bySession.size;
  if (totalSessions === 0) return emptyMetrics();

  let completedSessions = 0;
  let safeStopSessions = 0;
  let evidenceBackedSessions = 0;
  let deadEndSessions = 0;
  let blockedSessions = 0;
  let blockedThenCompletedSessions = 0;
  let evidenceSessions = 0;
  let totalRetries = 0;
  let totalLargeOutputBytes = 0;
  let totalLargeOutputEvents = 0;
  let totalScanSkips = 0;
  let totalPermFatigueConsolidated = 0;
  let totalRiskyBlocked = 0;
  const ttfrValues = [];

  for (const [sessionId, sessionMetrics] of bySession) {
    const record = composeProofRecord(sessionId, sessionMetrics, []);
    const retries = record.efficiency?.retries_count || 0;

    if (record.completion_status === "completed") completedSessions++;
    if (record.completion_status === "safe_stop") safeStopSessions++;
    if (record.outcome?.evidence_backed_completion) evidenceBackedSessions++;
    // A dead end is a session that ended without completion AND left no bounded next action.
    if (
      record.completion_status === "ended_without_completion" &&
      !record.outcome?.next_bounded_action
    ) {
      deadEndSessions++;
    }
    // Sessions with at least one retry form the denominator of the recovery rate.
    if (retries > 0) {
      blockedSessions++;
      if (record.completion_status === "completed") blockedThenCompletedSessions++;
    }
    if (record.trust?.has_related_evidence) evidenceSessions++;

    totalRetries += retries;
    totalLargeOutputBytes += record.efficiency?.large_output_bytes_managed || 0;
    totalLargeOutputEvents += record.efficiency?.large_output_events || 0;
    totalScanSkips += record.efficiency?.tools_skipped || 0;
    totalPermFatigueConsolidated += record.control?.approval_requests_avoided || 0;
    totalRiskyBlocked +=
      (record.control?.risky_actions_blocked || 0) +
      (record.control?.risky_actions_downgraded || 0);

    // Time to first useful result: earliest event in the session to the first
    // event (in array order) tagged TASK_COMPLETED.
    const completionMetric = sessionMetrics.find((metric) =>
      (metric.reasonCodes || []).includes("TASK_COMPLETED")
    );
    if (completionMetric) {
      // Fold to the minimum instead of Math.min(...array): spreading a very
      // large session into call arguments can exceed the engine's argument limit.
      const startTs = sessionMetrics.reduce((earliest, metric) => {
        const ts = Date.parse(metric.ts || 0);
        return Number.isFinite(ts) && ts < earliest ? ts : earliest;
      }, Infinity);
      const endTs = Date.parse(completionMetric.ts || 0);
      const hasStart = Number.isFinite(startTs) && startTs !== 0;
      if (hasStart && Number.isFinite(endTs) && endTs > startTs) {
        ttfrValues.push(Math.round((endTs - startTs) / 1000));
      }
    }
  }

  // Whole-number percentage; an empty denominator yields 0 rather than NaN.
  const toPercent = (numerator, denominator) =>
    denominator > 0 ? Math.round((numerator / denominator) * 100) : 0;

  const completedOrStopped = completedSessions + safeStopSessions;
  const completionRate = toPercent(completedOrStopped, totalSessions);
  const evidenceBackedRate = toPercent(evidenceBackedSessions, completedSessions);
  const deadEndRate = toPercent(deadEndSessions, totalSessions);
  // Retries per task keeps two decimal places.
  const retriesPerTask =
    totalSessions > 0 ? Math.round((totalRetries / totalSessions) * 100) / 100 : 0;
  const honestFallbackRate = toPercent(safeStopSessions, totalSessions);
  const recoverySuccessRate = toPercent(blockedThenCompletedSessions, blockedSessions);
  const evidenceCoverage = toPercent(evidenceSessions, totalSessions);
  const medianTtfr = ttfrValues.length > 0 ? median(ttfrValues) : null;

  return {
    period_days: days,
    total_sessions: totalSessions,
    time_to_first_useful_result: {
      ...METRIC_DEFINITIONS.time_to_first_useful_result,
      value: medianTtfr,
      sample_size: ttfrValues.length,
      confidence: ttfrValues.length >= 5 ? "medium" : "low",
    },
    completion_rate: {
      ...METRIC_DEFINITIONS.completion_rate,
      value: completionRate,
      numerator: completedOrStopped,
      denominator: totalSessions,
    },
    evidence_backed_completion_rate: {
      ...METRIC_DEFINITIONS.evidence_backed_completion_rate,
      value: evidenceBackedRate,
      numerator: evidenceBackedSessions,
      denominator: completedSessions,
    },
    retries_per_task: {
      ...METRIC_DEFINITIONS.retries_per_task,
      value: retriesPerTask,
      total_retries: totalRetries,
      sessions: totalSessions,
    },
    dead_end_rate: {
      ...METRIC_DEFINITIONS.dead_end_rate,
      value: deadEndRate,
      numerator: deadEndSessions,
      denominator: totalSessions,
    },
    large_outputs_managed: {
      ...METRIC_DEFINITIONS.large_outputs_managed,
      value: totalLargeOutputEvents,
      total_bytes: totalLargeOutputBytes,
    },
    unnecessary_tool_load_avoided: {
      ...METRIC_DEFINITIONS.unnecessary_tool_load_avoided,
      value: totalScanSkips,
    },
    approval_interruptions_avoided: {
      ...METRIC_DEFINITIONS.approval_interruptions_avoided,
      value: totalPermFatigueConsolidated,
    },
    risky_actions_blocked_or_downgraded: {
      ...METRIC_DEFINITIONS.risky_actions_blocked_or_downgraded,
      value: totalRiskyBlocked,
    },
    honest_fallback_rate: {
      ...METRIC_DEFINITIONS.honest_fallback_rate,
      value: honestFallbackRate,
      numerator: safeStopSessions,
      denominator: totalSessions,
    },
    recovery_success_rate: {
      ...METRIC_DEFINITIONS.recovery_success_rate,
      value: recoverySuccessRate,
      numerator: blockedThenCompletedSessions,
      denominator: blockedSessions,
      confidence: blockedSessions >= 3 ? "medium" : "low",
    },
    evidence_coverage: {
      ...METRIC_DEFINITIONS.evidence_coverage,
      value: evidenceCoverage,
      numerator: evidenceSessions,
      denominator: totalSessions,
    },
  };
}
|
|
272
|
+
|
|
273
|
+
/**
 * Build the placeholder result used when there are no sessions to report on.
 *
 * Every metric definition is copied and given a neutral `value`: 0 for
 * "percent" and "count" metrics, null for any other unit.
 *
 * @returns {object} `period_days: 0`, `total_sessions: 0`, plus one entry per metric id.
 */
function emptyMetrics() {
  const zeroValuedUnits = new Set(["percent", "count"]);
  const placeholders = Object.entries(METRIC_DEFINITIONS).map(([id, def]) => [
    id,
    { ...def, value: zeroValuedUnits.has(def.unit) ? 0 : null },
  ]);
  return {
    period_days: 0,
    total_sessions: 0,
    ...Object.fromEntries(placeholders),
  };
}
|
|
283
|
+
|
|
284
|
+
/**
 * Median of a list of numbers, without mutating the input.
 *
 * @param {number[]} arr - Values to summarise.
 * @returns {number} 0 for an empty list; the middle value for an odd-length
 *   list; the rounded mean of the two middle values for an even-length list.
 */
function median(arr) {
  const count = arr.length;
  if (!count) return 0;

  // Sort a copy numerically; the default sort would compare as strings.
  const ordered = [...arr].sort((left, right) => left - right);
  const upper = Math.floor(count / 2);

  if (count % 2 !== 0) {
    return ordered[upper];
  }
  const lower = upper - 1;
  return Math.round((ordered[lower] + ordered[upper]) / 2);
}
|
|
292
|
+
|
|
293
|
+
module.exports = {
|
|
294
|
+
METRIC_DEFINITIONS,
|
|
295
|
+
computeProofMetrics,
|
|
296
|
+
};
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const fs = require("fs");
|
|
4
|
+
const path = require("path");
|
|
5
|
+
const { nowIso } = require("./fsx");
|
|
6
|
+
|
|
7
|
+
// Contract identifier and schema version stamped onto each evidence artifact.
const CONTRACT = "avorelo.proofOutcomeEvidence.v1";
const SCHEMA_VERSION = 1;

// Directory (relative path) that holds the proof-outcome artifacts.
const PROOF_OUTCOME_DIR_REL = ".claude/cco/orchestration/seamless-outcome";

// Relative paths of the artifact this module writes and the two it reads.
const [ARTIFACT_REL, VALUE_SUMMARY_REL, REALITY_GATE_REL] = [
  "latest-proof-outcome-evidence.json",
  "latest-value-summary.json",
  "latest-reality-gate.json",
].map((fileName) => `${PROOF_OUTCOME_DIR_REL}/${fileName}`);
|
|
13
|
+
|
|
14
|
+
/**
 * Best-effort JSON file reader.
 *
 * @param {string} absPath - Path of the file to read.
 * @returns {*} The parsed JSON value, or null when the file does not exist,
 *   cannot be read, or does not contain valid JSON.
 */
function safeReadJson(absPath) {
  let parsed = null;
  try {
    if (fs.existsSync(absPath)) {
      const raw = fs.readFileSync(absPath, "utf8");
      // Drop a leading UTF-8 byte-order mark, which JSON.parse rejects.
      const withoutBom = raw.replace(/^\uFEFF/, "");
      parsed = JSON.parse(withoutBom);
    }
  } catch {
    // Deliberately swallowed: any read or parse failure is reported as "no data".
    parsed = null;
  }
  return parsed;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Assess the latest proof artifacts under `cwd` and describe what is missing.
 *
 * Reads the value-summary and reality-gate artifacts (either may be absent)
 * and derives:
 *  - `proofQuality`: "missing", "simulated_fixture_only", "evidence_backed",
 *    or "present_but_unverified";
 *  - `status`: "pass" only when a non-simulated, evidence-backed summary exists
 *    together with a reality-gate artifact, otherwise "warn";
 *  - `missingEvidence` / `safeNextActions`: human-readable gaps and follow-ups.
 *
 * @param {string} cwd - Directory the artifact paths are resolved against.
 * @returns {object} The evidence record (not written to disk by this function).
 */
function buildProofOutcomeEvidence(cwd) {
  const valueSummary = safeReadJson(path.join(cwd, VALUE_SUMMARY_REL));
  const realityGate = safeReadJson(path.join(cwd, REALITY_GATE_REL));

  const hasSummary = Boolean(valueSummary);
  const hasGate = Boolean(realityGate);
  const isSimulated = hasSummary && valueSummary.simulated === true;
  // A summary counts as evidence-backed unless it explicitly says otherwise.
  const isEvidenceBacked = hasSummary && valueSummary.evidenceBacked !== false;

  const missingEvidence = [];
  const safeNextActions = [];
  let proofQuality;

  if (!hasSummary) {
    proofQuality = "missing";
    missingEvidence.push("No proof/value summary artifact is available from a completed real task.");
    safeNextActions.push(
      "Run a bounded real task in this repo.",
      "Run: node bin/avorelo proof",
      "Run: node bin/avorelo full-readiness --json"
    );
  } else {
    // "Simulated" takes precedence over the evidence-backed flag.
    if (isSimulated) {
      proofQuality = "simulated_fixture_only";
      missingEvidence.push("Latest proof artifact is marked simulated and does not count as real task evidence.");
    } else if (isEvidenceBacked) {
      proofQuality = "evidence_backed";
    } else {
      proofQuality = "present_but_unverified";
    }
    if (!isEvidenceBacked) {
      missingEvidence.push("Latest proof/value summary is present but not marked evidence-backed.");
    }
    if (!hasGate) {
      missingEvidence.push("No reality gate artifact is available alongside the latest proof summary.");
      safeNextActions.push("Run: node bin/avorelo outcome --gate --json");
    }
  }

  // Always offer at least one follow-up.
  if (safeNextActions.length === 0) {
    safeNextActions.push("Run: node bin/avorelo full-readiness --json");
  }

  const passes = hasSummary && isEvidenceBacked && hasGate && !isSimulated;

  return {
    contract: CONTRACT,
    schemaVersion: SCHEMA_VERSION,
    createdAt: nowIso(),
    status: passes ? "pass" : "warn",
    latestProofAvailable: hasSummary,
    realTaskProofCount: hasSummary && !isSimulated ? 1 : 0,
    simulatedProofDetected: isSimulated,
    proofQuality,
    evidenceRefs: {
      valueSummary: hasSummary ? VALUE_SUMMARY_REL : null,
      realityGate: hasGate ? REALITY_GATE_REL : null,
    },
    missingEvidence,
    safeNextActions,
    redacted: true,
  };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Persist an evidence record as pretty-printed JSON, creating the artifact
 * directory if needed.
 *
 * @param {string} cwd - Directory the artifact path is resolved against.
 * @param {object} evidence - Record to serialise.
 * @returns {string} Path of the written artifact (cwd joined with ARTIFACT_REL).
 */
function writeProofOutcomeEvidence(cwd, evidence) {
  const artifactAbs = path.join(cwd, ARTIFACT_REL);
  fs.mkdirSync(path.join(cwd, PROOF_OUTCOME_DIR_REL), { recursive: true });
  const serialized = JSON.stringify(evidence, null, 2);
  fs.writeFileSync(artifactAbs, serialized, "utf8");
  return artifactAbs;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Return a compact summary of the proof-outcome evidence for `cwd`.
 *
 * An already-written evidence artifact is reused as-is; only when none can be
 * read is a fresh record built and written to disk.
 *
 * @param {string} cwd - Directory the artifact path is resolved against.
 * @returns {{status: *, latestProofAvailable: *, realTaskProofCount: *,
 *   proofQuality: *, artifactPath: string, redacted: boolean}}
 */
function buildProofOutcomeEvidenceSurface(cwd) {
  let evidence = safeReadJson(path.join(cwd, ARTIFACT_REL));
  if (!evidence) {
    evidence = buildProofOutcomeEvidence(cwd);
    writeProofOutcomeEvidence(cwd, evidence);
  }

  const { status, latestProofAvailable, realTaskProofCount, proofQuality } = evidence;
  return {
    status,
    latestProofAvailable,
    realTaskProofCount,
    proofQuality,
    artifactPath: ARTIFACT_REL,
    redacted: true,
  };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Render an evidence record as plain text for terminal output.
 *
 * Four summary lines come first, followed by an optional "Missing evidence:"
 * section (at most 5 items) and an optional "Next:" section (at most 3 items).
 * A section is omitted when its list is absent, not an array, or empty.
 *
 * @param {object} evidence - Evidence record to render.
 * @returns {string} Newline-joined text.
 */
function formatProofOutcomeEvidenceText(evidence) {
  const proofAvailable = evidence.latestProofAvailable === true ? "yes" : "no";
  const out = [
    `Proof outcome evidence: ${evidence.status}`,
    `Latest proof available: ${proofAvailable}`,
    `Real task proof count: ${evidence.realTaskProofCount || 0}`,
    `Proof quality: ${evidence.proofQuality || "missing"}`,
  ];

  // Append a blank line, a heading, and up to `limit` bullet items.
  const appendSection = (heading, items, limit) => {
    if (!Array.isArray(items) || items.length === 0) return;
    out.push("", heading);
    items.slice(0, limit).forEach((item) => {
      out.push(` - ${item}`);
    });
  };

  appendSection("Missing evidence:", evidence.missingEvidence, 5);
  appendSection("Next:", evidence.safeNextActions, 3);

  return out.join("\n");
}
|
|
123
|
+
|
|
124
|
+
module.exports = {
|
|
125
|
+
CONTRACT,
|
|
126
|
+
SCHEMA_VERSION,
|
|
127
|
+
ARTIFACT_REL,
|
|
128
|
+
VALUE_SUMMARY_REL,
|
|
129
|
+
REALITY_GATE_REL,
|
|
130
|
+
buildProofOutcomeEvidence,
|
|
131
|
+
writeProofOutcomeEvidence,
|
|
132
|
+
buildProofOutcomeEvidenceSurface,
|
|
133
|
+
formatProofOutcomeEvidenceText,
|
|
134
|
+
};
|