@alan512/experienceengine 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +213 -130
- package/README.zh-CN.md +250 -119
- package/dist/adapters/claude-code/session-store.d.ts +1 -0
- package/dist/adapters/claude-code/session-store.js +24 -1
- package/dist/adapters/claude-code/session-store.js.map +1 -1
- package/dist/adapters/codex/action-registry.d.ts +84 -0
- package/dist/adapters/codex/action-registry.js +277 -0
- package/dist/adapters/codex/action-registry.js.map +1 -0
- package/dist/adapters/codex/broker-tools.d.ts +114 -0
- package/dist/adapters/codex/broker-tools.js +130 -0
- package/dist/adapters/codex/broker-tools.js.map +1 -0
- package/dist/adapters/codex/mcp-server.d.ts +21 -0
- package/dist/adapters/codex/mcp-server.js +103 -423
- package/dist/adapters/codex/mcp-server.js.map +1 -1
- package/dist/analyzer/candidate-signals.d.ts +3 -1
- package/dist/analyzer/candidate-signals.js +159 -0
- package/dist/analyzer/candidate-signals.js.map +1 -1
- package/dist/analyzer/llm-learning-gate.d.ts +12 -1
- package/dist/analyzer/llm-learning-gate.js +633 -16
- package/dist/analyzer/llm-learning-gate.js.map +1 -1
- package/dist/cli/commands/claude-hook.js +11 -4
- package/dist/cli/commands/claude-hook.js.map +1 -1
- package/dist/cli/commands/codex.d.ts +60 -0
- package/dist/cli/commands/codex.js +188 -0
- package/dist/cli/commands/codex.js.map +1 -0
- package/dist/cli/commands/doctor.js +35 -2
- package/dist/cli/commands/doctor.js.map +1 -1
- package/dist/cli/commands/evaluate.d.ts +9 -3
- package/dist/cli/commands/evaluate.js +31 -5
- package/dist/cli/commands/evaluate.js.map +1 -1
- package/dist/cli/commands/init.js +21 -8
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/inspect.js +73 -4
- package/dist/cli/commands/inspect.js.map +1 -1
- package/dist/cli/commands/repair.js +3 -3
- package/dist/cli/commands/repair.js.map +1 -1
- package/dist/cli/commands/status.js +38 -0
- package/dist/cli/commands/status.js.map +1 -1
- package/dist/cli/dispatch.js +16 -4
- package/dist/cli/dispatch.js.map +1 -1
- package/dist/config/config-schema.d.ts +177 -0
- package/dist/config/config-schema.js +142 -1
- package/dist/config/config-schema.js.map +1 -1
- package/dist/config/default-config.js +19 -1
- package/dist/config/default-config.js.map +1 -1
- package/dist/config/load-config.js +72 -1
- package/dist/config/load-config.js.map +1 -1
- package/dist/config/settings-store.d.ts +19 -0
- package/dist/config/settings-store.js +11 -0
- package/dist/config/settings-store.js.map +1 -1
- package/dist/controller/candidate-retriever.d.ts +16 -1
- package/dist/controller/candidate-retriever.js +199 -137
- package/dist/controller/candidate-retriever.js.map +1 -1
- package/dist/controller/injection-scorecard.d.ts +2 -14
- package/dist/controller/injection-scorecard.js +29 -0
- package/dist/controller/injection-scorecard.js.map +1 -1
- package/dist/controller/intervention-controller.d.ts +3 -15
- package/dist/controller/intervention-controller.js +219 -57
- package/dist/controller/intervention-controller.js.map +1 -1
- package/dist/controller/policy-enricher.d.ts +10 -0
- package/dist/controller/policy-enricher.js +186 -0
- package/dist/controller/policy-enricher.js.map +1 -0
- package/dist/controller/retrieval-context.d.ts +3 -0
- package/dist/controller/retrieval-context.js +37 -0
- package/dist/controller/retrieval-context.js.map +1 -0
- package/dist/controller/second-opinion-gate.d.ts +41 -0
- package/dist/controller/second-opinion-gate.js +225 -0
- package/dist/controller/second-opinion-gate.js.map +1 -0
- package/dist/controller/trigger-evaluator.d.ts +6 -1
- package/dist/controller/trigger-evaluator.js +31 -1
- package/dist/controller/trigger-evaluator.js.map +1 -1
- package/dist/distillation/prompt-contract.d.ts +1 -1
- package/dist/distillation/prompt-contract.js +3 -1
- package/dist/distillation/prompt-contract.js.map +1 -1
- package/dist/distillation/providers/gemini.js +5 -1
- package/dist/distillation/providers/gemini.js.map +1 -1
- package/dist/distillation/queue-worker.js +22 -3
- package/dist/distillation/queue-worker.js.map +1 -1
- package/dist/evaluation/codex-lifecycle-validation.d.ts +60 -0
- package/dist/evaluation/codex-lifecycle-validation.js +233 -0
- package/dist/evaluation/codex-lifecycle-validation.js.map +1 -0
- package/dist/evaluation/hybrid-phase1-rollout-summary.d.ts +63 -0
- package/dist/evaluation/hybrid-phase1-rollout-summary.js +108 -0
- package/dist/evaluation/hybrid-phase1-rollout-summary.js.map +1 -0
- package/dist/evaluation/hybrid-phase3-gate-metrics.d.ts +26 -0
- package/dist/evaluation/hybrid-phase3-gate-metrics.js +23 -0
- package/dist/evaluation/hybrid-phase3-gate-metrics.js.map +1 -0
- package/dist/evaluation/openclaw-baseline.d.ts +8 -0
- package/dist/evaluation/openclaw-baseline.js +27 -0
- package/dist/evaluation/openclaw-baseline.js.map +1 -1
- package/dist/experience-management/governance-observability.d.ts +13 -0
- package/dist/experience-management/governance-observability.js +37 -0
- package/dist/experience-management/governance-observability.js.map +1 -0
- package/dist/experience-management/node-lifecycle-governance.d.ts +8 -0
- package/dist/experience-management/node-lifecycle-governance.js +80 -0
- package/dist/experience-management/node-lifecycle-governance.js.map +1 -0
- package/dist/experience-management/task-management-signals.d.ts +29 -0
- package/dist/experience-management/task-management-signals.js +148 -0
- package/dist/experience-management/task-management-signals.js.map +1 -0
- package/dist/feedback/feedback-manager.d.ts +4 -1
- package/dist/feedback/feedback-manager.js +11 -22
- package/dist/feedback/feedback-manager.js.map +1 -1
- package/dist/feedback/state-transition.d.ts +6 -1
- package/dist/feedback/state-transition.js +6 -3
- package/dist/feedback/state-transition.js.map +1 -1
- package/dist/hybrid/capsule-builder.d.ts +23 -0
- package/dist/hybrid/capsule-builder.js +114 -0
- package/dist/hybrid/capsule-builder.js.map +1 -0
- package/dist/hybrid/explain-provider-client.d.ts +19 -0
- package/dist/hybrid/explain-provider-client.js +34 -0
- package/dist/hybrid/explain-provider-client.js.map +1 -0
- package/dist/hybrid/postmortem-provider-client.d.ts +19 -0
- package/dist/hybrid/postmortem-provider-client.js +34 -0
- package/dist/hybrid/postmortem-provider-client.js.map +1 -0
- package/dist/hybrid/rollout.d.ts +9 -0
- package/dist/hybrid/rollout.js +49 -0
- package/dist/hybrid/rollout.js.map +1 -0
- package/dist/hybrid/router.d.ts +4 -0
- package/dist/hybrid/router.js +62 -0
- package/dist/hybrid/router.js.map +1 -0
- package/dist/hybrid/types.d.ts +140 -0
- package/dist/hybrid/types.js +2 -0
- package/dist/hybrid/types.js.map +1 -0
- package/dist/hybrid/validators.d.ts +5 -0
- package/dist/hybrid/validators.js +94 -0
- package/dist/hybrid/validators.js.map +1 -0
- package/dist/hybrid/worker-client.d.ts +61 -0
- package/dist/hybrid/worker-client.js +196 -0
- package/dist/hybrid/worker-client.js.map +1 -0
- package/dist/hybrid/workers/explain-decision-llm.d.ts +8 -0
- package/dist/hybrid/workers/explain-decision-llm.js +152 -0
- package/dist/hybrid/workers/explain-decision-llm.js.map +1 -0
- package/dist/hybrid/workers/explain-decision.d.ts +2 -0
- package/dist/hybrid/workers/explain-decision.js +40 -0
- package/dist/hybrid/workers/explain-decision.js.map +1 -0
- package/dist/hybrid/workers/postmortem-review-llm.d.ts +8 -0
- package/dist/hybrid/workers/postmortem-review-llm.js +398 -0
- package/dist/hybrid/workers/postmortem-review-llm.js.map +1 -0
- package/dist/hybrid/workers/postmortem-review.d.ts +2 -0
- package/dist/hybrid/workers/postmortem-review.js +66 -0
- package/dist/hybrid/workers/postmortem-review.js.map +1 -0
- package/dist/install/claude-code-doctor.d.ts +1 -0
- package/dist/install/claude-code-doctor.js +20 -4
- package/dist/install/claude-code-doctor.js.map +1 -1
- package/dist/install/claude-code-installer.js +50 -1
- package/dist/install/claude-code-installer.js.map +1 -1
- package/dist/install/codex-cli.d.ts +15 -0
- package/dist/install/codex-cli.js +55 -3
- package/dist/install/codex-cli.js.map +1 -1
- package/dist/install/codex-installer.d.ts +7 -0
- package/dist/install/codex-installer.js +22 -0
- package/dist/install/codex-installer.js.map +1 -1
- package/dist/install/openclaw-cli.d.ts +11 -0
- package/dist/install/openclaw-cli.js.map +1 -1
- package/dist/install/openclaw-installer.d.ts +12 -7
- package/dist/install/openclaw-installer.js +197 -46
- package/dist/install/openclaw-installer.js.map +1 -1
- package/dist/interaction/service.d.ts +15 -0
- package/dist/interaction/service.js +189 -31
- package/dist/interaction/service.js.map +1 -1
- package/dist/plugin/hooks/before-prompt-build.d.ts +1 -0
- package/dist/plugin/hooks/before-prompt-build.js +4 -1
- package/dist/plugin/hooks/before-prompt-build.js.map +1 -1
- package/dist/plugin/openclaw-install-state.d.ts +39 -0
- package/dist/plugin/openclaw-install-state.js +24 -0
- package/dist/plugin/openclaw-install-state.js.map +1 -0
- package/dist/plugin/openclaw-plugin.d.ts +125 -0
- package/dist/plugin/openclaw-plugin.js +18 -7
- package/dist/plugin/openclaw-plugin.js.map +1 -1
- package/dist/plugin/openclaw-routine-interaction.d.ts +2 -1
- package/dist/plugin/openclaw-routine-interaction.js +12 -7
- package/dist/plugin/openclaw-routine-interaction.js.map +1 -1
- package/dist/plugin/openclaw-runtime-defaults.d.ts +16 -0
- package/dist/plugin/openclaw-runtime-defaults.js +16 -0
- package/dist/plugin/openclaw-runtime-defaults.js.map +1 -0
- package/dist/runtime/service.d.ts +34 -5
- package/dist/runtime/service.js +474 -49
- package/dist/runtime/service.js.map +1 -1
- package/dist/store/sqlite/db.js +28 -0
- package/dist/store/sqlite/db.js.map +1 -1
- package/dist/store/sqlite/repositories/hybrid-invocation-trace-repo.d.ts +11 -0
- package/dist/store/sqlite/repositories/hybrid-invocation-trace-repo.js +76 -0
- package/dist/store/sqlite/repositories/hybrid-invocation-trace-repo.js.map +1 -0
- package/dist/store/sqlite/repositories/hybrid-review-artifact-repo.d.ts +11 -0
- package/dist/store/sqlite/repositories/hybrid-review-artifact-repo.js +73 -0
- package/dist/store/sqlite/repositories/hybrid-review-artifact-repo.js.map +1 -0
- package/dist/store/sqlite/repositories/input-record-repo.d.ts +1 -0
- package/dist/store/sqlite/repositories/input-record-repo.js +13 -0
- package/dist/store/sqlite/repositories/input-record-repo.js.map +1 -1
- package/dist/store/sqlite/repositories/node-repo.d.ts +4 -0
- package/dist/store/sqlite/repositories/node-repo.js +54 -6
- package/dist/store/sqlite/repositories/node-repo.js.map +1 -1
- package/dist/store/sqlite/schema.sql +40 -0
- package/dist/store/vector/embeddings.js +26 -8
- package/dist/store/vector/embeddings.js.map +1 -1
- package/dist/types/domain.d.ts +151 -2
- package/dist/types/plugin.d.ts +2 -1
- package/docs/releases/v0.1.3.md +3 -2
- package/docs/releases/v0.2.0.md +85 -0
- package/docs/releases/v0.2.1.md +21 -0
- package/docs/user-guide.md +44 -13
- package/openclaw.plugin.json +81 -1
- package/package.json +11 -2
- package/plugins/claude-code-experienceengine/.claude-plugin/plugin.json +1 -1
- package/plugins/claude-code-experienceengine/scripts/install-deps.sh +1 -1
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
 * Optional overrides accepted by `runCodexLifecycleValidation`.
 * Every field may be omitted; the defaults below are the ones the
 * implementation applies.
 */
export type CodexLifecycleValidationOptions = {
    /**
     * Extra environment variables layered on top of `process.env`.
     * The validation run still pins its own `EXPERIENCE_ENGINE_*` settings
     * after this merge, so those keys cannot be overridden here.
     */
    env?: NodeJS.ProcessEnv;
    /** Runtime home directory; defaults to `<outputDir>/runtime-home`. */
    homeDir?: string;
    /**
     * Directory that receives the JSON and Markdown reports; defaults to
     * `artifacts/evaluations/codex/<timestamp>` resolved against the current
     * working directory, with `:` and `.` in the timestamp replaced by `-`.
     */
    outputDir?: string;
    /** Repository root used as the working directory of the run; defaults to `process.cwd()`. */
    repoRoot?: string;
    /** Timestamp source; defaults to `new Date().toISOString()`. */
    now?: () => string;
};
|
|
8
|
+
/**
 * Report produced by one Codex lifecycle validation run. It is serialized to
 * `codex-lifecycle.json` and summarized in `codex-lifecycle.md`.
 */
export type CodexLifecycleValidationReport = {
    /** Timestamp of the run (from `options.now` or the current ISO time). */
    generatedAt: string;
    /** Repository root the run used as its working directory. */
    repoRoot: string;
    /** Prompt sent through the lifecycle. */
    prompt: string;
    /** Session id shared by the lookup, tool-result and finalize calls. */
    sessionId: string;
    /** Directory the report files were written to. */
    outputDir: string;
    /** Home directory handed to the config loader and the behavior loop. */
    runtimeHome: string;
    /** `sqlitePath` of the loaded config. */
    sqlitePath: string;
    /** `captureDir` of the loaded config. */
    captureDir: string;
    /** Id of the node seeded before the lookup. */
    seededNodeId: string;
    /** Fields copied from the result of the hint lookup. */
    lookup: {
        mode: "skip" | "inject_conservative" | "inject";
        injectedNodeIds: string[];
        deliveryMode?: "live" | "shadow" | "holdout";
        delivered?: boolean;
        notice?: string;
    };
    /** Fields copied from the result of recording the tool outcome. */
    toolResult: {
        status: string;
        eventStatus: string;
        toolName: string;
    };
    /** Fields copied from the result of finalizing the task. */
    finalize: {
        status: string;
        outcomeSignal: string;
        recordedToolEvents: number;
    };
    /** Repository counts read back from the database after the run. */
    persistence: {
        taskRunCount: number;
        injectionEventCount: number;
        reviewEventCount: number;
        hybridArtifactCount: number;
        hybridTraceCount: number;
        /** `event_type` of each review event recorded for the latest task run of the session. */
        reviewEventTypes: string[];
    };
    /** Snapshot of the seeded node as read back after the run. */
    node: {
        id: string;
        state: string;
        /** Falls back to `"eligible"` when the stored node has no delivery state. */
        deliveryState: string;
        usageCount: number;
        helpedCount: number;
        harmedCount: number;
        lastFeedbackVerdict?: string;
    };
};
|
|
53
|
+
/** Value resolved by `runCodexLifecycleValidation`. */
export type CodexLifecycleValidationRunResult = {
    /** Directory that holds both report files. */
    outputDir: string;
    /** Path of `codex-lifecycle.json` inside `outputDir`. */
    jsonPath: string;
    /** Path of `codex-lifecycle.md` inside `outputDir`. */
    markdownPath: string;
    /** The in-memory report that was written to both files. */
    report: CodexLifecycleValidationReport;
};
|
|
59
|
+
/** Renders a report as a Markdown heading followed by a bullet list; lines are joined with "\n" and there is no trailing newline. */
export declare const renderCodexLifecycleValidationMarkdown: (report: CodexLifecycleValidationReport) => string;
|
|
60
|
+
/** Runs the seeded lookup → tool result → finalize lifecycle and writes the JSON and Markdown reports; rejects if no task run or seeded node is found afterwards. */
export declare const runCodexLifecycleValidation: (options?: CodexLifecycleValidationOptions) => Promise<CodexLifecycleValidationRunResult>;
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
import { loadConfig } from "../config/load-config.js";
|
|
4
|
+
import { createCodexBehaviorLoop } from "../adapters/codex/mcp-server.js";
|
|
5
|
+
import { resolveScope } from "../input/scope-resolver.js";
|
|
6
|
+
import { openDatabase, bootstrapDatabase } from "../store/sqlite/db.js";
|
|
7
|
+
import { HybridInvocationTraceRepository } from "../store/sqlite/repositories/hybrid-invocation-trace-repo.js";
|
|
8
|
+
import { HybridReviewArtifactRepository } from "../store/sqlite/repositories/hybrid-review-artifact-repo.js";
|
|
9
|
+
import { InjectionRepository } from "../store/sqlite/repositories/injection-repo.js";
|
|
10
|
+
import { NodeRepository } from "../store/sqlite/repositories/node-repo.js";
|
|
11
|
+
import { ReviewEventRepository } from "../store/sqlite/repositories/review-event-repo.js";
|
|
12
|
+
import { TaskRunRepository } from "../store/sqlite/repositories/task-run-repo.js";
|
|
13
|
+
import { buildLegacyEmbedding } from "../store/vector/embeddings.js";
|
|
14
|
+
// Session id shared by every behavior-loop call of the validation run and used to look up the persisted task run.
const DEFAULT_SESSION_ID = "codex-lifecycle-validation";
|
|
15
|
+
// Id of the node that is seeded before the lookup and read back for the report.
const DEFAULT_NODE_ID = "node_codex_lifecycle_validation";
|
|
16
|
+
// Fixed prompt for the run; it is also the seeded node's trigger pattern and the first line of its retrieval text.
const DEFAULT_PROMPT = "Fix the failing auth test";
|
|
17
|
+
/**
 * Makes a timestamp usable as a directory name.
 *
 * @param {string} value - Timestamp text, e.g. an ISO-8601 string.
 * @returns {string} The input with every ":" and "." replaced by "-".
 */
const sanitizeStamp = (value) => {
    const separators = /[:.]/g;
    return value.replace(separators, "-");
};
|
|
18
|
+
/**
 * Ensures that a directory exists, creating it and any missing parents.
 *
 * `mkdirSync` with `recursive: true` is already a no-op for a directory that
 * exists, so no separate existence probe is made. That removes the
 * check-then-create race and stops a path that exists as a regular file from
 * being silently accepted as "already there".
 *
 * @param {string} path - Directory to create.
 * @returns {void}
 * @throws {Error} When the directory cannot be created, including when
 *   `path` already exists but is not a directory.
 */
const mkdirIfMissing = (path) => {
    mkdirSync(path, { recursive: true });
};
|
|
23
|
+
/**
 * Builds the default report directory for a run.
 *
 * @param {string} timestamp - Timestamp of the run.
 * @returns {string} Absolute path `artifacts/evaluations/codex/<sanitized timestamp>`,
 *   resolved against the current working directory.
 */
const defaultOutputDir = (timestamp) => {
    const runFolder = sanitizeStamp(timestamp);
    return resolve("artifacts", "evaluations", "codex", runFolder);
};
|
|
24
|
+
/**
 * Upserts the fixed "strategy" node that the validation run expects the
 * lookup to find.
 *
 * @param repo - Node repository; only its `upsert` method is used.
 * @param {string} repoRoot - Repository root passed to `resolveScope` to obtain the scope id.
 * @param {string} timestamp - Stored as both `created_at` and `updated_at`.
 * @param {string} nodeId - Id of the seeded node.
 * @param {string} prompt - Used as the trigger pattern and as the first line of the retrieval text.
 * @returns {void}
 */
const seedLifecycleNode = (repo, repoRoot, timestamp, nodeId, prompt) => {
    const verificationHint = "Run the failing auth test before editing and verify after the fix.";
    const { scope_id: scopeId } = resolveScope(repoRoot);
    const retrievalText = `${prompt}\n${verificationHint}`;
    // The embedding is computed from the retrieval text with the legacy embedder.
    const { embedding: vector, space } = buildLegacyEmbedding(retrievalText);
    repo.upsert({
        id: nodeId,
        node_type: "strategy",
        scope_id: scopeId,
        task_type: "test_debug",
        trigger_pattern: prompt,
        applicability_notes: "Use the same repo and test scope",
        env_signature: undefined,
        compact_hint: verificationHint,
        goal: "Stabilize the failing auth test",
        recommended_steps: ["Run the failing test", "Apply the minimal fix", "Re-run the test"],
        avoid_steps: [],
        fallback_steps: [],
        success_signal: "The targeted auth test passes",
        stop_condition: undefined,
        escalation_condition: undefined,
        evidence_summary: "Recovered the same failing auth test in a prior Codex lifecycle validation run.",
        retrieval_text: retrievalText,
        embedding: vector,
        embedding_provider: space.provider,
        embedding_model: space.model,
        embedding_version: space.version,
        embedding_dimensions: space.dimensions,
        distillation_mode_used: "rule",
        distillation_source: "rule",
        source_kind: "system_derived",
        origin_record_ids: ["input_codex_validation_origin"],
        helped_record_ids: [],
        harmed_record_ids: [],
        // The node starts active and eligible with zeroed feedback counters.
        state: "active",
        delivery_state: "eligible",
        usage_count: 0,
        helped_count: 0,
        harmed_count: 0,
        consecutive_harmed_count: 0,
        support_count: 1,
        created_at: timestamp,
        updated_at: timestamp
    });
};
|
|
69
|
+
/**
 * Renders a lifecycle validation report as Markdown.
 *
 * @param report - Report object; reads `generatedAt`, `repoRoot`, `sessionId`,
 *   `seededNodeId` and the `lookup`, `finalize`, `persistence` and `node` sections.
 * @returns {string} A heading, a blank line and one bullet per reported value,
 *   joined with "\n" (no trailing newline). Empty id/type lists render as "none".
 */
export const renderCodexLifecycleValidationMarkdown = (report) => {
    const { lookup, finalize, persistence, node } = report;
    const entries = [
        ["Generated at", report.generatedAt],
        ["Repo root", report.repoRoot],
        ["Session id", report.sessionId],
        ["Seeded node", report.seededNodeId],
        ["Lookup mode", lookup.mode],
        ["Injected node ids", lookup.injectedNodeIds.join(", ") || "none"],
        ["Final outcome", finalize.outcomeSignal],
        ["Persisted task runs", persistence.taskRunCount],
        ["Persisted injection events", persistence.injectionEventCount],
        ["Persisted review events", persistence.reviewEventCount],
        ["Persisted hybrid artifacts", persistence.hybridArtifactCount],
        ["Persisted hybrid traces", persistence.hybridTraceCount],
        ["Review event types", persistence.reviewEventTypes.join(", ") || "none"],
        ["Node lifecycle", node.state],
        ["Node delivery", node.deliveryState],
        ["Node helped/harmed", `${node.helpedCount}/${node.harmedCount}`]
    ];
    const bullets = entries.map(([label, value]) => `- ${label}: ${value}`);
    return ["# Codex lifecycle validation", "", ...bullets].join("\n");
};
|
|
89
|
+
/**
 * Runs one deterministic Codex lifecycle against an isolated runtime home and
 * writes a JSON and a Markdown report.
 *
 * Steps: seed a known node, look up hints for the fixed prompt, record one
 * successful tool result, finalize the task, wait for background learning,
 * then read the persisted state back from the database.
 *
 * @param {object} [options] - Optional `env`, `homeDir`, `outputDir`,
 *   `repoRoot` and `now` overrides.
 * @returns {Promise<object>} `{ outputDir, jsonPath, markdownPath, report }`.
 * @throws {Error} When no task run was persisted for the session, or the
 *   seeded node can no longer be found.
 */
export const runCodexLifecycleValidation = async (options = {}) => {
    const generatedAt = options.now?.() ?? new Date().toISOString();
    const outputDir = resolve(options.outputDir ?? defaultOutputDir(generatedAt));
    const runtimeHome = resolve(options.homeDir ?? join(outputDir, "runtime-home"));
    const experienceHome = join(runtimeHome, ".experienceengine");
    const repoRoot = options.repoRoot ? resolve(options.repoRoot) : process.cwd();
    const prompt = DEFAULT_PROMPT;
    const sessionId = DEFAULT_SESSION_ID;
    const seededNodeId = DEFAULT_NODE_ID;

    mkdirIfMissing(outputDir);
    mkdirIfMissing(runtimeHome);

    // These settings are spread last so neither process.env nor options.env can override them.
    const pinnedSettings = {
        EXPERIENCE_ENGINE_HOME: experienceHome,
        EXPERIENCE_ENGINE_EMBEDDING_PROVIDER: "legacy",
        EXPERIENCE_ENGINE_DISTILLATION_MODE: "disabled",
        EXPERIENCE_ENGINE_HYBRID_ENABLED: "true",
        EXPERIENCE_ENGINE_HYBRID_ASYNC_POSTMORTEM_ENABLED: "true",
        EXPERIENCE_ENGINE_HYBRID_ASYNC_POSTMORTEM_LLM_ENABLED: "false",
        EXPERIENCE_ENGINE_HYBRID_ROLLOUT_MODE: "live"
    };
    const env = { ...process.env, ...options.env, ...pinnedSettings };

    const config = loadConfig({}, { env, homeDir: runtimeHome });
    const db = openDatabase(config);
    bootstrapDatabase(db);
    const nodeRepo = new NodeRepository(db);
    seedLifecycleNode(nodeRepo, repoRoot, generatedAt, seededNodeId, prompt);

    // Canned postmortem review: always reports that the seeded node helped.
    const positiveReviewText = "The injected node materially contributed to the successful Codex lifecycle validation run.";
    const cannedPostmortemReview = async () => ({
        task: "postmortem_review",
        review_verdict: "policy_gated",
        candidate_recommendation: "observe",
        feedback_followup_recommendation: "none",
        confidence: "high",
        reason: positiveReviewText,
        review_artifact: {
            summary: positiveReviewText,
            notes: ["Apply bounded helped writeback for the injected node."]
        },
        injected_node_reviews: [
            {
                node_id: seededNodeId,
                feedback_verdict: "helped",
                confidence: "high",
                delivery_recommendation: "keep",
                reason: "The deterministic validation flow followed the injected verification loop and completed successfully."
            }
        ]
    });

    const behaviorLoop = createCodexBehaviorLoop({
        env,
        homeDir: runtimeHome,
        runtimeOptions: {
            env,
            homeDir: runtimeHome,
            hybridWorkerClientOptions: {
                postmortemReviewExecutor: cannedPostmortemReview
            }
        }
    });

    const lookup = await behaviorLoop.lookupHints({ cwd: repoRoot, prompt, sessionId });
    const toolResult = await behaviorLoop.recordToolResult({
        sessionId,
        toolName: "vitest",
        inputSummary: "pnpm vitest run auth",
        outputSummary: "The targeted auth test passed after following the injected hint.",
        status: "success"
    });
    const finalized = await behaviorLoop.finalizeTask({ sessionId, cwd: repoRoot, prompt });
    await behaviorLoop.waitForBackgroundLearning();

    // Read back what the run persisted.
    const taskRunRepo = new TaskRunRepository(db);
    const injectionRepo = new InjectionRepository(db);
    const reviewRepo = new ReviewEventRepository(db);
    const artifactRepo = new HybridReviewArtifactRepository(db);
    const traceRepo = new HybridInvocationTraceRepository(db);

    const latestTaskRun = taskRunRepo.getLatestBySessionId(sessionId);
    if (!latestTaskRun) {
        throw new Error("Codex lifecycle validation expected a persisted task run.");
    }
    const seededNode = nodeRepo.getById(seededNodeId);
    if (!seededNode) {
        throw new Error("Codex lifecycle validation expected the seeded node to remain persisted.");
    }
    // The repository's listing order is reversed for the report.
    const reviewEvents = reviewRepo.listByTaskRunId(latestTaskRun.id).reverse();

    const report = {
        generatedAt,
        repoRoot,
        prompt,
        sessionId,
        outputDir,
        runtimeHome,
        sqlitePath: config.sqlitePath,
        captureDir: config.captureDir,
        seededNodeId,
        lookup: {
            mode: lookup.mode,
            injectedNodeIds: lookup.injectedNodeIds,
            deliveryMode: lookup.deliveryMode,
            delivered: lookup.delivered,
            notice: lookup.notice
        },
        toolResult: {
            status: toolResult.status,
            eventStatus: toolResult.eventStatus,
            toolName: toolResult.toolName
        },
        finalize: {
            status: finalized.status,
            outcomeSignal: finalized.outcomeSignal,
            recordedToolEvents: finalized.recordedToolEvents
        },
        persistence: {
            taskRunCount: taskRunRepo.count(),
            injectionEventCount: injectionRepo.count(),
            reviewEventCount: reviewRepo.count(),
            hybridArtifactCount: artifactRepo.count(),
            hybridTraceCount: traceRepo.count(),
            reviewEventTypes: reviewEvents.map(({ event_type }) => event_type)
        },
        node: {
            id: seededNode.id,
            state: seededNode.state,
            deliveryState: seededNode.delivery_state ?? "eligible",
            usageCount: seededNode.usage_count,
            helpedCount: seededNode.helped_count,
            harmedCount: seededNode.harmed_count,
            lastFeedbackVerdict: seededNode.last_feedback_verdict
        }
    };

    const jsonPath = join(outputDir, "codex-lifecycle.json");
    const markdownPath = join(outputDir, "codex-lifecycle.md");
    writeFileSync(jsonPath, JSON.stringify(report, null, 2));
    writeFileSync(markdownPath, renderCodexLifecycleValidationMarkdown(report));

    return { outputDir, jsonPath, markdownPath, report };
};
|
|
233
|
+
//# sourceMappingURL=codex-lifecycle-validation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"codex-lifecycle-validation.js","sourceRoot":"","sources":["../../src/evaluation/codex-lifecycle-validation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAC/D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,0BAA0B,CAAC;AACtD,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AACxE,OAAO,EAAE,+BAA+B,EAAE,MAAM,8DAA8D,CAAC;AAC/G,OAAO,EAAE,8BAA8B,EAAE,MAAM,6DAA6D,CAAC;AAC7G,OAAO,EAAE,mBAAmB,EAAE,MAAM,gDAAgD,CAAC;AACrF,OAAO,EAAE,cAAc,EAAE,MAAM,2CAA2C,CAAC;AAC3E,OAAO,EAAE,qBAAqB,EAAE,MAAM,mDAAmD,CAAC;AAC1F,OAAO,EAAE,iBAAiB,EAAE,MAAM,+CAA+C,CAAC;AAClF,OAAO,EAAE,oBAAoB,EAAE,MAAM,+BAA+B,CAAC;AAgErE,MAAM,kBAAkB,GAAG,4BAA4B,CAAC;AACxD,MAAM,eAAe,GAAG,iCAAiC,CAAC;AAC1D,MAAM,cAAc,GAAG,2BAA2B,CAAC;AAEnD,MAAM,aAAa,GAAG,CAAC,KAAa,EAAU,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AAE7E,MAAM,cAAc,GAAG,CAAC,IAAY,EAAQ,EAAE;IAC5C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,SAAS,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACvC,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,SAAiB,EAAU,EAAE,CACrD,OAAO,CAAC,WAAW,EAAE,aAAa,EAAE,OAAO,EAAE,aAAa,CAAC,SAAS,CAAC,CAAC,CAAC;AAEzE,MAAM,iBAAiB,GAAG,CACxB,IAAoB,EACpB,QAAgB,EAChB,SAAiB,EACjB,MAAc,EACd,MAAc,EACR,EAAE;IACR,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACrC,MAAM,aAAa,GAAG,GAAG,MAAM,sEAAsE,CAAC;IACtG,MAAM,SAAS,GAAG,oBAAoB,CAAC,aAAa,CAAC,CAAC;IACtD,MAAM,IAAI,GAAmB;QAC3B,EAAE,EAAE,MAAM;QACV,SAAS,EAAE,UAAU;QACrB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,SAAS,EAAE,YAAY;QACvB,eAAe,EAAE,MAAM;QACvB,mBAAmB,EAAE,kCAAkC;QACvD,aAAa,EAAE,SAAS;QACxB,YAAY,EAAE,oEAAoE;QAClF,IAAI,EAAE,iCAAiC;QACvC,iBAAiB,EAAE,CAAC,sBAAsB,EAAE,uBAAuB,EAAE,iBAAiB,CAAC;QACvF,WAAW,EAAE,EAAE;QACf,cAAc,EAAE,EAAE;QAClB,cAAc,EAAE,+BAA+B;QAC/C,cAAc,EAAE,SAAS;QACzB,oBAAoB,EAAE,SAAS;QAC/B,gBAAgB,EAAE,iFAAiF;QACnG,cAAc,EAAE,aAAa;QAC7B,SAAS,EAAE,SAAS,CAAC,SAAS;QAC9B,kBAAkB,EAAE,SAAS,CAAC,KAAK,CAAC,QAAQ;QAC5C,e
AAe,EAAE,SAAS,CAAC,KAAK,CAAC,KAAK;QACtC,iBAAiB,EAAE,SAAS,CAAC,KAAK,CAAC,OAAO;QAC1C,oBAAoB,EAAE,SAAS,CAAC,KAAK,CAAC,UAAU;QAChD,sBAAsB,EAAE,MAAM;QAC9B,mBAAmB,EAAE,MAAM;QAC3B,WAAW,EAAE,gBAAgB;QAC7B,iBAAiB,EAAE,CAAC,+BAA+B,CAAC;QACpD,iBAAiB,EAAE,EAAE;QACrB,iBAAiB,EAAE,EAAE;QACrB,KAAK,EAAE,QAAQ;QACf,cAAc,EAAE,UAAU;QAC1B,WAAW,EAAE,CAAC;QACd,YAAY,EAAE,CAAC;QACf,YAAY,EAAE,CAAC;QACf,wBAAwB,EAAE,CAAC;QAC3B,aAAa,EAAE,CAAC;QAChB,UAAU,EAAE,SAAS;QACrB,UAAU,EAAE,SAAS;KACtB,CAAC;IAEF,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AACpB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,sCAAsC,GAAG,CACpD,MAAsC,EAC9B,EAAE,CACV;IACE,8BAA8B;IAC9B,EAAE;IACF,mBAAmB,MAAM,CAAC,WAAW,EAAE;IACvC,gBAAgB,MAAM,CAAC,QAAQ,EAAE;IACjC,iBAAiB,MAAM,CAAC,SAAS,EAAE;IACnC,kBAAkB,MAAM,CAAC,YAAY,EAAE;IACvC,kBAAkB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE;IACtC,wBAAwB,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,MAAM,EAAE;IAC5E,oBAAoB,MAAM,CAAC,QAAQ,CAAC,aAAa,EAAE;IACnD,0BAA0B,MAAM,CAAC,WAAW,CAAC,YAAY,EAAE;IAC3D,iCAAiC,MAAM,CAAC,WAAW,CAAC,mBAAmB,EAAE;IACzE,8BAA8B,MAAM,CAAC,WAAW,CAAC,gBAAgB,EAAE;IACnE,iCAAiC,MAAM,CAAC,WAAW,CAAC,mBAAmB,EAAE;IACzE,8BAA8B,MAAM,CAAC,WAAW,CAAC,gBAAgB,EAAE;IACnE,yBAAyB,MAAM,CAAC,WAAW,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,MAAM,EAAE;IACnF,qBAAqB,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE;IACxC,oBAAoB,MAAM,CAAC,IAAI,CAAC,aAAa,EAAE;IAC/C,yBAAyB,MAAM,CAAC,IAAI,CAAC,WAAW,IAAI,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE;CAC9E,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEf,MAAM,CAAC,MAAM,2BAA2B,GAAG,KAAK,EAC9C,UAA2C,EAAE,EACD,EAAE;IAC9C,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,EAAE,EAAE,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAChE,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,IAAI,gBAAgB,CAAC,WAAW,CAAC,CAAC,CAAC;IAC9E,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC,CAAC;IAChF,MAAM,cAAc,GAAG,IAAI,CAAC,WAAW,EAAE,mBAAmB,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC;IAC9E,MAAM,MAAM,GAAG,cAAc,CAAC;IAE9B,cAAc,CAAC,SAAS,CAAC,CAAC;IAC1B,cAAc,CAAC,WAAW,CAAC,CAAC;IAE5
B,MAAM,GAAG,GAAsB;QAC7B,GAAG,OAAO,CAAC,GAAG;QACd,GAAG,OAAO,CAAC,GAAG;QACd,sBAAsB,EAAE,cAAc;QACtC,oCAAoC,EAAE,QAAQ;QAC9C,mCAAmC,EAAE,UAAU;QAC/C,gCAAgC,EAAE,MAAM;QACxC,iDAAiD,EAAE,MAAM;QACzD,qDAAqD,EAAE,OAAO;QAC9D,qCAAqC,EAAE,MAAM;KAC9C,CAAC;IAEF,MAAM,MAAM,GAAG,UAAU,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;IAC7D,MAAM,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;IAChC,iBAAiB,CAAC,EAAE,CAAC,CAAC;IAEtB,MAAM,QAAQ,GAAG,IAAI,cAAc,CAAC,EAAE,CAAC,CAAC;IACxC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,CAAC,CAAC;IAE5E,MAAM,IAAI,GAAG,uBAAuB,CAAC;QACnC,GAAG;QACH,OAAO,EAAE,WAAW;QACpB,cAAc,EAAE;YACd,GAAG;YACH,OAAO,EAAE,WAAW;YACpB,yBAAyB,EAAE;gBACzB,wBAAwB,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;oBACrC,IAAI,EAAE,mBAAmB;oBACzB,cAAc,EAAE,cAAc;oBAC9B,wBAAwB,EAAE,SAAS;oBACnC,gCAAgC,EAAE,MAAM;oBACxC,UAAU,EAAE,MAAM;oBAClB,MAAM,EAAE,4FAA4F;oBACpG,eAAe,EAAE;wBACf,OAAO,EAAE,4FAA4F;wBACrG,KAAK,EAAE,CAAC,uDAAuD,CAAC;qBACjE;oBACD,qBAAqB,EAAE;wBACrB;4BACE,OAAO,EAAE,eAAe;4BACxB,gBAAgB,EAAE,QAAQ;4BAC1B,UAAU,EAAE,MAAM;4BAClB,uBAAuB,EAAE,MAAM;4BAC/B,MAAM,EAAE,uGAAuG;yBAChH;qBACF;iBACF,CAAC;aACH;SACF;KACF,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC;QACpC,GAAG,EAAE,QAAQ;QACb,MAAM;QACN,SAAS,EAAE,kBAAkB;KAC9B,CAAC,CAAC;IACH,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC;QAC7C,SAAS,EAAE,kBAAkB;QAC7B,QAAQ,EAAE,QAAQ;QAClB,YAAY,EAAE,sBAAsB;QACpC,aAAa,EAAE,kEAAkE;QACjF,MAAM,EAAE,SAAS;KAClB,CAAC,CAAC;IACH,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC;QACxC,SAAS,EAAE,kBAAkB;QAC7B,GAAG,EAAE,QAAQ;QACb,MAAM;KACP,CAAC,CAAC;IACH,MAAM,IAAI,CAAC,yBAAyB,EAAE,CAAC;IAEvC,MAAM,WAAW,GAAG,IAAI,iBAAiB,CAAC,EAAE,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,IAAI,mBAAmB,CAAC,EAAE,CAAC,CAAC;IAClD,MAAM,UAAU,GAAG,IAAI,qBAAqB,CAAC,EAAE,CAAC,CAAC;IACjD,MAAM,YAAY,GAAG,IAAI,8BAA8B,CAAC,EAAE,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,IAAI,+BAA+B,CAAC,EAAE,CAAC,CAAC;IAE1D,MAAM,OAAO,GAAG,WAAW,CAAC,oBAAoB,CAAC,kBAAkB,CAAC,CAAC;IACrE,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;IAC/E,CAAC;IACD,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,eA
Ae,CAAC,CAAC;IAC/C,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,0EAA0E,CAAC,CAAC;IAC9F,CAAC;IAED,MAAM,YAAY,GAAG,UAAU,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;IACtE,MAAM,MAAM,GAAmC;QAC7C,WAAW;QACX,QAAQ;QACR,MAAM;QACN,SAAS,EAAE,kBAAkB;QAC7B,SAAS;QACT,WAAW;QACX,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,YAAY,EAAE,eAAe;QAC7B,MAAM,EAAE;YACN,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,eAAe,EAAE,MAAM,CAAC,eAAe;YACvC,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB;QACD,UAAU,EAAE;YACV,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,QAAQ,EAAE,UAAU,CAAC,QAAQ;SAC9B;QACD,QAAQ,EAAE;YACR,MAAM,EAAE,SAAS,CAAC,MAAM;YACxB,aAAa,EAAE,SAAS,CAAC,aAAa;YACtC,kBAAkB,EAAE,SAAS,CAAC,kBAAkB;SACjD;QACD,WAAW,EAAE;YACX,YAAY,EAAE,WAAW,CAAC,KAAK,EAAE;YACjC,mBAAmB,EAAE,aAAa,CAAC,KAAK,EAAE;YAC1C,gBAAgB,EAAE,UAAU,CAAC,KAAK,EAAE;YACpC,mBAAmB,EAAE,YAAY,CAAC,KAAK,EAAE;YACzC,gBAAgB,EAAE,SAAS,CAAC,KAAK,EAAE;YACnC,gBAAgB,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC;SAChE;QACD,IAAI,EAAE;YACJ,EAAE,EAAE,IAAI,CAAC,EAAE;YACX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,aAAa,EAAE,IAAI,CAAC,cAAc,IAAI,UAAU;YAChD,UAAU,EAAE,IAAI,CAAC,WAAW;YAC5B,WAAW,EAAE,IAAI,CAAC,YAAY;YAC9B,WAAW,EAAE,IAAI,CAAC,YAAY;YAC9B,mBAAmB,EAAE,IAAI,CAAC,qBAAqB;SAChD;KACF,CAAC;IAEF,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,sBAAsB,CAAC,CAAC;IACzD,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,EAAE,oBAAoB,CAAC,CAAC;IAC3D,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACzD,aAAa,CAAC,YAAY,EAAE,sCAAsC,CAAC,MAAM,CAAC,CAAC,CAAC;IAE5E,OAAO;QACL,SAAS;QACT,QAAQ;QACR,YAAY;QACZ,MAAM;KACP,CAAC;AACJ,CAAC,CAAC"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import type { HybridInvocationTrace, HybridReviewArtifact } from "../types/domain.js";
|
|
2
|
+
/**
 * Release recommendation values reported by the hybrid rollout summary
 * (see the `recommendation` fields of `HybridPhase1RolloutSummary`).
 */
export type HybridPhase1ReleaseRecommendation =
    | "blocked"
    | "shadow_only"
    | "canary_ready"
    | "live_ready";
|
|
3
|
+
/**
 * Aggregate report returned by `buildHybridPhase1RolloutSummary`.
 * All `*Rate` fields are fractions of the total trace count, rounded to four
 * decimal places, and are 0 when there are no traces.
 */
export type HybridPhase1RolloutSummary = {
    /** Number of traces observed per `route` value. */
    routeDistribution: Record<string, number>;
    /** Share of traces routed to "ESCALATE_SYNC_EXPLAIN". */
    syncEscalationRate: number;
    /** Share of traces routed to "ESCALATE_ASYNC_POSTMORTEM". */
    asyncReviewSchedulingRate: number;
    /** Share of traces whose `validation_status` is "accepted". */
    workerOutputValidityRate: number;
    /** Share of traces whose `validation_status` is "fallback". */
    fallbackRate: number;
    /** Counts taken over traces whose `worker_task` is "explain_decision". */
    explanationQualitySummary: {
        /** Traces with `output_action` "surfaced". */
        surfaced: number;
        /** Traces with `output_action` "none". */
        shadowed: number;
        /** Traces with `validation_status` "fallback". */
        fallbacks: number;
    };
    /** Counts describing postmortem review output. */
    postmortemQualitySummary: {
        /** "postmortem_review" traces with `output_action` "stored". */
        storedArtifacts: number;
        /** Review artifacts whose `approval_class` is "policy_gated". */
        policyGatedArtifacts: number;
        /** "postmortem_review" traces with `output_action` "rejected". */
        rejectedRuns: number;
    };
    /** Only populated when a `phase2ExplainGate` is supplied to the builder. */
    phase2ExplainSummary?: {
        llmBackedAttempts: number;
        llmBackedFallbacks: number;
        recommendation: HybridPhase1ReleaseRecommendation;
    };
    /** Only populated when a `phase3PostmortemGate` is supplied to the builder. */
    phase3PostmortemSummary?: {
        llmBackedAttempts: number;
        llmBackedFallbacks: number;
        recommendation: HybridPhase1ReleaseRecommendation;
    };
    /** The release gate passed to the builder, echoed back unchanged. */
    releaseGate?: {
        stage: "offline" | "shadow" | "canary";
        routeGatePassed: boolean;
        explainGatePassed: boolean;
        postmortemGatePassed: boolean;
        runtimeGuardrailsPassed: boolean;
    };
    /** Overall recommendation derived from `releaseGate` and the trace count. */
    recommendation: HybridPhase1ReleaseRecommendation;
};
|
|
38
|
+
/**
 * Builds the hybrid rollout summary from invocation traces and review
 * artifacts.
 *
 * The optional gates carry pre-computed pass/fail flags; each one, when
 * supplied, yields a recommendation for its own phase based on the gate's
 * `stage` and whether all of its flags passed.
 */
export declare const buildHybridPhase1RolloutSummary: (input: {
    traces: HybridInvocationTrace[];
    artifacts: HybridReviewArtifact[];
    /** Same shape as the `releaseGate` echoed in the returned summary. */
    releaseGate?: NonNullable<HybridPhase1RolloutSummary["releaseGate"]>;
    phase2ExplainGate?: {
        stage: "offline" | "shadow" | "canary";
        explainFaithfulnessPassed: boolean;
        explainFallbackRatePassed: boolean;
        explainTimeoutRatePassed: boolean;
    };
    phase3PostmortemGate?: {
        stage: "offline" | "shadow" | "canary";
        schemaValidOutputRatePassed: boolean;
        timeoutFallbackRatePassed: boolean;
        providerUnavailableFallbackRatePassed: boolean;
        blockedClassificationStabilityPassed: boolean;
        artifactSpamRatePassed: boolean;
        backlogGrowthPassed: boolean;
    };
}) => HybridPhase1RolloutSummary;
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
 * Divides `value` by `total` and rounds the quotient to four decimal places.
 * Yields 0 whenever `total` is not greater than zero.
 */
const ratio = (value, total) => {
    if (!(total > 0)) {
        return 0;
    }
    const quotient = value / total;
    return Number(quotient.toFixed(4));
};
|
|
2
|
+
// Recommendation unlocked by a fully passing gate at each evaluation stage.
const STAGE_RECOMMENDATIONS = new Map([
    ["offline", "shadow_only"],
    ["shadow", "canary_ready"],
    ["canary", "live_ready"],
]);

// Translates a gate outcome into a release recommendation. A gate that did
// not pass, or that reports an unrecognized stage, stays "blocked".
const recommendationForStage = (stage, passed) => (passed ? (STAGE_RECOMMENDATIONS.get(stage) ?? "blocked") : "blocked");

// Counts the entries of `items` that satisfy `predicate`.
const countWhere = (items, predicate) => items.filter(predicate).length;

// True for traces whose worker output was replaced by a fallback.
const isFallback = (trace) => trace.validation_status === "fallback";

// Tallies traces per route. Counting happens in a Map so that route names
// colliding with inherited Object.prototype members ("constructor",
// "toString", "__proto__", ...) are counted like any other route instead of
// reading the inherited value.
const countRoutes = (traces) => {
    const counts = new Map();
    for (const trace of traces) {
        const route = String(trace.route);
        counts.set(route, (counts.get(route) ?? 0) + 1);
    }
    return Object.fromEntries(counts);
};

// Builds the per-phase LLM summary for an already-filtered set of traces.
const summarizeLlmPhase = (llmTraces, stage, gatesPassed) => ({
    llmBackedAttempts: llmTraces.length,
    llmBackedFallbacks: countWhere(llmTraces, isFallback),
    recommendation: recommendationForStage(stage, gatesPassed),
});

/**
 * Summarizes hybrid invocation traces and review artifacts into rollout
 * metrics and release recommendations.
 *
 * @param input - `traces` and `artifacts` to aggregate, plus the optional
 *   `releaseGate`, `phase2ExplainGate` and `phase3PostmortemGate` flag sets.
 * @returns The route distribution, rates relative to the total trace count
 *   (rounded to four decimals by `ratio`), explain/postmortem counters, the
 *   optional per-phase LLM summaries (present only when the matching gate is
 *   supplied), the echoed `releaseGate`, and the overall `recommendation`.
 *   The overall recommendation is "blocked" unless every release-gate flag is
 *   strictly `true` and at least one trace exists.
 */
export const buildHybridPhase1RolloutSummary = (input) => {
    const { traces, artifacts } = input;
    const totalTraces = traces.length;

    const syncEscalations = countWhere(traces, (trace) => trace.route === "ESCALATE_SYNC_EXPLAIN");
    const asyncReviews = countWhere(traces, (trace) => trace.route === "ESCALATE_ASYNC_POSTMORTEM");
    const accepted = countWhere(traces, (trace) => trace.validation_status === "accepted");
    const fallbacks = countWhere(traces, isFallback);

    const explainTraces = traces.filter((trace) => trace.worker_task === "explain_decision");
    const postmortemTraces = traces.filter((trace) => trace.worker_task === "postmortem_review");

    const explanationQualitySummary = {
        surfaced: countWhere(explainTraces, (trace) => trace.output_action === "surfaced"),
        shadowed: countWhere(explainTraces, (trace) => trace.output_action === "none"),
        fallbacks: countWhere(explainTraces, isFallback),
    };
    const postmortemQualitySummary = {
        storedArtifacts: countWhere(postmortemTraces, (trace) => trace.output_action === "stored"),
        policyGatedArtifacts: countWhere(artifacts, (artifact) => artifact.approval_class === "policy_gated"),
        rejectedRuns: countWhere(postmortemTraces, (trace) => trace.output_action === "rejected"),
    };

    // LLM-backed runs are identified by the worker profile version prefix.
    const llmExplainTraces = explainTraces.filter((trace) => trace.worker_profile_version?.startsWith("hybrid-explain-llm"));
    const llmPostmortemTraces = postmortemTraces.filter((trace) => trace.worker_profile_version?.startsWith("hybrid-postmortem-llm"));

    const explainGate = input.phase2ExplainGate;
    const phase2ExplainSummary = explainGate
        ? summarizeLlmPhase(llmExplainTraces, explainGate.stage, explainGate.explainFaithfulnessPassed
            && explainGate.explainFallbackRatePassed
            && explainGate.explainTimeoutRatePassed)
        : undefined;

    const postmortemGate = input.phase3PostmortemGate;
    const phase3PostmortemSummary = postmortemGate
        ? summarizeLlmPhase(llmPostmortemTraces, postmortemGate.stage, postmortemGate.schemaValidOutputRatePassed
            && postmortemGate.timeoutFallbackRatePassed
            && postmortemGate.providerUnavailableFallbackRatePassed
            && postmortemGate.blockedClassificationStabilityPassed
            && postmortemGate.artifactSpamRatePassed
            && postmortemGate.backlogGrowthPassed)
        : undefined;

    const gate = input.releaseGate;
    // The overall gate only counts flags that are strictly `true`.
    const gatesPassed = gate?.routeGatePassed === true
        && gate.explainGatePassed === true
        && gate.postmortemGatePassed === true
        && gate.runtimeGuardrailsPassed === true;

    return {
        routeDistribution: countRoutes(traces),
        syncEscalationRate: ratio(syncEscalations, totalTraces),
        asyncReviewSchedulingRate: ratio(asyncReviews, totalTraces),
        workerOutputValidityRate: ratio(accepted, totalTraces),
        fallbackRate: ratio(fallbacks, totalTraces),
        explanationQualitySummary,
        postmortemQualitySummary,
        phase2ExplainSummary,
        phase3PostmortemSummary,
        releaseGate: gate,
        // Without any traces there is no evidence to promote on.
        recommendation: recommendationForStage(gate?.stage, gatesPassed && totalTraces > 0),
    };
};
|
|
108
|
+
//# sourceMappingURL=hybrid-phase1-rollout-summary.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid-phase1-rollout-summary.js","sourceRoot":"","sources":["../../src/evaluation/hybrid-phase1-rollout-summary.ts"],"names":[],"mappings":"AAwCA,MAAM,KAAK,GAAG,CAAC,KAAa,EAAE,KAAa,EAAU,EAAE,CACrD,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAErD,MAAM,CAAC,MAAM,+BAA+B,GAAG,CAAC,KAyB/C,EAA8B,EAAE;IAC/B,MAAM,iBAAiB,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAyB,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE;QACnF,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,GAAG,CAAC;IACb,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;IACxC,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,KAAK,uBAAuB,CAAC,CAAC,MAAM,CAAC;IACvG,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,KAAK,2BAA2B,CAAC,CAAC,MAAM,CAAC;IACxG,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,iBAAiB,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAC/F,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,iBAAiB,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAEhG,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,KAAK,kBAAkB,CAAC,CAAC;IAC/F,MAAM,gBAAgB,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,KAAK,mBAAmB,CAAC,CAAC;IAEnG,MAAM,yBAAyB,GAAG;QAChC,QAAQ,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,aAAa,KAAK,UAAU,CAAC,CAAC,MAAM;QACpF,QAAQ,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,aAAa,KAAK,MAAM,CAAC,CAAC,MAAM;QAChF,SAAS,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,iBAAiB,KAAK,UAAU,CAAC,CAAC,MAAM;KAC1F,CAAC;IAEF,MAAM,wBAAwB,GAAG;QAC/B,eAAe,EAAE,gBAAgB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,aAAa,KAAK,QAAQ,CAAC,CAAC,MAAM;QAC5F,oBAAoB,EAAE,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,cAAc,KAAK,cAAc,CAAC,CAAC,MAAM;QAC7G,YAAY,EAAE,g
BAAgB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,aAAa,KAAK,UAAU,CAAC,CAAC,MAAM;KAC5F,CAAC;IAEF,MAAM,gBAAgB,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,sBAAsB,EAAE,UAAU,CAAC,oBAAoB,CAAC,CAAC,CAAC;IACzH,MAAM,iBAAiB,GAAG,KAAK,CAAC,iBAAiB,CAAC;IAClD,IAAI,oBAAwE,CAAC;IAC7E,IAAI,iBAAiB,EAAE,CAAC;QACtB,MAAM,kBAAkB,GACtB,iBAAiB,CAAC,yBAAyB;eACxC,iBAAiB,CAAC,yBAAyB;eAC3C,iBAAiB,CAAC,wBAAwB,CAAC;QAChD,IAAI,cAAc,GAAsC,SAAS,CAAC;QAClE,IAAI,kBAAkB,EAAE,CAAC;YACvB,IAAI,iBAAiB,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC1C,cAAc,GAAG,aAAa,CAAC;YACjC,CAAC;iBAAM,IAAI,iBAAiB,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAChD,cAAc,GAAG,cAAc,CAAC;YAClC,CAAC;iBAAM,IAAI,iBAAiB,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAChD,cAAc,GAAG,YAAY,CAAC;YAChC,CAAC;QACH,CAAC;QACD,oBAAoB,GAAG;YACrB,iBAAiB,EAAE,gBAAgB,CAAC,MAAM;YAC1C,kBAAkB,EAAE,gBAAgB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,iBAAiB,KAAK,UAAU,CAAC,CAAC,MAAM;YACrG,cAAc;SACf,CAAC;IACJ,CAAC;IAED,MAAM,mBAAmB,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAC5D,KAAK,CAAC,sBAAsB,EAAE,UAAU,CAAC,uBAAuB,CAAC,CAClE,CAAC;IACF,MAAM,oBAAoB,GAAG,KAAK,CAAC,oBAAoB,CAAC;IACxD,IAAI,uBAA8E,CAAC;IACnF,IAAI,oBAAoB,EAAE,CAAC;QACzB,MAAM,qBAAqB,GACzB,oBAAoB,CAAC,2BAA2B;eAC7C,oBAAoB,CAAC,yBAAyB;eAC9C,oBAAoB,CAAC,qCAAqC;eAC1D,oBAAoB,CAAC,oCAAoC;eACzD,oBAAoB,CAAC,sBAAsB;eAC3C,oBAAoB,CAAC,mBAAmB,CAAC;QAC9C,IAAI,cAAc,GAAsC,SAAS,CAAC;QAClE,IAAI,qBAAqB,EAAE,CAAC;YAC1B,IAAI,oBAAoB,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC7C,cAAc,GAAG,aAAa,CAAC;YACjC,CAAC;iBAAM,IAAI,oBAAoB,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBACnD,cAAc,GAAG,cAAc,CAAC;YAClC,CAAC;iBAAM,IAAI,oBAAoB,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBACnD,cAAc,GAAG,YAAY,CAAC;YAChC,CAAC;QACH,CAAC;QACD,uBAAuB,GAAG;YACxB,iBAAiB,EAAE,mBAAmB,CAAC,MAAM;YAC7C,kBAAkB,EAAE,mBAAmB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,iBAAiB,KAAK,UAAU,CAAC,CAAC,MAAM;YACxG,cAAc;SACf,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC;IAC/B,IAAI,cAAc,GAAsC,SAAS,CAAC;IAClE,MAAM,WAAW,GACf,IAAI,EAAE,eAAe,KAAK,IAAI;WAC3B,IAAI,CAAC,iBAAiB,KAAK,IAAI;WAC/B,IA
AI,CAAC,oBAAoB,KAAK,IAAI;WAClC,IAAI,CAAC,uBAAuB,KAAK,IAAI,CAAC;IAC3C,IAAI,WAAW,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;QACnC,IAAI,IAAI,EAAE,KAAK,KAAK,SAAS,EAAE,CAAC;YAC9B,cAAc,GAAG,aAAa,CAAC;QACjC,CAAC;aAAM,IAAI,IAAI,EAAE,KAAK,KAAK,QAAQ,EAAE,CAAC;YACpC,cAAc,GAAG,cAAc,CAAC;QAClC,CAAC;aAAM,IAAI,IAAI,EAAE,KAAK,KAAK,QAAQ,EAAE,CAAC;YACpC,cAAc,GAAG,YAAY,CAAC;QAChC,CAAC;IACH,CAAC;IAED,OAAO;QACL,iBAAiB;QACjB,kBAAkB,EAAE,KAAK,CAAC,eAAe,EAAE,WAAW,CAAC;QACvD,yBAAyB,EAAE,KAAK,CAAC,YAAY,EAAE,WAAW,CAAC;QAC3D,wBAAwB,EAAE,KAAK,CAAC,QAAQ,EAAE,WAAW,CAAC;QACtD,YAAY,EAAE,KAAK,CAAC,SAAS,EAAE,WAAW,CAAC;QAC3C,yBAAyB;QACzB,wBAAwB;QACxB,oBAAoB;QACpB,uBAAuB;QACvB,WAAW,EAAE,IAAI;QACjB,cAAc;KACf,CAAC;AACJ,CAAC,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Metrics produced by `buildHybridPhase3GateMetrics`. Every rate is rounded to
 * four decimal places and is 0 when its denominator is not positive.
 */
export type HybridPhase3GateMetrics = {
    /** (accepted + policy-gated + blocked outputs) / total attempts. */
    schemaValidOutputRate: number;
    /** Timeout fallbacks / total attempts. */
    timeoutFallbackRate: number;
    /** Provider-unavailable fallbacks / total attempts. */
    providerUnavailableFallbackRate: number;
    /** Currently computed with the same formula as `schemaValidOutputRate`. */
    blockedPolicyGatedStability: number;
    /** False-positive recommendations / total attempts. */
    falsePositiveRecommendationRate: number;
    /** (accepted + policy-gated artifacts) / scheduled eligible runs. */
    artifactSpamRate: number;
    /** Relative backlog change versus the baseline; 0 when the baseline backlog is not positive. */
    backlogGrowthVsBaseline: number;
};
|
|
10
|
+
/**
 * Derives the phase 3 gate metrics from raw outcome counters.
 *
 * "Total attempts" is the sum of accepted artifacts, policy-gated artifacts,
 * blocked outputs and the three fallback counters.
 */
export declare const buildHybridPhase3GateMetrics: (input: {
    scheduledEligibleRuns: number;
    acceptedArtifacts: number;
    policyGatedArtifacts: number;
    blockedOutputs: number;
    timeoutFallbacks: number;
    providerUnavailableFallbacks: number;
    validationFailedFallbacks: number;
    falsePositiveRecommendations: number;
    deterministicBaseline: {
        eligibleRuns: number;
        backlogSize: number;
    };
    currentWindow: {
        backlogSize: number;
    };
}) => HybridPhase3GateMetrics;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Returns `value / total` rounded to four decimal places, or 0 when `total`
 * is not a positive number.
 */
const ratio = (value, total) => {
    const hasPositiveTotal = total > 0;
    return hasPositiveTotal ? Number((value / total).toFixed(4)) : 0;
};
|
|
2
|
+
/**
 * Converts raw phase 3 outcome counters into gate metrics.
 *
 * @param input - Outcome counters for the current window together with the
 *   `deterministicBaseline` and `currentWindow` backlog sizes.
 * @returns Rates computed with `ratio` plus the relative backlog growth
 *   versus the baseline, which is 0 when the baseline backlog is not positive.
 */
export const buildHybridPhase3GateMetrics = (input) => {
    const {
        acceptedArtifacts,
        policyGatedArtifacts,
        blockedOutputs,
        timeoutFallbacks,
        providerUnavailableFallbacks,
        validationFailedFallbacks,
    } = input;

    // Outputs that were produced as artifacts, whether or not policy gated.
    const acceptedLike = acceptedArtifacts + policyGatedArtifacts;
    const totalAttempts = acceptedLike
        + blockedOutputs
        + timeoutFallbacks
        + providerUnavailableFallbacks
        + validationFailedFallbacks;

    const baselineBacklog = input.deterministicBaseline.backlogSize;
    let backlogGrowthVsBaseline = 0;
    if (baselineBacklog > 0) {
        const backlogDelta = input.currentWindow.backlogSize - baselineBacklog;
        backlogGrowthVsBaseline = Number((backlogDelta / baselineBacklog).toFixed(4));
    }

    // NOTE(review): schemaValidOutputRate and blockedPolicyGatedStability share
    // this exact formula - confirm that is intended.
    const nonFallbackRate = ratio(acceptedLike + blockedOutputs, totalAttempts);

    return {
        schemaValidOutputRate: nonFallbackRate,
        timeoutFallbackRate: ratio(timeoutFallbacks, totalAttempts),
        providerUnavailableFallbackRate: ratio(providerUnavailableFallbacks, totalAttempts),
        blockedPolicyGatedStability: nonFallbackRate,
        falsePositiveRecommendationRate: ratio(input.falsePositiveRecommendations, totalAttempts),
        artifactSpamRate: ratio(acceptedLike, input.scheduledEligibleRuns),
        backlogGrowthVsBaseline,
    };
};
|
|
23
|
+
//# sourceMappingURL=hybrid-phase3-gate-metrics.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid-phase3-gate-metrics.js","sourceRoot":"","sources":["../../src/evaluation/hybrid-phase3-gate-metrics.ts"],"names":[],"mappings":"AAUA,MAAM,KAAK,GAAG,CAAC,KAAa,EAAE,KAAa,EAAU,EAAE,CACrD,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAErD,MAAM,CAAC,MAAM,4BAA4B,GAAG,CAAC,KAgB5C,EAA2B,EAAE;IAC5B,MAAM,aAAa,GACjB,KAAK,CAAC,iBAAiB;UACrB,KAAK,CAAC,oBAAoB;UAC1B,KAAK,CAAC,cAAc;UACpB,KAAK,CAAC,gBAAgB;UACtB,KAAK,CAAC,4BAA4B;UAClC,KAAK,CAAC,yBAAyB,CAAC;IACpC,MAAM,YAAY,GAAG,KAAK,CAAC,iBAAiB,GAAG,KAAK,CAAC,oBAAoB,CAAC;IAC1E,MAAM,eAAe,GAAG,KAAK,CAAC,qBAAqB,CAAC,WAAW,CAAC;IAChE,MAAM,cAAc,GAAG,KAAK,CAAC,aAAa,CAAC,WAAW,CAAC;IACvD,MAAM,uBAAuB,GAC3B,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,cAAc,GAAG,eAAe,CAAC,GAAG,eAAe,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEtG,OAAO;QACL,qBAAqB,EAAE,KAAK,CAAC,YAAY,GAAG,KAAK,CAAC,cAAc,EAAE,aAAa,CAAC;QAChF,mBAAmB,EAAE,KAAK,CAAC,KAAK,CAAC,gBAAgB,EAAE,aAAa,CAAC;QACjE,+BAA+B,EAAE,KAAK,CAAC,KAAK,CAAC,4BAA4B,EAAE,aAAa,CAAC;QACzF,2BAA2B,EAAE,KAAK,CAAC,YAAY,GAAG,KAAK,CAAC,cAAc,EAAE,aAAa,CAAC;QACtF,+BAA+B,EAAE,KAAK,CAAC,KAAK,CAAC,4BAA4B,EAAE,aAAa,CAAC;QACzF,gBAAgB,EAAE,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,qBAAqB,CAAC;QAClE,uBAAuB;KACxB,CAAC;AACJ,CAAC,CAAC"}
|
|
@@ -63,6 +63,14 @@ export type OpenClawBaselineSummary = {
|
|
|
63
63
|
automaticHelped: number;
|
|
64
64
|
automaticHarmed: number;
|
|
65
65
|
};
|
|
66
|
+
governance: {
|
|
67
|
+
harmfulOrMisfiredHints: number;
|
|
68
|
+
harmfulOrMisfiredRate: number;
|
|
69
|
+
metaDominantSelections: number;
|
|
70
|
+
metaDominantRate: number;
|
|
71
|
+
realDevAlignedSelections: number;
|
|
72
|
+
realDevAlignedRate: number;
|
|
73
|
+
};
|
|
66
74
|
benchmark: BenchmarkSummary;
|
|
67
75
|
trend?: OpenClawBaselineTrend;
|
|
68
76
|
modeComparison: {
|
|
@@ -2,6 +2,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
|
2
2
|
import { dirname, join, resolve } from "node:path";
|
|
3
3
|
import { loadConfig } from "../config/load-config.js";
|
|
4
4
|
import { resolveExperienceEnginePaths } from "../config/path-resolver.js";
|
|
5
|
+
import { deriveGovernanceSignals, isPotentialMisfire, parseInjectionScorecard } from "../experience-management/governance-observability.js";
|
|
5
6
|
import { openDatabase } from "../store/sqlite/db.js";
|
|
6
7
|
import { InjectionRepository } from "../store/sqlite/repositories/injection-repo.js";
|
|
7
8
|
import { buildBenchmarkSummary, buildModeBenchmarkSummary } from "./benchmark-summary.js";
|
|
@@ -145,6 +146,15 @@ export const collectOpenClawBaselineSummary = (db, config, options = {}) => {
|
|
|
145
146
|
automaticHarmed: injectionRepo.countAutomaticFeedbackByDeliveryMode("holdout", "mark_harmed")
|
|
146
147
|
})
|
|
147
148
|
};
|
|
149
|
+
const governanceRows = db.prepare(`SELECT scorecard_json, harm_observed, attribution_reason
|
|
150
|
+
FROM injection_events ${recordFilter}`).all();
|
|
151
|
+
const harmfulOrMisfiredHints = governanceRows.filter((row) => isPotentialMisfire({
|
|
152
|
+
harm_observed: row.harm_observed == null ? null : Boolean(row.harm_observed),
|
|
153
|
+
attribution_reason: row.attribution_reason ?? undefined
|
|
154
|
+
})).length;
|
|
155
|
+
const governanceSignals = governanceRows.map((row) => deriveGovernanceSignals(parseInjectionScorecard(row.scorecard_json)));
|
|
156
|
+
const metaDominantSelections = governanceSignals.filter((signal) => signal.metaDominant).length;
|
|
157
|
+
const realDevAlignedSelections = governanceSignals.filter((signal) => signal.realDevAligned).length;
|
|
148
158
|
const attributionReasons = [
|
|
149
159
|
"success_outcome",
|
|
150
160
|
"relevant_failure",
|
|
@@ -219,6 +229,14 @@ export const collectOpenClawBaselineSummary = (db, config, options = {}) => {
|
|
|
219
229
|
automaticHelped,
|
|
220
230
|
automaticHarmed
|
|
221
231
|
},
|
|
232
|
+
governance: {
|
|
233
|
+
harmfulOrMisfiredHints,
|
|
234
|
+
harmfulOrMisfiredRate: ratio(harmfulOrMisfiredHints, injectionTotal),
|
|
235
|
+
metaDominantSelections,
|
|
236
|
+
metaDominantRate: ratio(metaDominantSelections, injectionTotal),
|
|
237
|
+
realDevAlignedSelections,
|
|
238
|
+
realDevAlignedRate: ratio(realDevAlignedSelections, injectionTotal)
|
|
239
|
+
},
|
|
222
240
|
benchmark: buildBenchmarkSummary({
|
|
223
241
|
decisions: injectionTotal,
|
|
224
242
|
live: liveDecisions,
|
|
@@ -316,6 +334,15 @@ export const renderOpenClawBaselineMarkdown = (summary) => `# OpenClaw Baseline
|
|
|
316
334
|
- Automatic helped: ${summary.effectiveness.automaticHelped}
|
|
317
335
|
- Automatic harmed: ${summary.effectiveness.automaticHarmed}
|
|
318
336
|
|
|
337
|
+
## Governance
|
|
338
|
+
|
|
339
|
+
- Harmful or misfired hints: ${summary.governance.harmfulOrMisfiredHints}
|
|
340
|
+
- Harmful or misfired rate: ${summary.governance.harmfulOrMisfiredRate}
|
|
341
|
+
- Meta-dominant selections: ${summary.governance.metaDominantSelections}
|
|
342
|
+
- Meta-dominant rate: ${summary.governance.metaDominantRate}
|
|
343
|
+
- Real-dev-aligned selections: ${summary.governance.realDevAlignedSelections}
|
|
344
|
+
- Real-dev-aligned rate: ${summary.governance.realDevAlignedRate}
|
|
345
|
+
|
|
319
346
|
## Benchmark Summary
|
|
320
347
|
|
|
321
348
|
- Delivery rate: ${summary.benchmark.deliveryRate}
|