npm - ultimate-pi - Versions diffs - 0.1.7 → 0.2.2 - Mend

ultimate-pi 0.1.7 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (524) hide show

package/.pi/harness/router/apply-router-proposal.mjs ADDED Viewed

@@ -0,0 +1,153 @@
+#!/usr/bin/env node
+import crypto from "node:crypto";
+import fs from "node:fs";
+import path from "node:path";
+const ROUTER_PATH = ".pi/model-router.json";
+const BACKUP_DIR = ".pi/harness/router/backups";
+function fail(message) {
+	process.stderr.write(`Error: ${message}\n`);
+	process.exit(1);
+}
+function parseArgs(argv) {
+	const args = {};
+	for (let i = 0; i < argv.length; i++) {
+		const token = argv[i];
+		if (!token.startsWith("--")) continue;
+		const key = token.slice(2);
+		const value = argv[i + 1];
+		if (!value || value.startsWith("--")) {
+			args[key] = true;
+			continue;
+		}
+		args[key] = value;
+		i++;
+	}
+	return args;
+}
+function readJson(filePath, label) {
+	if (!fs.existsSync(filePath)) fail(`${label} not found: ${filePath}`);
+	try {
+		return JSON.parse(fs.readFileSync(filePath, "utf8"));
+	} catch (error) {
+		fail(`${label} is not valid JSON (${filePath}): ${error.message}`);
+	}
+}
+function sha256FromJson(value) {
+	const canonical = `${JSON.stringify(value, null, 2)}\n`;
+	return crypto.createHash("sha256").update(canonical).digest("hex");
+}
+function validateProposal(proposal) {
+	if (proposal.status !== "proposed") {
+		fail(`proposal status must be 'proposed', got '${proposal.status}'`);
+	}
+	if (proposal.router_path !== ROUTER_PATH) {
+		fail(`proposal router_path must be '${ROUTER_PATH}'`);
+	}
+	const evidence = proposal.evidence ?? {};
+	if (
+		!Number.isInteger(evidence.sample_count) ||
+		!Number.isInteger(evidence.min_sample_count)
+	) {
+		fail("proposal evidence sample counts are invalid");
+	}
+	if (evidence.sample_count < evidence.min_sample_count) {
+		fail("proposal evidence does not meet minimum sample threshold");
+	}
+	if (evidence.regression_guard_passed !== true) {
+		fail("proposal regression guard is not passing");
+	}
+	if (!proposal.candidate_router || typeof proposal.candidate_router !== "object") {
+		fail("proposal missing candidate_router object");
+	}
+}
+const args = parseArgs(process.argv.slice(2));
+if (args.help || args.h) {
+	process.stdout.write(
+		[
+			"Usage:",
+			"  node .pi/harness/router/apply-router-proposal.mjs \\",
+			"    --proposal <proposal.json> \\",
+			"    --approve-by <human> \\",
+			"    --justification <reason> \\",
+			"    --write",
+			"",
+			"Behavior:",
+			"  - validates proposal status and evidence",
+			"  - verifies base router hash matches current router file",
+			"  - creates backup before atomic write",
+			"  - refuses write unless explicit --write is provided",
+		].join("\n"),
+	);
+	process.exit(0);
+}
+if (!args.proposal) fail("missing --proposal");
+if (!args["approve-by"]) fail("missing --approve-by");
+if (!args.justification) fail("missing --justification");
+if (!args.write) {
+	fail("missing --write (blind writes and implicit applies are disallowed)");
+}
+const proposalPath = path.resolve(args.proposal);
+const proposal = readJson(proposalPath, "proposal");
+const currentRouter = readJson(ROUTER_PATH, "current router");
+validateProposal(proposal);
+const currentHash = sha256FromJson(currentRouter);
+if (currentHash !== proposal.base_router_sha256) {
+	fail(
+		[
+			"base router hash mismatch; refusing apply.",
+			`current:  ${currentHash}`,
+			`proposal: ${proposal.base_router_sha256}`,
+		].join("\n"),
+	);
+}
+const candidateHash = sha256FromJson(proposal.candidate_router);
+if (candidateHash !== proposal.candidate_router_sha256) {
+	fail("proposal candidate_router hash mismatch; artifact may be tampered");
+}
+const now = new Date().toISOString();
+fs.mkdirSync(BACKUP_DIR, { recursive: true });
+const backupPath = path.join(
+	BACKUP_DIR,
+	`model-router.${now.replace(/[:.]/g, "-")}.json`,
+);
+fs.copyFileSync(ROUTER_PATH, backupPath);
+const routerTemp = `${ROUTER_PATH}.tmp`;
+fs.writeFileSync(routerTemp, `${JSON.stringify(proposal.candidate_router, null, 2)}\n`);
+fs.renameSync(routerTemp, ROUTER_PATH);
+proposal.status = "approved_applied";
+proposal.approval = {
+	required: true,
+	approved_by: args["approve-by"],
+	approved_at: now,
+	justification: args.justification,
+};
+proposal.applied_router_sha256 = candidateHash;
+proposal.backup_router_path = backupPath;
+proposal.applied_at = now;
+fs.writeFileSync(proposalPath, `${JSON.stringify(proposal, null, 2)}\n`);
+process.stdout.write(
+	[
+		"Router proposal applied safely.",
+		`proposal: ${proposalPath}`,
+		`backup: ${backupPath}`,
+		`router: ${ROUTER_PATH}`,
+	].join("\n") + "\n",
+);

package/.pi/harness/router/propose-router-tuning.mjs ADDED Viewed

@@ -0,0 +1,149 @@
+#!/usr/bin/env node
+import crypto from "node:crypto";
+import fs from "node:fs";
+import path from "node:path";
+const ROUTER_PATH = ".pi/model-router.json";
+function fail(message) {
+	process.stderr.write(`Error: ${message}\n`);
+	process.exit(1);
+}
+function parseArgs(argv) {
+	const args = {};
+	for (let i = 0; i < argv.length; i++) {
+		const token = argv[i];
+		if (!token.startsWith("--")) continue;
+		const key = token.slice(2);
+		const value = argv[i + 1];
+		if (!value || value.startsWith("--")) {
+			args[key] = true;
+			continue;
+		}
+		args[key] = value;
+		i++;
+	}
+	return args;
+}
+function readJson(filePath, label) {
+	if (!fs.existsSync(filePath)) {
+		fail(`${label} not found: ${filePath}`);
+	}
+	try {
+		return JSON.parse(fs.readFileSync(filePath, "utf8"));
+	} catch (error) {
+		fail(`${label} is not valid JSON (${filePath}): ${error.message}`);
+	}
+}
+function sha256FromJson(value) {
+	const canonical = `${JSON.stringify(value, null, 2)}\n`;
+	return crypto.createHash("sha256").update(canonical).digest("hex");
+}
+function ensureEvidence(evidence) {
+	const required = [
+		"sample_count",
+		"min_sample_count",
+		"success_rate_delta",
+		"cost_per_task_delta",
+		"regression_guard_passed",
+		"trace_refs",
+	];
+	for (const field of required) {
+		if (!(field in evidence)) fail(`evidence missing required field: ${field}`);
+	}
+	if (!Number.isInteger(evidence.sample_count) || evidence.sample_count < 1) {
+		fail("evidence.sample_count must be an integer >= 1");
+	}
+	if (
+		!Number.isInteger(evidence.min_sample_count) ||
+		evidence.min_sample_count < 1
+	) {
+		fail("evidence.min_sample_count must be an integer >= 1");
+	}
+	if (evidence.sample_count < evidence.min_sample_count) {
+		fail(
+			`insufficient sample_count (${evidence.sample_count} < ${evidence.min_sample_count})`,
+		);
+	}
+	if (typeof evidence.success_rate_delta !== "number") {
+		fail("evidence.success_rate_delta must be numeric");
+	}
+	if (typeof evidence.cost_per_task_delta !== "number") {
+		fail("evidence.cost_per_task_delta must be numeric");
+	}
+	if (evidence.regression_guard_passed !== true) {
+		fail("evidence.regression_guard_passed must be true");
+	}
+	if (!Array.isArray(evidence.trace_refs) || evidence.trace_refs.length === 0) {
+		fail("evidence.trace_refs must be a non-empty array");
+	}
+}
+const args = parseArgs(process.argv.slice(2));
+if (args.help || args.h) {
+	process.stdout.write(
+		[
+			"Usage:",
+			"  node .pi/harness/router/propose-router-tuning.mjs \\",
+			"    --evidence <evidence.json> \\",
+			"    --candidate <candidate-router.json> \\",
+			"    --proposal-out <proposal.json>",
+			"",
+			"Behavior:",
+			"  - validates evidence thresholds",
+			"  - captures base/candidate router hashes",
+			"  - emits proposal artifact without changing .pi/model-router.json",
+		].join("\n"),
+	);
+	process.exit(0);
+}
+if (!args.evidence) fail("missing --evidence");
+if (!args.candidate) fail("missing --candidate");
+if (!args["proposal-out"]) fail("missing --proposal-out");
+const baseRouter = readJson(ROUTER_PATH, "base router");
+const candidateRouter = readJson(args.candidate, "candidate router");
+const evidence = readJson(args.evidence, "evidence");
+ensureEvidence(evidence);
+const now = new Date().toISOString();
+const proposalId = `router-tune-${now.replace(/[:.]/g, "-")}`;
+const proposal = {
+	schema_version: "1.0.0",
+	proposal_id: proposalId,
+	created_at: now,
+	router_path: ROUTER_PATH,
+	base_router_sha256: sha256FromJson(baseRouter),
+	candidate_router_sha256: sha256FromJson(candidateRouter),
+	evidence,
+	status: "proposed",
+	approval: {
+		required: true,
+		approved_by: null,
+		approved_at: null,
+		justification: null,
+	},
+	candidate_router: candidateRouter,
+};
+const outputPath = path.resolve(args["proposal-out"]);
+fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+fs.writeFileSync(outputPath, `${JSON.stringify(proposal, null, 2)}\n`);
+process.stdout.write(
+	[
+		"Router tuning proposal created.",
+		`proposal_id: ${proposal.proposal_id}`,
+		`output: ${outputPath}`,
+		"status: proposed (no router write performed)",
+	].join("\n") + "\n",
+);

package/.pi/harness/specs/README.md ADDED Viewed

@@ -0,0 +1,37 @@
+# Harness Artifact Contracts
+This directory is the canonical contract surface for Phase 1 harness artifacts.
+## Versioning
+- Contract family version: `1.0.0`
+- Each schema includes a `contract_version` field and `schema_version` metadata.
+- Backward-compatible additions: add optional fields only.
+- Breaking changes: bump major version and add migration notes before use.
+## Scope
+These schemas define the minimum machine-readable contracts for:
+- planning (`PlanPacket`)
+- execution telemetry (`RunTrace`)
+- independent evaluation (`EvalVerdict`)
+- adversarial findings (`AdversaryReport`)
+- incidents and overrides (`IncidentRecord`)
+- debate rounds (`RoundResult`)
+- policy consensus (`ConsensusPacket`)
+- budget hard-stop events (`BudgetExhausted`)
+- router tuning proposals (`RouterTuningProposal`)
+## Governance Defaults Locked In
+- Debate profile is `aggressive` (`max_rounds=6`, `round_token_cap=2500`, `debate_global_cap=35000`)
+- Consensus confidence weights are fixed at:
+  - `claim_quality=0.20`
+  - `reproducibility=0.40`
+  - `agreement=0.40`
+- Severity policy gate thresholds are fixed at:
+  - `security>=0.70` or `correctness>=0.70` blocks
+  - `architecture>=0.80` or `test_integrity>=0.80` blocks
+- Strict pre-PR gate prerequisites are explicit and must all pass.
+- Policy override allows one human approver only, with mandatory justification.

package/.pi/harness/specs/adversary-report.schema.json ADDED Viewed

@@ -0,0 +1,53 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/adversary-report.schema.json",
+	"title": "AdversaryReport",
+	"description": "Adversarial reviewer findings with merge-blocking authority signals.",
+	"type": "object",
+	"additionalProperties": false,
+	"required": [
+		"schema_version",
+		"contract_version",
+		"run_id",
+		"severity",
+		"findings",
+		"repro_steps",
+		"block_merge"
+	],
+	"properties": {
+		"schema_version": {
+			"type": "string",
+			"const": "1.0.0"
+		},
+		"contract_version": {
+			"type": "string",
+			"const": "1.0.0"
+		},
+		"run_id": {
+			"type": "string",
+			"minLength": 1
+		},
+		"severity": {
+			"type": "string",
+			"enum": ["low", "medium", "high", "critical"]
+		},
+		"findings": {
+			"type": "array",
+			"minItems": 1,
+			"items": {
+				"type": "string",
+				"minLength": 1
+			}
+		},
+		"repro_steps": {
+			"type": "array",
+			"items": {
+				"type": "string",
+				"minLength": 1
+			}
+		},
+		"block_merge": {
+			"type": "boolean"
+		}
+	}
+}

package/.pi/harness/specs/budget-exhausted-event.schema.json ADDED Viewed

@@ -0,0 +1,93 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/budget-exhausted-event.schema.json",
+	"title": "BudgetExhausted",
+	"description": "Structured hard-stop event emitted when debate budget is exhausted.",
+	"type": "object",
+	"additionalProperties": false,
+	"required": [
+		"schema_version",
+		"contract_version",
+		"event_type",
+		"run_id",
+		"debate_id",
+		"round_count",
+		"budget_used",
+		"exhaustion_reason",
+		"caps",
+		"minimum_evidence_confidence",
+		"default_policy_outcome",
+		"human_override_allowed"
+	],
+	"properties": {
+		"schema_version": {
+			"type": "string",
+			"const": "1.0.0"
+		},
+		"contract_version": {
+			"type": "string",
+			"const": "1.0.0"
+		},
+		"event_type": {
+			"type": "string",
+			"const": "budget_exhausted"
+		},
+		"run_id": {
+			"type": "string",
+			"minLength": 1
+		},
+		"debate_id": {
+			"type": "string",
+			"minLength": 1
+		},
+		"round_count": {
+			"type": "integer",
+			"minimum": 1,
+			"maximum": 6
+		},
+		"budget_used": {
+			"type": "integer",
+			"minimum": 0
+		},
+		"exhaustion_reason": {
+			"type": "string",
+			"enum": [
+				"max_rounds_reached",
+				"round_token_cap_exceeded",
+				"debate_global_cap_exceeded"
+			]
+		},
+		"caps": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["max_rounds", "round_token_cap", "debate_global_cap"],
+			"properties": {
+				"max_rounds": {
+					"type": "integer",
+					"const": 6
+				},
+				"round_token_cap": {
+					"type": "integer",
+					"const": 2500
+				},
+				"debate_global_cap": {
+					"type": "integer",
+					"const": 35000
+				}
+			}
+		},
+		"minimum_evidence_confidence": {
+			"type": "number",
+			"minimum": 0,
+			"maximum": 1
+		},
+		"default_policy_outcome": {
+			"type": "string",
+			"enum": ["block", "human_required"]
+		},
+		"human_override_allowed": {
+			"type": "boolean",
+			"const": true
+		}
+	}
+}

package/.pi/harness/specs/consensus-packet.schema.json ADDED Viewed

@@ -0,0 +1,175 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://ultimate-pi.local/.pi/harness/specs/consensus-packet.schema.json",
+	"title": "ConsensusPacket",
+	"description": "Final debate consensus consumed by severity-policy-engine and strict pre-PR gate.",
+	"type": "object",
+	"additionalProperties": false,
+	"required": [
+		"schema_version",
+		"contract_version",
+		"run_id",
+		"debate_id",
+		"round_count",
+		"budget_used",
+		"severity_scores",
+		"confidence_weights",
+		"evidence_refs",
+		"strict_gate_prerequisites",
+		"policy_decision",
+		"rationale"
+	],
+	"properties": {
+		"schema_version": {
+			"type": "string",
+			"const": "1.0.0"
+		},
+		"contract_version": {
+			"type": "string",
+			"const": "1.0.0"
+		},
+		"run_id": {
+			"type": "string",
+			"minLength": 1
+		},
+		"debate_id": {
+			"type": "string",
+			"minLength": 1
+		},
+		"round_count": {
+			"type": "integer",
+			"minimum": 1,
+			"maximum": 6
+		},
+		"budget_used": {
+			"type": "integer",
+			"minimum": 0,
+			"maximum": 35000
+		},
+		"severity_scores": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["correctness", "security", "architecture", "test_integrity"],
+			"properties": {
+				"correctness": {
+					"type": "number",
+					"minimum": 0,
+					"maximum": 1
+				},
+				"security": {
+					"type": "number",
+					"minimum": 0,
+					"maximum": 1
+				},
+				"architecture": {
+					"type": "number",
+					"minimum": 0,
+					"maximum": 1
+				},
+				"test_integrity": {
+					"type": "number",
+					"minimum": 0,
+					"maximum": 1
+				}
+			}
+		},
+		"severity_thresholds": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": [
+				"correctness_block_at",
+				"security_block_at",
+				"architecture_block_at",
+				"test_integrity_block_at"
+			],
+			"properties": {
+				"correctness_block_at": {
+					"type": "number",
+					"const": 0.7
+				},
+				"security_block_at": {
+					"type": "number",
+					"const": 0.7
+				},
+				"architecture_block_at": {
+					"type": "number",
+					"const": 0.8
+				},
+				"test_integrity_block_at": {
+					"type": "number",
+					"const": 0.8
+				}
+			}
+		},
+		"confidence_weights": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": ["claim_quality", "reproducibility", "agreement"],
+			"properties": {
+				"claim_quality": {
+					"type": "number",
+					"const": 0.2
+				},
+				"reproducibility": {
+					"type": "number",
+					"const": 0.4
+				},
+				"agreement": {
+					"type": "number",
+					"const": 0.4
+				}
+			}
+		},
+		"evidence_refs": {
+			"type": "array",
+			"items": {
+				"type": "string",
+				"minLength": 1
+			}
+		},
+		"strict_gate_prerequisites": {
+			"type": "object",
+			"additionalProperties": false,
+			"required": [
+				"plan_gate_passed",
+				"execution_completed",
+				"evaluator_passed",
+				"adversarial_debate_completed",
+				"severity_policy_ok",
+				"benchmark_delta_checks_passed",
+				"rollback_artifacts_generated"
+			],
+			"properties": {
+				"plan_gate_passed": {
+					"type": "boolean"
+				},
+				"execution_completed": {
+					"type": "boolean"
+				},
+				"evaluator_passed": {
+					"type": "boolean"
+				},
+				"adversarial_debate_completed": {
+					"type": "boolean"
+				},
+				"severity_policy_ok": {
+					"type": "boolean"
+				},
+				"benchmark_delta_checks_passed": {
+					"type": "boolean"
+				},
+				"rollback_artifacts_generated": {
+					"type": "boolean"
+				}
+			}
+		},
+		"policy_decision": {
+			"type": "string",
+			"enum": ["pass", "conditional_pass", "block", "human_required"]
+		},
+		"rationale": {
+			"type": "string",
+			"minLength": 1
+		}
+	}
+}