@nomos-arc/arc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +10 -0
- package/.nomos-config.json +5 -0
- package/CLAUDE.md +108 -0
- package/LICENSE +190 -0
- package/README.md +569 -0
- package/dist/cli.js +21120 -0
- package/docs/auth/googel_plan.yaml +1093 -0
- package/docs/auth/google_task.md +235 -0
- package/docs/auth/hardened_blueprint.yaml +1658 -0
- package/docs/auth/red_team_report.yaml +336 -0
- package/docs/auth/session_state.yaml +162 -0
- package/docs/certificate/cer_enhance_plan.md +605 -0
- package/docs/certificate/certificate_report.md +338 -0
- package/docs/dev_overview.md +419 -0
- package/docs/feature_assessment.md +156 -0
- package/docs/how_it_works.md +78 -0
- package/docs/infrastructure/map.md +867 -0
- package/docs/init/master_plan.md +3581 -0
- package/docs/init/red_team_report.md +215 -0
- package/docs/init/report_phase_1a.md +304 -0
- package/docs/integrity-gate/enhance_drift.md +703 -0
- package/docs/integrity-gate/overview.md +108 -0
- package/docs/management/manger-task.md +99 -0
- package/docs/management/scafffold.md +76 -0
- package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
- package/docs/map/RED_TEAM_REPORT.md +159 -0
- package/docs/map/map_task.md +147 -0
- package/docs/map/semantic_graph_task.md +792 -0
- package/docs/map/semantic_master_plan.md +705 -0
- package/docs/phase7/TEAM_RED.md +249 -0
- package/docs/phase7/plan.md +1682 -0
- package/docs/phase7/task.md +275 -0
- package/docs/prompts/USAGE.md +312 -0
- package/docs/prompts/architect.md +165 -0
- package/docs/prompts/executer.md +190 -0
- package/docs/prompts/hardener.md +190 -0
- package/docs/prompts/red_team.md +146 -0
- package/docs/verification/goveranance-overview.md +396 -0
- package/docs/verification/governance-overview.md +245 -0
- package/docs/verification/verification-arc-ar.md +560 -0
- package/docs/verification/verification-architecture.md +560 -0
- package/docs/very_next.md +52 -0
- package/docs/whitepaper.md +89 -0
- package/overview.md +1469 -0
- package/package.json +63 -0
- package/src/adapters/__tests__/git.test.ts +296 -0
- package/src/adapters/__tests__/stdio.test.ts +70 -0
- package/src/adapters/git.ts +226 -0
- package/src/adapters/pty.ts +159 -0
- package/src/adapters/stdio.ts +113 -0
- package/src/cli.ts +83 -0
- package/src/commands/apply.ts +47 -0
- package/src/commands/auth.ts +301 -0
- package/src/commands/certificate.ts +89 -0
- package/src/commands/discard.ts +24 -0
- package/src/commands/drift.ts +116 -0
- package/src/commands/index.ts +78 -0
- package/src/commands/init.ts +121 -0
- package/src/commands/list.ts +75 -0
- package/src/commands/map.ts +55 -0
- package/src/commands/plan.ts +30 -0
- package/src/commands/review.ts +58 -0
- package/src/commands/run.ts +63 -0
- package/src/commands/search.ts +147 -0
- package/src/commands/show.ts +63 -0
- package/src/commands/status.ts +59 -0
- package/src/core/__tests__/budget.test.ts +213 -0
- package/src/core/__tests__/certificate.test.ts +385 -0
- package/src/core/__tests__/config.test.ts +191 -0
- package/src/core/__tests__/preflight.test.ts +24 -0
- package/src/core/__tests__/prompt.test.ts +358 -0
- package/src/core/__tests__/review.test.ts +161 -0
- package/src/core/__tests__/state.test.ts +362 -0
- package/src/core/auth/__tests__/manager.test.ts +166 -0
- package/src/core/auth/__tests__/server.test.ts +220 -0
- package/src/core/auth/gcp-projects.ts +160 -0
- package/src/core/auth/manager.ts +114 -0
- package/src/core/auth/server.ts +141 -0
- package/src/core/budget.ts +119 -0
- package/src/core/certificate.ts +502 -0
- package/src/core/config.ts +212 -0
- package/src/core/errors.ts +54 -0
- package/src/core/factory.ts +49 -0
- package/src/core/graph/__tests__/builder.test.ts +272 -0
- package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
- package/src/core/graph/__tests__/enricher.test.ts +299 -0
- package/src/core/graph/__tests__/parser.test.ts +200 -0
- package/src/core/graph/__tests__/pipeline.test.ts +202 -0
- package/src/core/graph/__tests__/renderer.test.ts +128 -0
- package/src/core/graph/__tests__/resolver.test.ts +185 -0
- package/src/core/graph/__tests__/scanner.test.ts +231 -0
- package/src/core/graph/__tests__/show.test.ts +134 -0
- package/src/core/graph/builder.ts +303 -0
- package/src/core/graph/constraints.ts +94 -0
- package/src/core/graph/contract-writer.ts +93 -0
- package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
- package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
- package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
- package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
- package/src/core/graph/drift/classifier.ts +165 -0
- package/src/core/graph/drift/comparator.ts +205 -0
- package/src/core/graph/drift/reporter.ts +77 -0
- package/src/core/graph/enricher.ts +251 -0
- package/src/core/graph/grammar-paths.ts +30 -0
- package/src/core/graph/html-template.ts +493 -0
- package/src/core/graph/map-schema.ts +137 -0
- package/src/core/graph/parser.ts +336 -0
- package/src/core/graph/pipeline.ts +209 -0
- package/src/core/graph/renderer.ts +92 -0
- package/src/core/graph/resolver.ts +195 -0
- package/src/core/graph/scanner.ts +145 -0
- package/src/core/logger.ts +46 -0
- package/src/core/orchestrator.ts +792 -0
- package/src/core/plan-file-manager.ts +66 -0
- package/src/core/preflight.ts +64 -0
- package/src/core/prompt.ts +173 -0
- package/src/core/review.ts +95 -0
- package/src/core/state.ts +294 -0
- package/src/core/worktree-coordinator.ts +77 -0
- package/src/search/__tests__/chunk-extractor.test.ts +339 -0
- package/src/search/__tests__/embedder-auth.test.ts +124 -0
- package/src/search/__tests__/embedder.test.ts +267 -0
- package/src/search/__tests__/graph-enricher.test.ts +178 -0
- package/src/search/__tests__/indexer.test.ts +518 -0
- package/src/search/__tests__/integration.test.ts +649 -0
- package/src/search/__tests__/query-engine.test.ts +334 -0
- package/src/search/__tests__/similarity.test.ts +78 -0
- package/src/search/__tests__/vector-store.test.ts +281 -0
- package/src/search/chunk-extractor.ts +167 -0
- package/src/search/embedder.ts +209 -0
- package/src/search/graph-enricher.ts +95 -0
- package/src/search/indexer.ts +483 -0
- package/src/search/lexical-searcher.ts +190 -0
- package/src/search/query-engine.ts +225 -0
- package/src/search/vector-store.ts +311 -0
- package/src/types/index.ts +572 -0
- package/src/utils/__tests__/ansi.test.ts +54 -0
- package/src/utils/__tests__/frontmatter.test.ts +79 -0
- package/src/utils/__tests__/sanitize.test.ts +229 -0
- package/src/utils/ansi.ts +19 -0
- package/src/utils/context.ts +44 -0
- package/src/utils/frontmatter.ts +27 -0
- package/src/utils/sanitize.ts +78 -0
- package/test/e2e/lifecycle.test.ts +330 -0
- package/test/fixtures/mock-planner-hang.ts +5 -0
- package/test/fixtures/mock-planner.ts +26 -0
- package/test/fixtures/mock-reviewer-bad.ts +8 -0
- package/test/fixtures/mock-reviewer-retry.ts +34 -0
- package/test/fixtures/mock-reviewer.ts +18 -0
- package/test/fixtures/sample-project/src/circular-a.ts +6 -0
- package/test/fixtures/sample-project/src/circular-b.ts +6 -0
- package/test/fixtures/sample-project/src/config.ts +15 -0
- package/test/fixtures/sample-project/src/main.ts +19 -0
- package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
- package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
- package/test/fixtures/sample-project/src/types.ts +14 -0
- package/test/fixtures/sample-project/src/utils/index.ts +14 -0
- package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +12 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
import type { TaskState, NomosConfig } from '../types/index.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Token usage split into its input and output parts.
 *
 * RT2-4.3 fix: the two counts are carried separately so that each one can be
 * priced at its own rate. Values produced by the char/4 heuristic are rough,
 * but keeping them apart still applies the right rate to each component.
 */
export interface TokenEstimate {
  /** Tokens attributed to the input (prompt) side. */
  input_tokens: number;
  /** Tokens attributed to the output side. */
  output_tokens: number;
  /** Combined token count. */
  total: number;
}
|
|
12
|
+
|
|
13
|
+
/**
 * ROUGH ESTIMATE of token usage: one token per four characters, rounded up,
 * computed independently for the prompt and for the output.
 *
 * RT2-4.3 fix: the input and output figures are returned SEPARATELY so that
 * calculateCost() can price each at its own rate. The char/4 heuristic still
 * underestimates CJK text (~1 char per 1-2 tokens), but the split removes the
 * ~3x pricing error that a single flat rate caused.
 *
 * The result carries no marker saying it is an estimate — callers should
 * label these numbers as estimated when displaying them.
 *
 * @param prompt Text sent to the binary.
 * @param output Text produced by the binary.
 * @returns Estimated input, output and combined token counts.
 */
export function estimateTokens(prompt: string, output: string): TokenEstimate {
  const CHARS_PER_TOKEN = 4;
  const toTokens = (text: string): number => Math.ceil(text.length / CHARS_PER_TOKEN);

  const promptTokens = toTokens(prompt);
  const completionTokens = toTokens(output);

  return {
    input_tokens: promptTokens,
    output_tokens: completionTokens,
    total: promptTokens + completionTokens,
  };
}
|
|
26
|
+
|
|
27
|
+
/**
 * Extracts token usage from the binary's output using the configured regex pattern.
 *
 * RT2-4.3 fix: when the pattern captures two groups
 * (e.g., "Input: (\d+).*Output: (\d+)") they are read as input and output
 * counts. When only the first group is captured it is read as a total and
 * split 90/10 input/output as a heuristic for cost estimation.
 *
 * Captured numbers may use commas as thousands separators ("12,345"); the
 * commas are removed before parsing. Without that step parseInt would stop at
 * the first comma and report 12 instead of 12345.
 *
 * @param output Raw text printed by the binary.
 * @param usagePattern Regex source string, or null when no pattern is configured.
 * @returns The parsed counts, or null if the pattern is null/empty, does not
 *   match, captures nothing in group 1, or a captured group is not numeric.
 * @throws SyntaxError if `usagePattern` is not a valid regular expression
 *   (raised by the RegExp constructor).
 */
export function parseTokensFromOutput(
  output: string,
  usagePattern: string | null,
): TokenEstimate | null {
  if (!usagePattern) return null;
  const match = output.match(new RegExp(usagePattern));
  if (!match?.[1]) return null;

  // Drop thousands separators so "1,234" parses as 1234 rather than 1.
  const toCount = (raw: string): number => parseInt(raw.replace(/,/g, ''), 10);

  if (match[2]) {
    // Pattern captured both input and output groups
    const input_tokens = toCount(match[1]);
    const output_tokens = toCount(match[2]);
    if (isNaN(input_tokens) || isNaN(output_tokens)) return null;
    return { input_tokens, output_tokens, total: input_tokens + output_tokens };
  }

  // Single group — total only. Apply 90/10 heuristic split for cost estimation.
  const total = toCount(match[1]);
  if (isNaN(total)) return null;
  const input_tokens = Math.round(total * 0.9);
  // Derive output by subtraction so the two parts always add up to total.
  const output_tokens = total - input_tokens;
  return { input_tokens, output_tokens, total };
}
|
|
57
|
+
|
|
58
|
+
/**
 * RT2-4.3 fix: Cost calculation with separate input/output rates.
 *
 * costMap keys use the basename of the binary cmd (e.g., 'claude', 'codex').
 * The previous version used the raw binaryCmd as the key — if cmd was an
 * absolute path ('/usr/local/bin/claude') lookup returned undefined and cost
 * tracking silently stopped.
 *
 * Rate structure: costMap values are objects with input/output rates per 1K
 * tokens. For backward compatibility, a plain number is treated as the output
 * rate with the input rate at 1/5th of it.
 *
 * Only the map's own entries are consulted. A binary whose basename happens
 * to match an inherited object member (such as 'constructor' or 'toString')
 * is treated as unknown instead of producing a NaN cost.
 *
 * @param tokens Input/output token counts to price.
 * @param binaryCmd Command or path of the binary; only its basename is used.
 * @param costMap Rates per 1K tokens, keyed by binary basename.
 * @returns The cost rounded to 6 decimal places, or 0 when the binary has no
 *   entry in costMap.
 */
export function calculateCost(
  tokens: TokenEstimate,
  binaryCmd: string,
  costMap: Record<string, number | { input: number; output: number }>,
): number {
  // RT2-4.3 fix: normalize binaryCmd to basename for reliable map lookup.
  // '/usr/local/bin/claude' → 'claude', 'npx' → 'npx'
  const key = path.basename(binaryCmd);

  // Plain indexing would also find members inherited from Object.prototype,
  // so require an own property before reading the rate.
  if (!Object.prototype.hasOwnProperty.call(costMap, key)) return 0;
  const rateEntry = costMap[key];
  if (rateEntry === undefined) return 0;

  let inputRate: number;
  let outputRate: number;
  if (typeof rateEntry === 'number') {
    // Backward compat: plain number = output rate; input = 1/5th
    outputRate = rateEntry;
    inputRate = rateEntry / 5;
  } else {
    inputRate = rateEntry.input;
    outputRate = rateEntry.output;
  }

  // Rates are quoted per 1K tokens.
  const inputCost = (tokens.input_tokens / 1000) * inputRate;
  const outputCost = (tokens.output_tokens / 1000) * outputRate;
  // Round to 6 decimal places to keep floating-point noise out of the total.
  return Math.round((inputCost + outputCost) * 1_000_000) / 1_000_000;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Compare a task's token usage against the configured per-task budget.
 *
 * @param state Task state; reads `task_id` and `budget.tokens_used`.
 * @param config Configuration; reads `budget.max_tokens_per_task` and
 *   `budget.warn_at_percent`.
 * @returns `{ allowed: false, error }` once usage has reached the limit,
 *   `{ allowed: true, warning }` once usage has reached the warning
 *   percentage of the limit, and `{ allowed: true }` otherwise.
 */
export function checkBudget(
  state: TaskState,
  config: NomosConfig,
): { allowed: boolean; warning?: string; error?: string } {
  const used = state.budget.tokens_used;
  const limit = config.budget.max_tokens_per_task;
  const warnPercent = config.budget.warn_at_percent;
  const taskId = state.task_id;

  // Hard stop: the budget is spent.
  if (used >= limit) {
    const error = [
      `Token budget exceeded for task "${taskId}". `,
      `Used: ${used} / ${limit}. `,
      `Run: arc plan ${taskId} --extend-budget to increase limit.`,
    ].join('');
    return { allowed: false, error };
  }

  // Below the warning line there is nothing to report.
  const warnFrom = (warnPercent / 100) * limit;
  if (!(used >= warnFrom)) {
    return { allowed: true };
  }

  const percentUsed = Math.round((used / limit) * 100);
  const warning = `Task "${taskId}" at ${percentUsed}% of token budget (${used} / ${limit}).`;
  return { allowed: true, warning };
}
|
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { NomosError } from './errors.js';
|
|
4
|
+
import type {
|
|
5
|
+
TaskState,
|
|
6
|
+
TaskStatus,
|
|
7
|
+
HistoryEntry,
|
|
8
|
+
ReviewIssue,
|
|
9
|
+
CertificatePayload,
|
|
10
|
+
CertificateIteration,
|
|
11
|
+
VerificationResult,
|
|
12
|
+
VerificationCheck,
|
|
13
|
+
} from '../types/index.js';
|
|
14
|
+
|
|
15
|
+
// ── Package version (used in certificate generator field) ────────────────────

// Loading package.json dynamically just to read the version is not worth it,
// so this literal is maintained by hand. Bump it with every release.
const GENERATOR_VERSION = 'nomos-arc@0.1.0';

// ── Eligible statuses for certificate generation ─────────────────────────────

// Task statuses for which a certificate may be generated or accepted.
const ELIGIBLE_STATUSES: Array<TaskStatus> = ['approved', 'merged'];
|
|
24
|
+
|
|
25
|
+
// ── Zod schema for certificate validation (--verify) ─────────────────────────

// Leaf validators reused by the object schemas below.
const TimestampSchema = z.string().datetime();
const CountSchema = z.number().nonnegative();
const UnitScoreSchema = z.number().min(0).max(1);
const RunModeSchema = z.enum(['supervised', 'auto', 'dry-run']);

/** One issue raised by a review. */
const ReviewIssueSchema = z.object({
  severity: z.enum(['high', 'medium', 'low']),
  category: z.enum(['security', 'performance', 'architecture', 'correctness', 'maintainability']),
  description: z.string(),
  suggestion: z.string(),
});

/** Token usage of a single planning or review run. */
const IterationTokensSchema = z.object({
  input: CountSchema,
  output: CountSchema,
  source: z.enum(['metered', 'estimated']),
});

// Fields that planning and review entries have in common, in the order they
// appear in a certificate.
const runEntryShape = {
  binary: z.string(),
  mode: RunModeSchema,
  started_at: TimestampSchema,
  completed_at: TimestampSchema,
  output_hash: z.string(),
  tokens: IterationTokensSchema,
};

const PlanningEntrySchema = z.object({
  ...runEntryShape,
  rules_snapshot: z.array(z.string()),
});

const ReviewEntrySchema = z.object({
  ...runEntryShape,
  score: UnitScoreSchema,
  issues: z.array(ReviewIssueSchema),
  summary: z.string(),
});

/** One plan/review round; `review` is null when the round has no review. */
const CertificateIterationSchema = z.object({
  version: z.number().int().nonnegative(),
  planning: PlanningEntrySchema,
  review: ReviewEntrySchema.nullable(),
});

const RepositorySchema = z.object({
  base_commit: z.string(),
  shadow_branch: z.string(),
  branch_status: z.enum(['active', 'merged', 'discarded']),
});

const ModelsSchema = z.object({
  planner: z.string(),
  reviewer: z.string(),
});

const RulesSchema = z.object({
  files: z.array(z.string()),
  rules_hash: z.string(),
});

const FinalReviewSchema = z.object({
  score: UnitScoreSchema,
  summary: z.string(),
  issues: z.array(ReviewIssueSchema),
  approval_reason: z.enum(['score_threshold', 'max_iterations_reached']),
});

const BudgetSchema = z.object({
  total_tokens: CountSchema,
  estimated_cost_usd: CountSchema,
  token_breakdown: z.object({
    input_tokens: CountSchema,
    output_tokens: CountSchema,
  }),
});

const IntegritySchema = z.object({
  chain_hash: z.string(),
  entry_hashes: z.array(z.string()),
  canonical_entries: z.array(z.string()),
  chain_algorithm: z.literal('sha256-sequential'),
});

/** Full certificate document, including its self-hash. */
const CertificatePayloadSchema = z.object({
  certificate_version: z.literal(1),
  generated_at: TimestampSchema,
  generator: z.string(),
  task_id: z.string(),
  task_status: z.enum([
    'init', 'planning', 'pending_review', 'reviewing',
    'refinement', 'approved', 'merged', 'discarded',
    'failed', 'merge_conflict', 'stalled',
  ]),
  created_at: TimestampSchema,
  completed_at: TimestampSchema,
  repository: RepositorySchema,
  models: ModelsSchema,
  rules: RulesSchema,
  iterations: z.array(CertificateIterationSchema),
  final_review: FinalReviewSchema,
  budget: BudgetSchema,
  integrity: IntegritySchema,
  certificate_hash: z.string(),
});
|
|
116
|
+
|
|
117
|
+
// ── Canonical entry serialization ────────────────────────────────────────────

/**
 * Serialize a history entry to deterministic JSON with a fixed key order.
 *
 * raw_output is left out (large/sensitive); output_hash is included instead
 * and binds it cryptographically. The review is reduced to its score and
 * summary, each written as null when the entry has no review.
 */
function canonicalEntry(entry: HistoryEntry): string {
  const {
    version,
    step,
    mode,
    binary,
    started_at,
    completed_at,
    output_hash,
    input_tokens,
    output_tokens,
    rules_snapshot,
    review,
  } = entry;

  // Property order here IS the serialized order — do not rearrange.
  const canonical = {
    version,
    step,
    mode,
    binary,
    started_at,
    completed_at,
    output_hash,
    input_tokens,
    output_tokens,
    rules_snapshot,
    review_score: review?.score ?? null,
    review_summary: review?.summary ?? null,
  };

  return JSON.stringify(canonical);
}
|
|
137
|
+
|
|
138
|
+
/** Return the SHA-256 digest of `data` as a hex string. */
function sha256(data: string): string {
  const hasher = createHash('sha256');
  hasher.update(data);
  return hasher.digest('hex');
}
|
|
141
|
+
|
|
142
|
+
// ── CertificateEngine ───────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
export class CertificateEngine {
|
|
145
|
+
|
|
146
|
+
/**
 * Compute the sequential chain hash over history entries.
 *   H_0 = SHA-256(canonical(entry_0))
 *   H_n = SHA-256(H_{n-1} || canonical(entry_n))
 * where H_{n-1} is the previous digest as a hex string.
 *
 * @param history Entries in the order they should be chained.
 * @returns The final digest prefixed with "sha256:" and the canonical
 *   serialization of every entry. An empty history yields the digest of the
 *   empty string and an empty list.
 */
computeChainHash(history: HistoryEntry[]): { chainHash: string; canonicalEntries: string[] } {
  const canonicalEntries = history.map(entry => canonicalEntry(entry));

  // Seed with the first entry, or with the empty string when there is none.
  let digest = sha256(canonicalEntries[0] ?? '');
  for (const canonical of canonicalEntries.slice(1)) {
    digest = sha256(digest + canonical);
  }

  return { chainHash: `sha256:${digest}`, canonicalEntries };
}
|
|
173
|
+
|
|
174
|
+
/**
 * Rebuild the chain hash from already-serialized canonical entries
 * (used during verification).
 *
 * @param canonicalEntries Canonical entry strings in chain order.
 * @returns The final digest prefixed with "sha256:"; for an empty list, the
 *   digest of the empty string.
 */
recomputeChainFromCanonical(canonicalEntries: string[]): string {
  if (canonicalEntries.length === 0) {
    return `sha256:${sha256('')}`;
  }

  // The first entry is hashed on its own; each later one is appended to the
  // previous hex digest before hashing.
  const digest = canonicalEntries
    .slice(1)
    .reduce((previous, canonical) => sha256(previous + canonical), sha256(canonicalEntries[0]!));

  return `sha256:${digest}`;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Generate a Certificate of AI Engineering Integrity from a completed task state.
 *
 * @param state Task state to certify.
 * @returns The certificate payload, including a `certificate_hash` computed
 *   over the rest of the payload.
 * @throws NomosError ('certificate_not_eligible') when the task status is not
 *   eligible, when history lacks a planning or a reviewing entry, or when the
 *   last reviewing entry carries no review result.
 */
generate(state: TaskState): CertificatePayload {
  const { history, meta } = state;

  // Precondition: task must be approved or merged
  if (!ELIGIBLE_STATUSES.includes(meta.status)) {
    throw new NomosError(
      'certificate_not_eligible',
      `Cannot generate certificate for task "${state.task_id}" in status "${meta.status}". ` +
        `Task must be in one of: [${ELIGIBLE_STATUSES.join(', ')}]`,
    );
  }

  // Precondition: must have at least one plan + one review
  const hasPlanning = history.some(e => e.step === 'planning');
  const reviewEntries = history.filter(e => e.step === 'reviewing');
  if (!hasPlanning || reviewEntries.length === 0) {
    throw new NomosError(
      'certificate_not_eligible',
      `Cannot generate certificate for task "${state.task_id}": ` +
        `requires at least one planning and one reviewing entry in history`,
    );
  }

  // Group history entries into iterations by version
  const iterations = this.groupIterations(history);

  // The final review is the result attached to the last reviewing entry
  const finalReview = reviewEntries[reviewEntries.length - 1]!.review;
  if (!finalReview) {
    throw new NomosError(
      'certificate_not_eligible',
      `Cannot generate certificate for task "${state.task_id}": ` +
        `last review entry has no review result`,
    );
  }

  // Integrity data: chain hash plus the per-entry output hashes
  const { chainHash, canonicalEntries } = this.computeChainHash(history);
  const entryHashes = history.map(e => e.output_hash);

  // Token breakdown summed over every history entry
  const tokenTotals = history.reduce(
    (sum, e) => ({
      input: sum.input + e.input_tokens,
      output: sum.output + e.output_tokens,
    }),
    { input: 0, output: 0 },
  );

  // Completed at = last history entry's completed_at
  const completedAt = history[history.length - 1]!.completed_at;

  // Build payload (without certificate_hash). The seal below hashes this
  // object's JSON serialization, so the property order is significant.
  const payload: Omit<CertificatePayload, 'certificate_hash'> = {
    certificate_version: 1,
    generated_at: new Date().toISOString(),
    generator: GENERATOR_VERSION,

    task_id: state.task_id,
    task_status: meta.status,
    created_at: meta.created_at,
    completed_at: completedAt,

    repository: {
      base_commit: state.shadow_branch.base_commit,
      shadow_branch: state.shadow_branch.branch,
      branch_status: state.shadow_branch.status,
    },

    models: {
      planner: state.orchestration.planner_bin,
      reviewer: state.orchestration.reviewer_bin,
    },

    rules: {
      files: state.context.rules,
      rules_hash: state.context.rules_hash,
    },

    iterations,

    final_review: {
      score: finalReview.score,
      summary: finalReview.summary,
      issues: finalReview.issues,
      approval_reason: meta.approval_reason ?? 'score_threshold',
    },

    budget: {
      total_tokens: state.budget.tokens_used,
      estimated_cost_usd: state.budget.estimated_cost_usd,
      token_breakdown: {
        input_tokens: tokenTotals.input,
        output_tokens: tokenTotals.output,
      },
    },

    integrity: {
      chain_hash: chainHash,
      entry_hashes: entryHashes,
      canonical_entries: canonicalEntries,
      chain_algorithm: 'sha256-sequential',
    },
  };

  // Self-seal: compute certificate_hash over the 2-space-indented payload JSON
  const seal = `sha256:${sha256(JSON.stringify(payload, null, 2))}`;

  return { ...payload, certificate_hash: seal };
}
|
|
305
|
+
|
|
306
|
+
/**
 * Verify a certificate's integrity.
 *
 * Runs five checks in order — status validity, review completeness,
 * certificate self-hash, chain hash, entry hash consistency — and reports
 * every result, not only the first failure.
 *
 * @param certificate Certificate to verify.
 * @returns `valid` is true only when every check passed; `checks` holds the
 *   individual results.
 */
verify(certificate: CertificatePayload): VerificationResult {
  const checks: VerificationCheck[] = [
    this.checkStatusValidity(certificate),
    this.checkReviewCompleteness(certificate),
    this.checkCertificateHash(certificate),
    this.checkChainHash(certificate),
    this.checkEntryHashConsistency(certificate),
  ];

  const valid = !checks.some(check => !check.passed);
  return { valid, checks };
}
|
|
332
|
+
|
|
333
|
+
/**
 * Parse a certificate JSON string and validate it against the certificate schema.
 *
 * @param json Raw certificate text.
 * @returns The validated certificate.
 * @throws NomosError ('certificate_invalid') when the text is not valid JSON
 *   or does not satisfy the schema; schema failures list each offending path
 *   and message.
 */
parse(json: string): CertificatePayload {
  let decoded: unknown;
  try {
    decoded = JSON.parse(json);
  } catch {
    throw new NomosError('certificate_invalid', 'Certificate file is not valid JSON');
  }

  const outcome = CertificatePayloadSchema.safeParse(decoded);
  if (outcome.success) {
    return outcome.data as CertificatePayload;
  }

  const problems = outcome.error.issues.map(issue => `${issue.path.join('.')}: ${issue.message}`);
  throw new NomosError('certificate_invalid', `Certificate validation failed: ${problems.join('; ')}`);
}
|
|
352
|
+
|
|
353
|
+
// ── Private helpers ──────────────────────────────────────────────────────
|
|
354
|
+
|
|
355
|
+
/**
 * Fold history entries into one iteration per version, ordered by ascending
 * version number.
 *
 * Within a version the last 'planning' entry and the last entry of any other
 * step win. Versions without a planning entry are dropped. An iteration with
 * no review entry gets `review: null`; a review entry without a review
 * result is reported with score 0, no issues and an empty summary.
 */
private groupIterations(history: HistoryEntry[]): CertificateIteration[] {
  type Bucket = { planning?: HistoryEntry; review?: HistoryEntry };
  const buckets = new Map<number, Bucket>();

  for (const entry of history) {
    let bucket = buckets.get(entry.version);
    if (bucket === undefined) {
      bucket = {};
      buckets.set(entry.version, bucket);
    }
    if (entry.step === 'planning') {
      bucket.planning = entry;
    } else {
      bucket.review = entry;
    }
  }

  // Fields reported identically for planning and review runs.
  const describeRun = (run: HistoryEntry) => ({
    binary: run.binary,
    mode: run.mode,
    started_at: run.started_at,
    completed_at: run.completed_at,
    output_hash: run.output_hash,
    tokens: {
      input: run.input_tokens,
      output: run.output_tokens,
      source: run.tokens_source,
    },
  });

  const ordered = [...buckets.entries()].sort(([left], [right]) => left - right);

  const iterations: CertificateIteration[] = [];
  for (const [version, { planning, review }] of ordered) {
    if (!planning) continue; // skip orphan review entries

    iterations.push({
      version,
      planning: {
        ...describeRun(planning),
        rules_snapshot: planning.rules_snapshot,
      },
      review: review
        ? {
            ...describeRun(review),
            score: review.review?.score ?? 0,
            issues: review.review?.issues ?? [],
            summary: review.review?.summary ?? '',
          }
        : null,
    });
  }

  return iterations;
}
|
|
412
|
+
|
|
413
|
+
/** Check that the certified task status is one of the eligible statuses. */
private checkStatusValidity(cert: CertificatePayload): VerificationCheck {
  const status = cert.task_status;
  const passed = ELIGIBLE_STATUSES.includes(status);

  let detail: string;
  if (passed) {
    detail = `Task status "${status}" is eligible for certification`;
  } else {
    detail = `Task status "${status}" is not eligible. Expected: [${ELIGIBLE_STATUSES.join(', ')}]`;
  }

  return { name: 'status_validity', passed, detail };
}
|
|
423
|
+
|
|
424
|
+
/**
 * Check that the final review has a non-negative score and a non-empty summary.
 */
private checkReviewCompleteness(cert: CertificatePayload): VerificationCheck {
  const { score, summary } = cert.final_review;
  const passed = score >= 0 && summary.length > 0;

  let detail: string;
  if (passed) {
    detail = `Final review present with score ${score}`;
  } else {
    detail = 'Final review is missing or incomplete';
  }

  return { name: 'review_completeness', passed, detail };
}
|
|
434
|
+
|
|
435
|
+
/**
 * Check the certificate's self-hash: drop `certificate_hash`, re-serialize
 * the remainder as 2-space-indented JSON, hash it, and compare with the
 * stored value.
 */
private checkCertificateHash(cert: CertificatePayload): VerificationCheck {
  const { certificate_hash: sealed, ...unsealed } = cert;
  const serialized = JSON.stringify(unsealed, null, 2);
  const recomputed = `sha256:${sha256(serialized)}`;
  const passed = recomputed === sealed;

  let detail: string;
  if (passed) {
    detail = 'Certificate self-hash is valid — payload has not been modified';
  } else {
    detail = `Certificate hash mismatch. Expected ${sealed}, computed ${recomputed}`;
  }

  return { name: 'certificate_hash', passed, detail };
}
|
|
448
|
+
|
|
449
|
+
/**
 * Check that the stored chain hash equals the chain rebuilt from the
 * certificate's own canonical entries.
 */
private checkChainHash(cert: CertificatePayload): VerificationCheck {
  const { canonical_entries: canonicalEntries, chain_hash: expected } = cert.integrity;
  const recomputed = this.recomputeChainFromCanonical(canonicalEntries);
  const passed = recomputed === expected;

  let detail: string;
  if (passed) {
    detail = 'Chain hash is valid — history entries have not been tampered with';
  } else {
    detail = `Chain hash mismatch. Expected ${expected}, computed ${recomputed}`;
  }

  return { name: 'chain_hash', passed, detail };
}
|
|
460
|
+
|
|
461
|
+
/**
 * Check that the `output_hash` embedded in each canonical entry equals the
 * value at the same position in `entry_hashes`.
 *
 * Fails on the first canonical entry that cannot be parsed, then on a length
 * difference between the two lists, then on the first differing position.
 */
private checkEntryHashConsistency(cert: CertificatePayload): VerificationCheck {
  const name = 'entry_hash_consistency';
  const failed = (detail: string): VerificationCheck => ({ name, passed: false, detail });

  const { canonical_entries: canonicalEntries, entry_hashes: entryHashes } = cert.integrity;

  // Pull output_hash out of every canonical entry.
  const embedded: string[] = [];
  for (const canonical of canonicalEntries) {
    try {
      const decoded = JSON.parse(canonical) as { output_hash: string };
      embedded.push(decoded.output_hash);
    } catch {
      return failed('Failed to parse canonical entry — data may be corrupted');
    }
  }

  // Both lists must describe the same number of entries.
  if (embedded.length !== entryHashes.length) {
    return failed(
      `Entry count mismatch: ${embedded.length} canonical entries vs ${entryHashes.length} entry_hashes`,
    );
  }

  // Report the first position where the two lists disagree.
  const firstDiff = embedded.findIndex((hash, index) => hash !== entryHashes[index]);
  if (firstDiff !== -1) {
    return failed(
      `Entry hash mismatch at index ${firstDiff}: canonical has ${embedded[firstDiff]}, entry_hashes has ${entryHashes[firstDiff]}`,
    );
  }

  return {
    name,
    passed: true,
    detail: `All ${embedded.length} entry hashes are consistent between canonical entries and entry_hashes array`,
  };
}
|
|
502
|
+
}
|