@toolbaux/guardian 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/adapters/runner.js +72 -3
- package/dist/adapters/typescript-adapter.js +24 -10
- package/dist/benchmarking/metrics/context-coverage.js +82 -0
- package/dist/benchmarking/metrics/drift-score.js +104 -0
- package/dist/benchmarking/metrics/search-recall.js +207 -0
- package/dist/benchmarking/metrics/token-efficiency.js +79 -0
- package/dist/benchmarking/report.js +131 -0
- package/dist/benchmarking/runner.js +175 -0
- package/dist/benchmarking/types.js +13 -0
- package/dist/cli.js +53 -10
- package/dist/commands/benchmark.js +62 -0
- package/dist/commands/discrepancy.js +1 -1
- package/dist/commands/doc-generate.js +1 -1
- package/dist/commands/doc-html.js +1 -1
- package/dist/commands/extract.js +1 -1
- package/dist/commands/feature-context.js +1 -1
- package/dist/commands/init.js +1 -0
- package/dist/commands/intel.js +47 -1
- package/dist/commands/mcp-serve.js +48 -321
- package/dist/commands/search.js +602 -14
- package/dist/db/file-specs-store.js +174 -0
- package/dist/db/fts-builder.js +305 -0
- package/dist/db/index.js +55 -0
- package/dist/db/specs-store.js +13 -0
- package/dist/db/sqlite-specs-store.js +441 -0
- package/dist/extract/codebase-intel.js +31 -2
- package/dist/extract/compress.js +70 -3
- package/dist/extract/context-block.js +11 -2
- package/dist/extract/function-intel.js +5 -2
- package/dist/extract/index.js +1 -23
- package/dist/extract/writer.js +6 -0
- package/package.json +3 -1
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Guardian-Bench report formatter
|
|
3
|
+
*
|
|
4
|
+
* Renders BenchmarkSummary into human-readable text and JSON outputs.
|
|
5
|
+
* Designed for arXiv paper table extraction.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Render a benchmark summary in the requested output format.
 *
 * @param {object} summary - Benchmark summary to render.
 * @param {string} [format="text"] - "json" or "markdown"; any other value
 *   produces the plain-text report.
 * @returns {string} The rendered report.
 */
export function renderReport(summary, format = "text") {
    switch (format) {
        case "json":
            // Pretty-printed so the file is diff-friendly.
            return JSON.stringify(summary, null, 2);
        case "markdown":
            return renderMarkdown(summary);
        default:
            return renderText(summary);
    }
}
|
|
14
|
+
// ── Text ──────────────────────────────────────────────────────────────────────
|
|
15
|
+
/**
 * Render the summary as a plain-text report: a header, one section per
 * aggregate metric group, and (only when at least one result carries an
 * `error`) a trailing list of failed tasks.
 *
 * @param {object} summary - Benchmark summary with `aggregate`, task counts and `results`.
 * @returns {string} Newline-joined report text.
 */
function renderText(summary) {
    const { aggregate, total_tasks: totalTasks, completed_tasks: completedTasks, failed_tasks: failedTasks } = summary;
    const heavyRule = "=".repeat(60);
    const lightRule = "-".repeat(40);
    const out = [
        "Guardian-Bench Results",
        heavyRule,
        `Guardian version : ${summary.guardian_version}`,
        `Generated : ${summary.generated_at}`,
        `Tasks : ${completedTasks}/${totalTasks} completed, ${failedTasks} failed`,
        "",
        "Search Recall (k=5)",
        lightRule,
        ` Mean precision@5 : ${pct(aggregate.search_recall.mean_precision_at_5)}`,
        ` Mean recall@5 : ${pct(aggregate.search_recall.mean_recall_at_5)}`,
        ` Mean F1@5 : ${pct(aggregate.search_recall.mean_f1_at_5)}`,
        ` Any-hit rate : ${pct(aggregate.search_recall.any_hit_rate)}`,
        "",
        "Token Efficiency",
        lightRule,
        ` Mean ratio : ${aggregate.token_efficiency.mean_efficiency_ratio.toFixed(3)}`,
        ` Median ratio : ${aggregate.token_efficiency.median_efficiency_ratio.toFixed(3)}`,
        ` Mean tokens saved : ${aggregate.token_efficiency.mean_tokens_saved.toLocaleString()}`,
        ` Total tokens saved: ${aggregate.token_efficiency.total_tokens_saved.toLocaleString()}`,
        "",
        "Drift Score",
        lightRule,
        ` Mean drift increase : ${aggregate.drift_score.mean_drift_increase.toFixed(3)}`,
        ` Tasks with patch : ${aggregate.drift_score.tasks_with_patch}`,
        ` Stable post-patch : ${aggregate.drift_score.tasks_with_stable_post_patch}`,
        "",
        "Context Coverage",
        lightRule,
        ` Mean coverage : ${pct(aggregate.context_coverage.mean_coverage)}`,
        ` Full coverage rate: ${pct(aggregate.context_coverage.full_coverage_rate)}`,
        "",
    ];
    // The failure section is omitted entirely when every task succeeded.
    const failures = summary.results.filter(entry => entry.error);
    if (failures.length > 0) {
        out.push("Failed Tasks", lightRule);
        for (const entry of failures) {
            out.push(` [${entry.task_id}] ${entry.error}`);
        }
        out.push("");
    }
    return out.join("\n");
}
|
|
59
|
+
// ── Markdown (paper table style) ─────────────────────────────────────────────
|
|
60
|
+
/**
 * Render the summary as Markdown: a title, a one-line run header, an
 * aggregate metrics table and a per-task results table.
 *
 * @param {object} summary - Benchmark summary with `aggregate` and `results`.
 * @returns {string} Newline-joined Markdown document.
 */
function renderMarkdown(summary) {
    const { aggregate: totals } = summary;
    // One two-column row of the aggregate table.
    const metricRow = (label, value) => `| ${label} | ${value} |`;
    const out = [
        `# Guardian-Bench Results`,
        ``,
        `**Guardian version:** ${summary.guardian_version} | **Tasks:** ${summary.completed_tasks}/${summary.total_tasks} | **Generated:** ${summary.generated_at}`,
        ``,
        `## Aggregate Metrics`,
        ``,
        `| Metric | Value |`,
        `|--------|-------|`,
        metricRow("Search Recall — Precision@5", pct(totals.search_recall.mean_precision_at_5)),
        metricRow("Search Recall — Recall@5", pct(totals.search_recall.mean_recall_at_5)),
        metricRow("Search Recall — F1@5", pct(totals.search_recall.mean_f1_at_5)),
        metricRow("Search Recall — Any-Hit Rate", pct(totals.search_recall.any_hit_rate)),
        metricRow("Token Efficiency — Mean Ratio", `${totals.token_efficiency.mean_efficiency_ratio.toFixed(3)}×`),
        metricRow("Token Efficiency — Median Ratio", `${totals.token_efficiency.median_efficiency_ratio.toFixed(3)}×`),
        metricRow("Token Efficiency — Mean Tokens Saved", totals.token_efficiency.mean_tokens_saved.toLocaleString()),
        metricRow("Drift Score — Mean Increase", totals.drift_score.mean_drift_increase.toFixed(3)),
        metricRow("Context Coverage — Mean", pct(totals.context_coverage.mean_coverage)),
        metricRow("Context Coverage — Full Coverage Rate", pct(totals.context_coverage.full_coverage_rate)),
        ``,
        `## Per-Task Results`,
        ``,
        `| Task | Repo | P@5 | R@5 | F1@5 | Eff.Ratio | Coverage |`,
        `|------|------|-----|-----|------|-----------|----------|`,
    ];
    // Every result gets a row, including failed tasks (their metrics are zero-filled by the runner).
    for (const entry of summary.results) {
        const { search_recall: recall, token_efficiency: tokens, context_coverage: coverage } = entry.metrics;
        out.push(`| ${entry.task_id} | ${entry.repo} | ${pct(recall.precision_at_k)} | ${pct(recall.recall_at_k)} | ${pct(recall.f1_at_k)} | ${tokens.efficiency_ratio.toFixed(3)}× | ${pct(coverage.coverage)} |`);
    }
    out.push(``);
    return out.join("\n");
}
|
|
98
|
+
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
99
|
+
/**
 * Format a fraction as a percentage string with one decimal place.
 *
 * @param {number} n - Fraction, e.g. 0.5 for 50%.
 * @returns {string} e.g. "50.0%".
 */
function pct(n) {
    const scaled = n * 100;
    return scaled.toFixed(1) + "%";
}
|
|
102
|
+
/**
 * Flatten per-task results into CSV text suitable for pandas or a spreadsheet.
 *
 * The output is a header row followed by one row per entry in
 * `summary.results`, joined with "\n". Every cell is escaped per RFC 4180:
 * a value containing a comma, double quote, CR or LF is wrapped in double
 * quotes with embedded quotes doubled. Null and undefined become an empty cell.
 *
 * @param {object} summary - Benchmark summary; only `summary.results` is read.
 * @returns {string} CSV document (no trailing newline).
 */
export function toCSV(summary) {
    const header = [
        "task_id", "repo", "language", "source",
        "precision_at_5", "recall_at_5", "f1_at_5", "any_hit",
        "efficiency_ratio", "tokens_saved",
        "drift_increase", "context_coverage",
        "duration_ms", "error",
    ].join(",");
    // RFC 4180 cell escaping. Applied to every field so that a comma or
    // quote in an id, repo name or error message cannot shift columns.
    const cell = (value) => {
        if (value === null || value === undefined) {
            return "";
        }
        const text = String(value);
        return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    };
    const rows = summary.results.map(r => {
        const m = r.metrics;
        return [
            r.task_id,
            r.repo,
            r.language,
            r.source,
            m.search_recall.precision_at_k,
            m.search_recall.recall_at_k,
            m.search_recall.f1_at_k,
            // any_hit: 1 when at least one ground-truth file was found.
            m.search_recall.files_found.length > 0 ? 1 : 0,
            m.token_efficiency.efficiency_ratio,
            m.token_efficiency.tokens_saved,
            m.drift_score.drift_increase,
            m.context_coverage.coverage,
            r.duration_ms,
            r.error ? r.error : "",
        ].map(cell).join(",");
    });
    return [header, ...rows].join("\n");
}
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Guardian-Bench runner
|
|
3
|
+
*
|
|
4
|
+
* Processes a JSONL file of BenchmarkTask entries, computes all 4 metrics
|
|
5
|
+
* for each task, and returns a BenchmarkSummary.
|
|
6
|
+
*
|
|
7
|
+
* Metrics are fully offline — no LLM API calls required.
|
|
8
|
+
*/
|
|
9
|
+
import fs from "node:fs/promises";
|
|
10
|
+
import { measureSearchRecall } from "./metrics/search-recall.js";
|
|
11
|
+
import { measureTokenEfficiency } from "./metrics/token-efficiency.js";
|
|
12
|
+
import { measureDriftScore } from "./metrics/drift-score.js";
|
|
13
|
+
import { measureContextCoverage } from "./metrics/context-coverage.js";
|
|
14
|
+
/**
 * Run every task in a JSONL tasks file and aggregate the results.
 *
 * Blank lines and lines starting with "//" are skipped. Tasks are processed
 * in batches of `concurrency`; each batch is awaited before the next starts.
 *
 * @param {object} options
 * @param {string} options.tasksFile - Path to the JSONL tasks file.
 * @param {string} [options.specsDir] - Specs directory override passed to every task.
 * @param {string} [options.repoDir] - Repo directory override passed to every task.
 * @param {number} [options.k=5] - k passed to the search-recall metric.
 * @param {number} [options.concurrency=4] - Batch size; values that are not
 *   a number >= 1 (0, negative, NaN) are clamped to 1.
 * @param {Function} [options.onProgress] - Called as (completed, total, result) after each task.
 * @returns {Promise<object>} The benchmark summary.
 * @throws {SyntaxError} If a task line is not valid JSON (message names the line number).
 */
export async function runBenchmark(options) {
    const { tasksFile, specsDir, repoDir, k = 5, concurrency = 4 } = options;
    // A batch size of 0 would never advance the loop below and NaN would
    // silently run nothing, so normalise to an integer >= 1 up front.
    const batchSize = Math.max(1, Math.floor(Number(concurrency)) || 1);
    // Load tasks from JSONL, remembering line numbers for error messages.
    const raw = await fs.readFile(tasksFile, "utf8");
    const tasks = [];
    const rawLines = raw.split("\n");
    for (let lineNo = 0; lineNo < rawLines.length; lineNo++) {
        const text = rawLines[lineNo].trim();
        if (text.length === 0 || text.startsWith("//")) {
            continue;
        }
        try {
            tasks.push(JSON.parse(text));
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            throw new SyntaxError(`Invalid JSON on line ${lineNo + 1} of ${tasksFile}: ${reason}`, { cause: err });
        }
    }
    const results = [];
    let completed = 0;
    // Process tasks with limited concurrency.
    for (let i = 0; i < tasks.length; i += batchSize) {
        const batch = tasks.slice(i, i + batchSize);
        const batchResults = await Promise.all(batch.map(task => runTask(task, { specsDir, repoDir, k })));
        for (const r of batchResults) {
            results.push(r);
            completed++;
            options.onProgress?.(completed, tasks.length, r);
        }
    }
    const guardianVersion = await readPackageVersion();
    return buildSummary(results, guardianVersion);
}
|
|
39
|
+
/**
 * Compute all four metrics for a single task.
 *
 * The metric functions run in parallel. If any of them throws, the task is
 * reported as failed: the result carries zero-filled metrics plus an `error`
 * message, so one bad task never aborts the whole run.
 *
 * @param {object} task - Parsed task entry from the JSONL file.
 * @param {object} opts
 * @param {string} [opts.specsDir] - Overrides `task.specs_dir`; defaults to ".specs".
 * @param {string} [opts.repoDir] - Overrides `task.repo_dir`.
 * @param {number} opts.k - k passed to the search-recall metric.
 * @returns {Promise<object>} Per-task result; `error` is set only on failure.
 */
async function runTask(task, opts) {
    const start = Date.now();
    const specsDir = opts.specsDir ?? task.specs_dir ?? ".specs";
    const repoDir = opts.repoDir ?? task.repo_dir;
    // Fields shared by the success and failure result shapes.
    const identity = {
        task_id: task.id,
        repo: task.repo,
        language: task.language,
        source: task.source,
        specs_dir: specsDir,
    };
    try {
        const [searchRecall, tokenEfficiency, driftScore, contextCoverage] = await Promise.all([
            measureSearchRecall({
                specsDir,
                query: task.query,
                groundTruthFiles: task.ground_truth_files,
                groundTruthSymbols: task.ground_truth_symbols,
                k: opts.k,
            }),
            measureTokenEfficiency({
                specsDir,
                groundTruthFiles: task.ground_truth_files,
                repoDir,
            }),
            measureDriftScore({
                specsDir,
                patch: task.patch,
            }),
            measureContextCoverage({
                specsDir,
                groundTruthFiles: task.ground_truth_files,
                groundTruthSymbols: task.ground_truth_symbols,
            }),
        ]);
        return {
            ...identity,
            metrics: { search_recall: searchRecall, token_efficiency: tokenEfficiency, drift_score: driftScore, context_coverage: contextCoverage },
            duration_ms: Date.now() - start,
        };
    }
    catch (err) {
        // A task without ground_truth_files is a likely cause of the failure
        // itself, so the fallback must not dereference it unguarded.
        const groundTruthFiles = Array.isArray(task.ground_truth_files) ? task.ground_truth_files : [];
        const emptyEfficiency = {
            mcp_tokens: 0, raw_file_tokens: 0, efficiency_ratio: 0,
            tokens_saved: 0, raw_file_bytes: 0, mcp_response_bytes: 0,
        };
        const emptyDrift = {
            baseline_delta: null, post_patch_delta: null, drift_increase: null,
            baseline_status: "error", post_patch_status: "error", patch_applied: false,
        };
        return {
            ...identity,
            metrics: {
                search_recall: { precision_at_k: 0, recall_at_k: 0, f1_at_k: 0, k: opts.k, files_found: [], files_missed: groundTruthFiles, symbols_found: [], symbols_missed: task.ground_truth_symbols ?? [], result_files: [], result_symbols: [] },
                token_efficiency: emptyEfficiency,
                drift_score: emptyDrift,
                context_coverage: { coverage: 0, modules_mentioned: [], modules_missing: [], files_mentioned: 0, files_total: groundTruthFiles.length },
            },
            duration_ms: Date.now() - start,
            error: err instanceof Error ? err.message : String(err),
        };
    }
}
|
|
103
|
+
/**
 * Aggregate per-task results into a benchmark summary.
 *
 * Only tasks without an `error` contribute to the aggregate metrics; failed
 * tasks are counted and kept in `results` but excluded from every mean,
 * median, sum and rate.
 *
 * @param {object[]} results - Per-task results in run order.
 * @param {string} guardianVersion - Version string recorded in the summary.
 * @returns {object} Summary with timestamp, counts, `aggregate` and `results`.
 */
function buildSummary(results, guardianVersion) {
    const completed = results.filter(entry => !entry.error);
    const failed = results.filter(entry => entry.error);
    const okCount = completed.length;
    // Fraction of completed tasks matching a predicate; 0 when nothing completed.
    const rateOf = (predicate) => okCount > 0 ? completed.filter(predicate).length / okCount : 0;
    // Pull one numeric field out of a metric group for every completed task.
    const column = (group, field) => completed.map(entry => entry.metrics[group][field]);
    const searchRecall = {
        mean_precision_at_5: mean(column("search_recall", "precision_at_k")),
        mean_recall_at_5: mean(column("search_recall", "recall_at_k")),
        mean_f1_at_5: mean(column("search_recall", "f1_at_k")),
        any_hit_rate: rateOf(entry => entry.metrics.search_recall.files_found.length > 0),
    };
    const tokenEfficiency = {
        mean_efficiency_ratio: mean(column("token_efficiency", "efficiency_ratio")),
        median_efficiency_ratio: median(column("token_efficiency", "efficiency_ratio")),
        mean_tokens_saved: mean(column("token_efficiency", "tokens_saved")),
        total_tokens_saved: sum(column("token_efficiency", "tokens_saved")),
    };
    const driftScore = {
        // Null drift values are left out of the mean.
        mean_drift_increase: mean(column("drift_score", "drift_increase").filter(value => value !== null)),
        tasks_with_stable_post_patch: completed.filter(entry => entry.metrics.drift_score.post_patch_status === "stable").length,
        tasks_with_patch: completed.filter(entry => entry.metrics.drift_score.patch_applied).length,
    };
    const contextCoverage = {
        mean_coverage: mean(column("context_coverage", "coverage")),
        full_coverage_rate: rateOf(entry => entry.metrics.context_coverage.coverage >= 1.0),
    };
    return {
        generated_at: new Date().toISOString(),
        guardian_version: guardianVersion,
        total_tasks: results.length,
        completed_tasks: okCount,
        failed_tasks: failed.length,
        aggregate: {
            search_recall: searchRecall,
            token_efficiency: tokenEfficiency,
            drift_score: driftScore,
            context_coverage: contextCoverage,
        },
        results,
    };
}
|
|
145
|
+
/**
 * Read the `version` field of the package.json two directories above this module.
 *
 * The file URL is handed to `fs.readFile` directly rather than via
 * `URL#pathname`, which is percent-encoded and, on Windows, carries a
 * leading slash before the drive letter, so it is not a usable path there.
 *
 * @returns {Promise<string>} The version string, or "unknown" if the file
 *   cannot be read or parsed, or has no string `version`.
 */
async function readPackageVersion() {
    try {
        const pkgUrl = new URL("../../package.json", import.meta.url);
        const raw = await fs.readFile(pkgUrl, "utf8");
        const { version } = JSON.parse(raw);
        return typeof version === "string" ? version : "unknown";
    }
    catch {
        // Best effort: the version is informational only.
        return "unknown";
    }
}
|
|
155
|
+
/**
 * Arithmetic mean of a list of numbers, passed through `round`.
 *
 * @param {number[]} values
 * @returns {number} The rounded mean, or 0 for an empty list.
 */
function mean(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const value of values) {
        total += value;
    }
    return round(total / count);
}
|
|
160
|
+
/**
 * Median of a list of numbers, passed through `round`. The input is not
 * mutated; for an even count the two middle values are averaged.
 *
 * @param {number[]} values
 * @returns {number} The rounded median, or 0 for an empty list.
 */
function median(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    // Sort a copy numerically (the default sort is lexicographic).
    const ordered = Array.from(values).sort((x, y) => x - y);
    const upper = Math.floor(count / 2);
    if (count % 2 !== 0) {
        return round(ordered[upper]);
    }
    return round((ordered[upper - 1] + ordered[upper]) / 2);
}
|
|
170
|
+
/**
 * Sum of a list of numbers.
 *
 * @param {number[]} values
 * @returns {number} The total, or 0 for an empty list.
 */
function sum(values) {
    let total = 0;
    for (const value of values) {
        total += value;
    }
    return total;
}
|
|
173
|
+
/**
 * Round a number to three decimal places.
 *
 * @param {number} n
 * @returns {number}
 */
function round(n) {
    const SCALE = 1000;
    return Math.round(n * SCALE) / SCALE;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Guardian-Bench types
|
|
3
|
+
*
|
|
4
|
+
* Task format is JSONL, one task per line — compatible with HuggingFace datasets.
|
|
5
|
+
* Results are structured for direct inclusion in paper tables.
|
|
6
|
+
*
|
|
7
|
+
* Benchmark dimensions (all offline, no LLM API required):
|
|
8
|
+
* 1. Search Recall — precision/recall of guardian_search vs ground-truth files
|
|
9
|
+
* 2. Token Efficiency — MCP response tokens vs reading ground-truth files directly
|
|
10
|
+
* 3. Drift Score — architectural drift increase after applying a patch
|
|
11
|
+
* 4. Context Coverage — how much of architecture-context.md covers the task's modules
|
|
12
|
+
*/
|
|
13
|
+
export {};
|
package/dist/cli.js
CHANGED
|
@@ -15,13 +15,14 @@ import { runContext } from "./commands/context.js";
|
|
|
15
15
|
import { runGenerate } from "./commands/generate.js";
|
|
16
16
|
import { runVerifyDrift } from "./commands/verify-drift.js";
|
|
17
17
|
import { runAnalyzeDepth } from "./commands/analyze-depth.js";
|
|
18
|
-
import { runIntel } from "./commands/intel.js";
|
|
19
18
|
import { runFeatureContext } from "./commands/feature-context.js";
|
|
20
19
|
import { runDocGenerate } from "./commands/doc-generate.js";
|
|
21
20
|
import { runDiscrepancy } from "./commands/discrepancy.js";
|
|
22
21
|
import { runDocHtml } from "./commands/doc-html.js";
|
|
23
22
|
import { runInit } from "./commands/init.js";
|
|
23
|
+
import { runIntel } from "./commands/intel.js";
|
|
24
24
|
import { runMcpServe } from "./commands/mcp-serve.js";
|
|
25
|
+
import { runBenchmarkCommand } from "./commands/benchmark.js";
|
|
25
26
|
import { DEFAULT_SPECS_DIR } from "./config.js";
|
|
26
27
|
const program = new Command();
|
|
27
28
|
program
|
|
@@ -58,18 +59,20 @@ program
|
|
|
58
59
|
.option("--backend-root <path>", "Path to backend root")
|
|
59
60
|
.option("--frontend-root <path>", "Path to frontend root")
|
|
60
61
|
.option("--output <path>", "Output directory", DEFAULT_SPECS_DIR)
|
|
61
|
-
.option("--
|
|
62
|
+
.option("--no-file-graph", "Exclude file-level dependency graph")
|
|
62
63
|
.option("--config <path>", "Path to guardian.config.json")
|
|
63
64
|
.option("--docs-mode <mode>", "Docs mode (lean|full)")
|
|
65
|
+
.option("--backend <backend>", "Storage backend: 'file' (default) or 'sqlite' (also builds guardian.db + FTS index)")
|
|
64
66
|
.action(async (projectRoot, options) => {
|
|
65
67
|
await runExtract({
|
|
66
68
|
projectRoot,
|
|
67
69
|
backendRoot: options.backendRoot,
|
|
68
70
|
frontendRoot: options.frontendRoot,
|
|
69
71
|
output: options.output ?? DEFAULT_SPECS_DIR,
|
|
70
|
-
includeFileGraph: options.
|
|
72
|
+
includeFileGraph: options.fileGraph !== false,
|
|
71
73
|
configPath: options.config,
|
|
72
|
-
docsMode: options.docsMode
|
|
74
|
+
docsMode: options.docsMode,
|
|
75
|
+
backend: options.backend,
|
|
73
76
|
});
|
|
74
77
|
});
|
|
75
78
|
program
|
|
@@ -209,17 +212,31 @@ program
|
|
|
209
212
|
});
|
|
210
213
|
program
|
|
211
214
|
.command("search")
|
|
212
|
-
.description("Search
|
|
215
|
+
.description("Search snapshots and intelligence files. Use --query for semantic search or a mode flag for targeted lookups.")
|
|
213
216
|
.option("--input <path>", "Snapshot output directory", DEFAULT_SPECS_DIR)
|
|
214
|
-
.
|
|
217
|
+
.option("--query <text>", "Semantic search query")
|
|
215
218
|
.option("--output <path>", "Write search results to a file")
|
|
216
219
|
.option("--types <items>", "Comma-separated filters: models,endpoints,components,modules,tasks")
|
|
220
|
+
.option("--verbose", "Show full grouped output instead of compact file-first format")
|
|
221
|
+
.option("--format <fmt>", "Output format for --query: text (default) or json (categorical)")
|
|
222
|
+
.option("--orient", "Return architecture-context.md as compact JSON (project map)")
|
|
223
|
+
.option("--file <path>", "Return context for a file path or endpoint (e.g. 'POST /api/auth/login')")
|
|
224
|
+
.option("--model <name>", "Return model fields, relationships, and usage (e.g. 'User')")
|
|
225
|
+
.option("--impact <path>", "Return impact analysis: what breaks if you change this file")
|
|
226
|
+
.option("--backend <backend>", "Storage backend: 'file' (default linear scan) or 'sqlite' (FTS5/BM25)")
|
|
217
227
|
.action(async (options) => {
|
|
218
228
|
await runSearch({
|
|
219
229
|
input: options.input ?? DEFAULT_SPECS_DIR,
|
|
220
230
|
query: options.query,
|
|
221
231
|
output: options.output,
|
|
222
|
-
types: options.types ? [options.types] : undefined
|
|
232
|
+
types: options.types ? [options.types] : undefined,
|
|
233
|
+
verbose: options.verbose ?? false,
|
|
234
|
+
format: options.format,
|
|
235
|
+
orient: options.orient ?? false,
|
|
236
|
+
file: options.file,
|
|
237
|
+
model: options.model,
|
|
238
|
+
impact: options.impact,
|
|
239
|
+
backend: options.backend,
|
|
223
240
|
});
|
|
224
241
|
});
|
|
225
242
|
program
|
|
@@ -262,13 +279,16 @@ program
|
|
|
262
279
|
});
|
|
263
280
|
program
|
|
264
281
|
.command("intel")
|
|
265
|
-
.description("
|
|
282
|
+
.description("[deprecated] Use `guardian extract` instead")
|
|
266
283
|
.option("--specs <dir>", "Snapshot output directory", DEFAULT_SPECS_DIR)
|
|
267
|
-
.option("--output <path>", "Output path for codebase-intelligence.json")
|
|
284
|
+
.option("--output <path>", "Output path for codebase-intelligence.json (file backend only)")
|
|
285
|
+
.option("--backend <backend>", "Storage backend: 'file' (default) or 'sqlite'")
|
|
268
286
|
.action(async (options) => {
|
|
287
|
+
console.warn("⚠ `guardian intel` is deprecated — use `guardian extract` instead.");
|
|
269
288
|
await runIntel({
|
|
270
289
|
specs: options.specs,
|
|
271
|
-
output: options.output
|
|
290
|
+
output: options.output,
|
|
291
|
+
backend: options.backend,
|
|
272
292
|
});
|
|
273
293
|
});
|
|
274
294
|
program
|
|
@@ -333,6 +353,7 @@ program
|
|
|
333
353
|
.option("--frontend-root <path>", "Path to frontend root")
|
|
334
354
|
.option("--output <path>", "Output directory", DEFAULT_SPECS_DIR)
|
|
335
355
|
.option("--skip-hook", "Skip pre-commit hook installation", false)
|
|
356
|
+
.option("--backend <backend>", "Storage backend: 'file' (default) or 'sqlite' (builds guardian.db + FTS index)")
|
|
336
357
|
.action(async (projectRoot, options) => {
|
|
337
358
|
await runInit({
|
|
338
359
|
projectRoot,
|
|
@@ -340,6 +361,28 @@ program
|
|
|
340
361
|
frontendRoot: options.frontendRoot,
|
|
341
362
|
output: options.output,
|
|
342
363
|
skipHook: options.skipHook ?? false,
|
|
364
|
+
backend: options.backend,
|
|
365
|
+
});
|
|
366
|
+
});
|
|
367
|
+
program
|
|
368
|
+
.command("benchmark")
|
|
369
|
+
.description("Run Guardian-Bench offline evaluation suite (4 metrics, no LLM required)")
|
|
370
|
+
.requiredOption("--tasks <file>", "Path to JSONL tasks file")
|
|
371
|
+
.option("--specs <dir>", "Specs directory override for all tasks")
|
|
372
|
+
.option("--repo-dir <dir>", "Repo root directory override for all tasks")
|
|
373
|
+
.option("--output <path>", "Write report to file (in addition to stdout)")
|
|
374
|
+
.option("--format <fmt>", "Output format: text, json, markdown, csv (default: text)", "text")
|
|
375
|
+
.option("--k <n>", "k for precision/recall (default: 5)", "5")
|
|
376
|
+
.option("--concurrency <n>", "Max parallel tasks (default: 4)", "4")
|
|
377
|
+
.action(async (options) => {
|
|
378
|
+
await runBenchmarkCommand({
|
|
379
|
+
tasks: options.tasks,
|
|
380
|
+
specs: options.specs,
|
|
381
|
+
repoDir: options.repoDir,
|
|
382
|
+
output: options.output,
|
|
383
|
+
format: options.format,
|
|
384
|
+
k: options.k,
|
|
385
|
+
concurrency: options.concurrency,
|
|
343
386
|
});
|
|
344
387
|
});
|
|
345
388
|
program
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `guardian benchmark` — run Guardian-Bench offline evaluation suite
|
|
3
|
+
*
|
|
4
|
+
* Reads a JSONL file of tasks, computes 4 metrics per task (search recall,
|
|
5
|
+
* token efficiency, drift score, context coverage), and writes a report.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* guardian benchmark --tasks tasks.jsonl --specs .specs
|
|
9
|
+
* guardian benchmark --tasks tasks.jsonl --output results.json --format json
|
|
10
|
+
*/
|
|
11
|
+
import fs from "node:fs/promises";
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
import { runBenchmark } from "../benchmarking/runner.js";
|
|
14
|
+
import { renderReport, toCSV } from "../benchmarking/report.js";
|
|
15
|
+
/**
 * Entry point for `guardian benchmark`.
 *
 * Resolves paths, validates the numeric options and the tasks file, runs the
 * benchmark with per-task progress on stderr, then renders the report. The
 * report is always printed to stdout and additionally written to
 * `options.output` when given.
 *
 * Exits the process with code 1 when `--k` or `--concurrency` is not a
 * positive integer, or when the tasks file is not accessible.
 *
 * @param {object} options
 * @param {string} options.tasks - Path to the JSONL tasks file.
 * @param {string} [options.specs] - Specs directory override for all tasks.
 * @param {string} [options.repoDir] - Repo root override for all tasks.
 * @param {string} [options.output] - File to write the report to.
 * @param {string} [options.format="text"] - text, json, markdown or csv; anything else renders as text.
 * @param {string|number} [options.k=5] - k for precision/recall.
 * @param {string|number} [options.concurrency=4] - Max parallel tasks.
 * @returns {Promise<void>}
 */
export async function runBenchmarkCommand(options) {
    // CLI values arrive as strings; programmatic callers may pass numbers.
    // Reject NaN, 0 and negatives here so they never reach the runner.
    const toPositiveInt = (value, fallback, flag) => {
        const parsed = typeof value === "string" ? Number.parseInt(value, 10) : (value ?? fallback);
        if (!Number.isInteger(parsed) || parsed < 1) {
            console.error(`Error: ${flag} must be a positive integer, got: ${value}`);
            process.exit(1);
        }
        return parsed;
    };
    const tasksFile = path.resolve(options.tasks);
    const specsDir = options.specs ? path.resolve(options.specs) : undefined;
    const repoDir = options.repoDir ? path.resolve(options.repoDir) : undefined;
    const format = (options.format ?? "text");
    const k = toPositiveInt(options.k, 5, "--k");
    const concurrency = toPositiveInt(options.concurrency, 4, "--concurrency");
    if (!["text", "json", "markdown", "csv"].includes(format)) {
        // The fallback to text is kept; it is just no longer silent.
        console.error(`Warning: unknown format "${format}", falling back to text`);
    }
    // Validate tasks file
    try {
        await fs.access(tasksFile);
    }
    catch {
        console.error(`Error: tasks file not found: ${tasksFile}`);
        process.exit(1);
    }
    console.error(`Guardian-Bench: running tasks from ${tasksFile}`);
    const summary = await runBenchmark({
        tasksFile,
        specsDir,
        repoDir,
        k,
        concurrency,
        // Progress goes to stderr so stdout carries only the report.
        onProgress(completed, total, result) {
            const status = result.error ? "FAIL" : "OK";
            const f1 = result.metrics.search_recall.f1_at_k.toFixed(3);
            const cov = result.metrics.context_coverage.coverage.toFixed(3);
            console.error(` [${completed}/${total}] ${status} ${result.task_id} | F1@${k}=${f1} | coverage=${cov}`);
        },
    });
    // Render output
    let output;
    if (format === "csv") {
        output = toCSV(summary);
    }
    else {
        output = renderReport(summary, format === "json" || format === "markdown" ? format : "text");
    }
    if (options.output) {
        const outputPath = path.resolve(options.output);
        await fs.mkdir(path.dirname(outputPath), { recursive: true });
        await fs.writeFile(outputPath, output, "utf8");
        console.error(`Wrote results to ${outputPath}`);
    }
    // Always print to stdout
    console.log(output);
}
|
|
@@ -22,7 +22,7 @@ export async function runDiscrepancy(options) {
|
|
|
22
22
|
// Load codebase intelligence
|
|
23
23
|
const intelPath = path.join(layout.machineDir, "codebase-intelligence.json");
|
|
24
24
|
const intel = await loadCodebaseIntelligence(intelPath).catch(() => {
|
|
25
|
-
throw new Error(`Could not load codebase-intelligence.json from ${intelPath}. Run \`guardian
|
|
25
|
+
throw new Error(`Could not load codebase-intelligence.json from ${intelPath}. Run \`guardian extract --output ${options.specs}\` first.`);
|
|
26
26
|
});
|
|
27
27
|
const baselinePath = path.join(layout.machineDir, "product-document.baseline.json");
|
|
28
28
|
const featureSpecsDir = options.featureSpecs ? path.resolve(options.featureSpecs) : null;
|
|
@@ -46,7 +46,7 @@ export async function runDocGenerate(options) {
|
|
|
46
46
|
process.stdout.write("Loading codebase intelligence... ");
|
|
47
47
|
const intel = await loadCodebaseIntelligence(intelPath).catch(() => {
|
|
48
48
|
console.log("failed");
|
|
49
|
-
throw new Error(`Could not load ${intelPath}. Run \`guardian
|
|
49
|
+
throw new Error(`Could not load ${intelPath}. Run \`guardian extract --output ${options.specs}\` first.`);
|
|
50
50
|
});
|
|
51
51
|
console.log(`${intel.meta.counts.endpoints} endpoints, ${intel.meta.counts.models} models, ` +
|
|
52
52
|
`${intel.meta.counts.enums} enums, ${intel.meta.counts.tasks} tasks`);
|
|
@@ -23,7 +23,7 @@ export async function runDocHtml(options) {
|
|
|
23
23
|
process.stdout.write("Loading codebase intelligence... ");
|
|
24
24
|
const intel = await loadCodebaseIntelligence(intelPath).catch(() => {
|
|
25
25
|
console.log("failed");
|
|
26
|
-
throw new Error(`Could not load ${intelPath}. Run \`guardian
|
|
26
|
+
throw new Error(`Could not load ${intelPath}. Run \`guardian extract --output ${options.specs}\` first.`);
|
|
27
27
|
});
|
|
28
28
|
console.log(`${intel.meta.counts.endpoints} endpoints, ${intel.meta.counts.models} models`);
|
|
29
29
|
// ── Feature arcs (optional) ───────────────────────────────────────────────
|
package/dist/commands/extract.js
CHANGED
|
@@ -11,7 +11,7 @@ export async function runExtract(options) {
|
|
|
11
11
|
// Auto-build codebase intelligence after every extract
|
|
12
12
|
const specsDir = path.resolve(options.output);
|
|
13
13
|
try {
|
|
14
|
-
await runIntel({ specs: specsDir });
|
|
14
|
+
await runIntel({ specs: specsDir, backend: options.backend });
|
|
15
15
|
}
|
|
16
16
|
catch {
|
|
17
17
|
// Non-fatal — intel build failure should not break extract
|
|
@@ -26,7 +26,7 @@ export async function runFeatureContext(options) {
|
|
|
26
26
|
// Load codebase intelligence
|
|
27
27
|
const intelPath = path.join(layout.machineDir, "codebase-intelligence.json");
|
|
28
28
|
const intel = await loadCodebaseIntelligence(intelPath).catch(() => {
|
|
29
|
-
throw new Error(`Could not load codebase-intelligence.json from ${intelPath}. Run \`guardian
|
|
29
|
+
throw new Error(`Could not load codebase-intelligence.json from ${intelPath}. Run \`guardian extract --output ${options.specs}\` first.`);
|
|
30
30
|
});
|
|
31
31
|
// Build filtered context
|
|
32
32
|
const context = buildFeatureContext(spec, intel);
|
package/dist/commands/init.js
CHANGED
package/dist/commands/intel.js
CHANGED
|
@@ -2,16 +2,62 @@
|
|
|
2
2
|
* `guardian intel` — build codebase-intelligence.json from existing snapshots.
|
|
3
3
|
*
|
|
4
4
|
* Reads: specs-out/machine/architecture.snapshot.yaml + ux.snapshot.yaml
|
|
5
|
-
* Writes: specs-out/machine/codebase-intelligence.json
|
|
5
|
+
* Writes: specs-out/machine/codebase-intelligence.json (file backend, default)
|
|
6
|
+
* specs-out/guardian.db (sqlite backend)
|
|
6
7
|
*
|
|
7
8
|
* Also auto-runs at the end of `guardian extract`.
|
|
8
9
|
*/
|
|
9
10
|
import path from "node:path";
|
|
10
11
|
import { writeCodebaseIntelligence } from "../extract/codebase-intel.js";
|
|
12
|
+
import { writeCodebaseIntelligenceViaStore } from "../extract/codebase-intel.js";
|
|
11
13
|
import { getOutputLayout } from "../output-layout.js";
|
|
14
|
+
import { SqliteSpecsStore } from "../db/sqlite-specs-store.js";
|
|
15
|
+
import { populateFTSIndex } from "../db/fts-builder.js";
|
|
12
16
|
export async function runIntel(options) {
|
|
13
17
|
const specsDir = path.resolve(options.specs);
|
|
14
18
|
const layout = getOutputLayout(specsDir);
|
|
19
|
+
if (options.backend === "sqlite") {
|
|
20
|
+
// ── SQLite path ──
|
|
21
|
+
// extract always writes snapshots as files, so we read those then write
|
|
22
|
+
// intel + FTS into guardian.db. This avoids requiring --backend on extract.
|
|
23
|
+
const store = new SqliteSpecsStore(layout.rootDir);
|
|
24
|
+
await store.init();
|
|
25
|
+
try {
|
|
26
|
+
// Read snapshots from the existing file-based layout
|
|
27
|
+
const machineDir = layout.machineDir;
|
|
28
|
+
const [archRaw, uxRaw] = await Promise.all([
|
|
29
|
+
(await import("node:fs/promises")).readFile((await import("node:path")).join(machineDir, "architecture.snapshot.yaml"), "utf8"),
|
|
30
|
+
(await import("node:fs/promises")).readFile((await import("node:path")).join(machineDir, "ux.snapshot.yaml"), "utf8"),
|
|
31
|
+
]);
|
|
32
|
+
// Populate snapshots into the store so writeCodebaseIntelligenceViaStore can read them
|
|
33
|
+
await store.writeSpec("architecture.snapshot", archRaw, "yaml");
|
|
34
|
+
await store.writeSpec("ux.snapshot", uxRaw, "yaml");
|
|
35
|
+
// Build intel and write to DB
|
|
36
|
+
await writeCodebaseIntelligenceViaStore(store);
|
|
37
|
+
// Build FTS5 index — enrich with all extract output for best recall
|
|
38
|
+
const intelEntry = await store.readSpec("codebase-intelligence");
|
|
39
|
+
if (intelEntry) {
|
|
40
|
+
const intel = JSON.parse(intelEntry.content);
|
|
41
|
+
const archEntry = await store.readSpec("architecture.snapshot");
|
|
42
|
+
const arch = archEntry ? (await import("js-yaml")).load(archEntry.content) : undefined;
|
|
43
|
+
// Also load function-intelligence if present in the machine dir
|
|
44
|
+
let funcIntel;
|
|
45
|
+
try {
|
|
46
|
+
const fnRaw = await (await import("node:fs/promises")).readFile((await import("node:path")).join(machineDir, "function-intelligence.json"), "utf8");
|
|
47
|
+
funcIntel = JSON.parse(fnRaw);
|
|
48
|
+
}
|
|
49
|
+
catch { /* not generated yet — skip */ }
|
|
50
|
+
populateFTSIndex(store, intel, arch, funcIntel);
|
|
51
|
+
console.log(`Built FTS5 search index (${Object.keys(intel.api_registry ?? {}).length} endpoints indexed)`);
|
|
52
|
+
}
|
|
53
|
+
console.log(`Wrote guardian.db → ${layout.rootDir}`);
|
|
54
|
+
}
|
|
55
|
+
finally {
|
|
56
|
+
await store.close();
|
|
57
|
+
}
|
|
58
|
+
return;
|
|
59
|
+
}
|
|
60
|
+
// ── File path (default): original behavior, unchanged ──
|
|
15
61
|
const outputPath = options.output
|
|
16
62
|
? path.resolve(options.output)
|
|
17
63
|
: path.join(layout.machineDir, "codebase-intelligence.json");
|