edsger 0.55.4 → 0.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,312 @@
1
+ /**
2
+ * Type definitions for the quality-benchmark phase.
3
+ *
4
+ * The shapes in this file are the canonical TS counterpart to the JSON
5
+ * structures stored in the `quality_reports` table and emitted by the
6
+ * Claude Agent SDK at the end of a benchmark run. Keep them in sync with:
7
+ * * supabase/migrations/20260515000000_create_quality_reports.sql
8
+ * * packages/edsger/src/phases/quality-benchmark/rubric.md
9
+ */
10
+ export declare const DIMENSIONS: readonly ["architecture", "code_quality", "test_coverage", "security", "performance", "documentation", "maintainability", "dependency_health"]; // The eight rubric dimensions, typed as a readonly tuple of string literals.
11
+ export type Dimension = (typeof DIMENSIONS)[number]; // Union of the DIMENSIONS entries; used as the key type of DimensionScores and AppliedChecksByDimension.
12
+ export type Grade = 'A' | 'B' | 'C' | 'D' | 'F' | 'N/A'; // Letter grade used by DimensionScore.grade and by the report-level overall_grade.
13
+ export type Severity = 'critical' | 'high' | 'medium' | 'low'; // Severity attached to each RawFinding; the same four levels appear as keys in ToolSummaryFindings.counts.
14
+ export type Effort = 'low' | 'medium' | 'high'; // Value type of Recommendation.effort.
15
+ export type Impact = 'low' | 'medium' | 'high'; // Value type of Recommendation.impact.
16
+ export declare const RUBRIC_VERSION: "v1"; // Literal-typed rubric version; QualityReportPayload.rubric_version is typed as `typeof RUBRIC_VERSION`.
17
+ export type Archetype = 'library' | 'cli' | 'web-app' | 'backend-service' | 'mobile' | 'data-pipeline' | 'infra' | 'monorepo' | 'embedded' | 'desktop-app' | 'other'; // Project archetype recorded in DetectedContext.archetype; 'other' is the catch-all member.
18
+ export interface DetectedContext { // Detected facts about the scanned repository; embedded in QualityReportPayload.detected_context.
19
+ archetype: Archetype;
20
+ primary_languages: string[];
21
+ frameworks: string[];
22
+ package_managers: string[];
23
+ build_system?: string; // Optional: the property may be omitted entirely.
24
+ test_frameworks: string[];
25
+ ci_configured: boolean;
26
+ lockfile_present: boolean;
27
+ scanned_commit_sha: string; // NOTE(review): presumably the commit the scan ran against — confirm.
28
+ file_count_scanned: number;
29
+ total_loc_approx: number; // Approximate by name; how it is computed is not visible in this file.
30
+ }
31
+ /** Language tag used to gate tool applicability (the element type of `ToolCatalogEntry.applies_to`). `'all'` = polyglot tools. */
32
+ export type LanguageTag = 'js' | 'ts' | 'py' | 'go' | 'rust' | 'java' | 'kotlin' | 'ruby' | 'c' | 'cpp' | 'cs' | 'swift' | 'all';
33
+ export type ToolCategory = 'lint' | 'sast' | 'duplication' | 'complexity' | 'dead-code' | 'cycles' | 'coverage' | 'dep-vuln' | 'dep-outdated' | 'dep-unused' | 'dep-license' | 'secrets' | 'loc-stats' | 'typecheck'; // Category of a tool; used by ToolCatalogEntry.category and UnavailableTool.category.
34
+ export type InstallerPrereq = 'pipx' | 'go' | 'cargo' | 'npx' | 'gem' | null; // Value type of ToolCatalogEntry.install_prereq. NOTE(review): null presumably means "no prerequisite installer" — confirm.
35
+ export interface ToolCatalogEntry { // One tool in the catalog: how to probe for it, install it, run it, and parse its output.
36
+ /** Stable identifier matching `tool_versions` keys and parser names. */
37
+ id: string;
38
+ /** Human-readable label for UI / logs. */
39
+ label: string;
40
+ category: ToolCategory;
41
+ /** Languages this tool applies to. */
42
+ applies_to: readonly LanguageTag[];
43
+ /** Bash command to detect presence. Should be safe (no side effects). */
44
+ probe: string;
45
+ /**
46
+ * Bash command to install the tool to user-space. null means we never
47
+ * attempt to install (e.g. project-local-only tools accessed via npx).
48
+ */
49
+ install: string | null;
50
+ /** Prerequisite installer that must itself be present. */
51
+ install_prereq: InstallerPrereq;
52
+ /**
53
+ * Command template. Placeholders:
54
+ * %REPO_ROOT% - absolute path to the repo
55
+ * %PKG_MANAGER% - detected package manager (npm/pnpm/yarn)
56
+ * %SCAN_DIR% - per-run scratch dir for tool outputs
57
+ */
58
+ command: string;
59
+ timeout_minutes: number; // NOTE(review): presumably the per-tool execution limit, in minutes — confirm against the runner.
60
+ /** Parser module key under `phases/quality-benchmark/parsers/`. */
61
+ parser: string;
62
+ /**
63
+ * Subscores this tool feeds. A single tool may contribute to multiple
64
+ * subscores (e.g. semgrep -> security.sast + performance.n_plus_one).
65
+ */
66
+ subscores: readonly `${Dimension}.${string}`[]; // Each key must start with a Dimension name followed by a dot.
67
+ /**
68
+ * Optional conditional gate that depends on detected_context — used when
69
+ * applies_to is too coarse (e.g. npm-audit only runs with package-lock.json).
70
+ */
71
+ requires?: { // NOTE(review): whether the lists below are matched any-of or all-of is not visible here — confirm.
72
+ file_present?: string[];
73
+ package_manager?: string[];
74
+ framework?: string[];
75
+ };
76
+ /**
77
+ * If non-zero exit code is expected even on success (e.g. linters that
78
+ * exit 1 when findings exist), tolerate it as long as stdout parses.
79
+ */
80
+ tolerate_nonzero_exit?: boolean;
81
+ }
82
+ export interface UnavailableTool { // A tool that was not available for a run, with the reason; listed in the report's unavailable_tools array.
83
+ name: string;
84
+ category: ToolCategory;
85
+ install_command: string | null; // NOTE(review): presumably mirrors ToolCatalogEntry.install (null = no install command) — confirm.
86
+ reason: 'not_found' | 'wrong_version' | 'install_failed' | 'prereq_missing' | 'install_disabled' | 'timed_out' | 'unknown';
87
+ /** Tail of stderr when install or probe failed (<=500 chars). */
88
+ detail?: string;
89
+ }
90
+ export interface ToolRunOutput { // Record of one tool execution; the value type of the report's tool_outputs map.
91
+ tool_id: string;
92
+ ran_at: string; // NOTE(review): presumably a timestamp string (format not visible here) — confirm.
93
+ duration_ms: number;
94
+ exit_code: number;
95
+ /** Count of findings parsed from stdout (post-parser). */
96
+ findings_count: number;
97
+ /** Short human-readable summary, e.g. "23 errors, 41 warnings". */
98
+ summary: string;
99
+ /** True if the tool's parser succeeded; false if output couldn't be parsed. */
100
+ parsed: boolean;
101
+ /** Tail of stderr if non-zero exit and not tolerated (<=500 chars). */
102
+ stderr_tail?: string;
103
+ /** Path on disk where the full tool output is saved for audit. */
104
+ raw_output_path?: string;
105
+ }
106
+ /**
107
+ * Tier 1 — counts only. Used for style/quality linters where individual
108
+ * findings are noisy and the score depends on volume, not specific lines.
109
+ * Examples: eslint, ruff, golangci-lint, clippy, rubocop, mypy, tsc.
110
+ */
111
+ export interface ToolSummaryCounts {
112
+ tier: 'counts'; // Discriminant of the ToolSummary union.
113
+ counts: {
114
+ errors: number;
115
+ warnings: number;
116
+ info: number;
117
+ };
118
+ }
119
+ /**
120
+ * Tier 2 — counts plus the top-N most severe findings with file:line.
121
+ * Used for security and correctness tools where users need to see
122
+ * specific offending lines. Top findings feed the Verifier and the
123
+ * evidence section of the report.
124
+ * Examples: semgrep, gosec, bandit, gitleaks, npm-audit, pip-audit.
125
+ */
126
+ export interface ToolSummaryFindings {
127
+ tier: 'findings'; // Discriminant of the ToolSummary union.
128
+ counts: {
129
+ critical: number;
130
+ high: number;
131
+ medium: number;
132
+ low: number;
133
+ total: number; // NOTE(review): presumably the sum of the four severity counts — confirm.
134
+ };
135
+ top_findings: RawFinding[]; // The top-N findings described above; N is not fixed by this type.
136
+ }
137
+ /**
138
+ * Tier 3 — domain-specific structured metrics. Used for tools whose
139
+ * value is statistics or distributions, not findings.
140
+ * Examples: scc (LOC), lizard (complexity), radon (complexity buckets),
141
+ * license-checker (license breakdown), jscpd (duplication %).
142
+ */
143
+ export interface ToolSummaryMetrics {
144
+ tier: 'metrics'; // Discriminant of the ToolSummary union.
145
+ metrics: Record<string, unknown>; // Free-form, tool-specific keys; values are `unknown` and must be narrowed before use.
146
+ }
147
+ export type ToolSummary = ToolSummaryCounts | ToolSummaryFindings | ToolSummaryMetrics; // Union of the three tiers; narrow on `tier` ('counts' | 'findings' | 'metrics').
148
+ export interface ParsedToolOutput { // Return value of a ParserFn for one tool.
149
+ tool_id: string;
150
+ /** Discriminated union of the three summary tiers. */
151
+ summary: ToolSummary;
152
+ /** Short, human-readable one-liner, e.g. "23 errors, 41 warnings". */
153
+ oneliner: string;
154
+ }
155
+ export interface ParserContext { // Context passed to a ParserFn as its third argument.
156
+ repo_root: string; // NOTE(review): presumably an absolute path, like the %REPO_ROOT% placeholder — confirm.
157
+ /** Repo-relative paths in this set are valid (cheap existence check). */
158
+ files_known?: Set<string>;
159
+ }
160
+ export type ParserFn = (stdout: string, stderr: string, ctx: ParserContext) => ParsedToolOutput; // Parser signature: takes a tool's stdout, stderr and a ParserContext, and returns a ParsedToolOutput.
161
+ export type FindingSource = `tool:${string}` | 'llm_judgment'; // Either a `tool:`-prefixed string or the literal 'llm_judgment'. NOTE(review): the suffix is presumably a catalog tool id — confirm.
162
+ export interface RawFinding { // A single located finding (file, line, issue, severity) attributed to one source.
163
+ /** Repo-relative path. */
164
+ file: string;
165
+ /** 1-based line number. */
166
+ line: number;
167
+ /** One-sentence description. */
168
+ issue: string;
169
+ severity: Severity;
170
+ /** Optional code snippet, <=200 chars. */
171
+ snippet?: string;
172
+ /** Source attribution. */
173
+ source: FindingSource;
174
+ /** Tool-specific rule id when available. */
175
+ rule_id?: string;
176
+ /** CWE identifiers when known. */
177
+ cwe?: string[];
178
+ /** Subscore this finding feeds into, e.g. "security.sast". */
179
+ subscore_key: `${Dimension}.${string}`; // Must start with a Dimension name followed by a dot.
180
+ }
181
+ export interface VerifiedFinding extends RawFinding { // RawFinding plus a merged `sources` list and a stable `id`; the element type of DimensionScore.evidence.
182
+ /** When the same issue is found by multiple tools, all source IDs are merged. */
183
+ sources: FindingSource[]; // The inherited singular `source` field is still present alongside this list.
184
+ /** Internal stable id used for cross-referencing in recommendations. */
185
+ id: string;
186
+ }
187
+ export interface AppliedCheck { // One check applied within a dimension; grouped per dimension by AppliedChecksByDimension.
188
+ /** Stable check id, e.g. "arch.circular_deps". */
189
+ id: string;
190
+ /** Tool id from catalog, or "llm_judgment". */
191
+ tool: string;
192
+ /** Weight within parent subscore. */
193
+ weight: number;
194
+ /** True if the check actually executed and produced parseable output. */
195
+ measured: boolean;
196
+ /** Reason it didn't run (when measured = false). */
197
+ unmeasured_reason?: string;
198
+ }
199
+ export type AppliedChecksByDimension = Record<Dimension, AppliedCheck[]>; // One AppliedCheck list per Dimension; Record makes every dimension key required.
200
+ export interface GitSignals { // Git-derived signals (ExternalSignals.git); every field is optional.
201
+ authors_90d?: number; // NOTE(review): the _90d / _30d suffixes presumably denote trailing-day windows — confirm.
202
+ top_churn_files?: {
203
+ file: string;
204
+ commits_30d: number;
205
+ }[];
206
+ bug_fix_commits_90d?: number;
207
+ commit_message_sample?: string[];
208
+ }
209
+ export interface GitHubIssueSignals { // GitHub issue signals (ExternalSignals.github_issues); every field is optional.
210
+ open_bugs?: number;
211
+ open_security?: number;
212
+ open_regressions?: number;
213
+ avg_issue_age_days?: number;
214
+ unavailable?: string; // NOTE(review): presumably the reason these signals could not be collected — confirm.
215
+ }
216
+ export interface SentrySignals { // Sentry signals (ExternalSignals.sentry); every top-level field is optional.
217
+ unresolved_7d?: number;
218
+ top_errors?: {
219
+ title: string;
220
+ count: number;
221
+ first_seen: string; // NOTE(review): first_seen / last_seen are presumably timestamp strings — confirm format.
222
+ last_seen: string;
223
+ }[];
224
+ unavailable?: string; // NOTE(review): presumably the reason these signals could not be collected — confirm.
225
+ }
226
+ export interface ExternalSignals { // Container for the optional git / GitHub-issue / Sentry signal groups; any of them may be absent.
227
+ git?: GitSignals;
228
+ github_issues?: GitHubIssueSignals;
229
+ sentry?: SentrySignals;
230
+ }
231
+ export interface Subscore { // One entry in DimensionScore.subscores.
232
+ /** Aggregated 0-100 value for this subscore (or null if N/A). */
233
+ value: number | null;
234
+ /** Fraction of weighted checks that actually ran (0..1). */
235
+ measured_coverage: number;
236
+ /** Optional human-readable detail, e.g. "Limited measurement…". */
237
+ summary?: string;
238
+ }
239
+ export interface Recommendation { // A suggested improvement; the element type of DimensionScore.recommendations.
240
+ title: string;
241
+ effort: Effort;
242
+ impact: Impact;
243
+ description: string;
244
+ files?: string[]; // NOTE(review): presumably repo-relative paths, like RawFinding.file — confirm.
245
+ /** Optional reference back to the evidence IDs this rec resolves. */
246
+ blocks_evidence?: string[]; // NOTE(review): presumably VerifiedFinding.id values — confirm.
247
+ }
248
+ export interface DimensionScore { // Score, grade, evidence and recommendations for a single dimension.
249
+ score: number | null; // NOTE(review): null presumably pairs with grade 'N/A' and a non-null n_a_reason — confirm.
250
+ grade: Grade;
251
+ summary: string;
252
+ subscores: Record<string, Subscore>; // Keyed by an arbitrary string; the key format is not constrained by this type.
253
+ evidence: VerifiedFinding[];
254
+ recommendations: Recommendation[];
255
+ n_a_reason: string | null;
256
+ }
257
+ export type DimensionScores = Record<Dimension, DimensionScore>; // One DimensionScore per Dimension; Record makes every dimension key required.
258
+ export interface QualityReportPayload { // Complete report payload; QualityReportRow repeats these fields (with looser types) and adds run-tracking columns.
259
+ rubric_version: typeof RUBRIC_VERSION; // Pinned to the literal type of RUBRIC_VERSION.
260
+ detected_context: DetectedContext;
261
+ tool_versions: Record<string, string>; // Keys match ToolCatalogEntry.id, per that field's comment.
262
+ unavailable_tools: UnavailableTool[];
263
+ applied_checks: AppliedChecksByDimension;
264
+ tool_outputs: Record<string, ToolRunOutput>;
265
+ external_signals: ExternalSignals;
266
+ dimension_scores: DimensionScores;
267
+ dropped_findings: number; // NOTE(review): presumably the count of findings discarded before reporting — confirm.
268
+ overall_score: number | null;
269
+ overall_grade: Grade;
270
+ executive_summary: string;
271
+ low_confidence: boolean;
272
+ }
273
+ export type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled'; // Value type of QualityReportRow.status.
274
+ export type RunPhase = 'detection' | 'probing' | 'installation' | 'execution' | 'external_signals' | 'verification' | 'synthesis'; // Value type of QualityReportRow.current_phase and error_phase.
275
+ export interface RunProgress { // Progress snapshot stored in QualityReportRow.progress; every field is optional.
276
+ tools_total?: number;
277
+ tools_completed?: number;
278
+ current_tool?: string;
279
+ current_dimension?: Dimension;
280
+ message?: string;
281
+ }
282
+ export interface QualityReportRow { // NOTE(review): presumably the row shape of the `quality_reports` table — confirm against the migration.
283
+ id: string;
284
+ product_id: string;
285
+ commit_sha: string;
286
+ branch: string | null;
287
+ repo_root: string | null;
288
+ rubric_version: string; // Plain string here, unlike the payload's `typeof RUBRIC_VERSION`.
289
+ detected_context: DetectedContext | Record<string, never>; // Record<string, never> is the empty object. NOTE(review): presumably the value before results exist — confirm.
290
+ tool_versions: Record<string, string>;
291
+ unavailable_tools: UnavailableTool[];
292
+ applied_checks: AppliedChecksByDimension | Record<string, never>; // May be the empty object, as with detected_context.
293
+ tool_outputs: Record<string, ToolRunOutput>;
294
+ external_signals: ExternalSignals;
295
+ dropped_findings: number;
296
+ dimension_scores: DimensionScores | Record<string, never>; // May be the empty object, as with detected_context.
297
+ overall_score: number | null;
298
+ overall_grade: Grade | null; // Nullable here; non-null in QualityReportPayload.
299
+ executive_summary: string | null; // Nullable here; non-null in QualityReportPayload.
300
+ low_confidence: boolean;
301
+ status: RunStatus;
302
+ current_phase: RunPhase | null;
303
+ progress: RunProgress;
304
+ error_message: string | null;
305
+ error_phase: RunPhase | null;
306
+ started_at: string | null; // NOTE(review): the *_at fields are presumably timestamp strings — confirm format.
307
+ completed_at: string | null;
308
+ duration_seconds: number | null;
309
+ created_by: string | null;
310
+ created_at: string;
311
+ updated_at: string;
312
+ }
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Type definitions for the quality-benchmark phase.
3
+ *
4
+ * The shapes in this file are the canonical TS counterpart to the JSON
5
+ * structures stored in the `quality_reports` table and emitted by the
6
+ * Claude Agent SDK at the end of a benchmark run. Keep them in sync with:
7
+ * * supabase/migrations/20260515000000_create_quality_reports.sql
8
+ * * packages/edsger/src/phases/quality-benchmark/rubric.md
9
+ */
10
+ // ---------------------------------------------------------------------------
11
+ // Rubric: dimensions, grades, severities
12
+ // ---------------------------------------------------------------------------
13
+ export const DIMENSIONS = [ // Runtime list of the eight rubric dimension names. It is a plain array: nothing in this file freezes it.
14
+ 'architecture',
15
+ 'code_quality',
16
+ 'test_coverage',
17
+ 'security',
18
+ 'performance',
19
+ 'documentation',
20
+ 'maintainability',
21
+ 'dependency_health',
22
+ ];
23
+ export const RUBRIC_VERSION = 'v1'; // Rubric version string exported at runtime.
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "edsger",
3
- "version": "0.55.4",
3
+ "version": "0.56.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "edsger": "dist/index.js"
7
7
  },
8
8
  "scripts": {
9
- "build": "tsc -p tsconfig.build.json && cp -r src/phases/app-store-generation/assets dist/phases/app-store-generation/ && rm -rf dist/skills && mkdir -p dist/skills && cp -r ../edsger-skills/skills/phase dist/skills/phase",
9
+ "build": "tsc -p tsconfig.build.json && cp -r src/phases/app-store-generation/assets dist/phases/app-store-generation/ && cp src/phases/quality-benchmark/rubric.md dist/phases/quality-benchmark/rubric.md && rm -rf dist/skills && mkdir -p dist/skills && cp -r ../edsger-skills/skills/phase dist/skills/phase",
10
10
  "dev": "tsc -p tsconfig.build.json --watch",
11
11
  "lint": "eslint .",
12
12
  "lint:fix": "eslint . --fix",
@@ -49,8 +49,8 @@
49
49
  "commander": "^12.0.0",
50
50
  "cosmiconfig": "^9.0.0",
51
51
  "dotenv": "^16.4.5",
52
- "edsger-contract": "0.2.0",
53
- "edsger-tools": "0.2.0",
52
+ "edsger-contract": "0.3.0",
53
+ "edsger-tools": "0.3.0",
54
54
  "gray-matter": "^4.0.3",
55
55
  "zod": "^4.0.0"
56
56
  },