npm - edsger - Versions diffs - 0.55.4 → 0.56.0 - Mend

edsger 0.55.4 → 0.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/dist/commands/quality-benchmark/index.d.ts +32 -0
package/dist/commands/quality-benchmark/index.js +124 -0
package/dist/index.js +24 -0
package/dist/phases/quality-benchmark/index.d.ts +65 -0
package/dist/phases/quality-benchmark/index.js +194 -0
package/dist/phases/quality-benchmark/mcp-server.d.ts +46 -0
package/dist/phases/quality-benchmark/mcp-server.js +252 -0
package/dist/phases/quality-benchmark/parsers.d.ts +22 -0
package/dist/phases/quality-benchmark/parsers.js +1022 -0
package/dist/phases/quality-benchmark/prompts.d.ts +31 -0
package/dist/phases/quality-benchmark/prompts.js +154 -0
package/dist/phases/quality-benchmark/rubric.md +1066 -0
package/dist/phases/quality-benchmark/tool-catalog.d.ts +33 -0
package/dist/phases/quality-benchmark/tool-catalog.js +597 -0
package/dist/phases/quality-benchmark/tool-runner.d.ts +69 -0
package/dist/phases/quality-benchmark/tool-runner.js +399 -0
package/dist/phases/quality-benchmark/types.d.ts +312 -0
package/dist/phases/quality-benchmark/types.js +23 -0
package/package.json +4 -4

package/dist/phases/quality-benchmark/types.d.ts ADDED Viewed

@@ -0,0 +1,312 @@
+/**
+ * Type definitions for the quality-benchmark phase.
+ *
+ * The shapes in this file are the canonical TS counterpart to the JSON
+ * structures stored in the `quality_reports` table and emitted by the
+ * Claude Agent SDK at the end of a benchmark run. Keep them in sync with:
+ *   * supabase/migrations/20260515000000_create_quality_reports.sql
+ *   * packages/edsger/src/phases/quality-benchmark/rubric.md
+ */
+export declare const DIMENSIONS: readonly ["architecture", "code_quality", "test_coverage", "security", "performance", "documentation", "maintainability", "dependency_health"]; // eight rubric dimensions; also the required keys of AppliedChecksByDimension and DimensionScores
+export type Dimension = (typeof DIMENSIONS)[number]; // union of the DIMENSIONS string literals
+export type Grade = 'A' | 'B' | 'C' | 'D' | 'F' | 'N/A'; // used by DimensionScore.grade and the overall_grade fields
+export type Severity = 'critical' | 'high' | 'medium' | 'low'; // used by RawFinding.severity
+export type Effort = 'low' | 'medium' | 'high'; // used by Recommendation.effort
+export type Impact = 'low' | 'medium' | 'high'; // used by Recommendation.impact
+export declare const RUBRIC_VERSION: "v1"; // literal type; QualityReportPayload.rubric_version is `typeof RUBRIC_VERSION`
+export type Archetype = 'library' | 'cli' | 'web-app' | 'backend-service' | 'mobile' | 'data-pipeline' | 'infra' | 'monorepo' | 'embedded' | 'desktop-app' | 'other'; // used by DetectedContext.archetype
+export interface DetectedContext { // type of QualityReportPayload.detected_context
+    archetype: Archetype;
+    primary_languages: string[];
+    frameworks: string[];
+    package_managers: string[];
+    build_system?: string; // the only optional field in this interface
+    test_frameworks: string[];
+    ci_configured: boolean;
+    lockfile_present: boolean;
+    scanned_commit_sha: string; // NOTE(review): presumably the commit the scan ran against — confirm
+    file_count_scanned: number;
+    total_loc_approx: number; // approximate, per the name; how it is measured is not visible here
+}
+/** Language tag used to gate tool applicability. `'all'` = polyglot tools. */
+export type LanguageTag = 'js' | 'ts' | 'py' | 'go' | 'rust' | 'java' | 'kotlin' | 'ruby' | 'c' | 'cpp' | 'cs' | 'swift' | 'all'; // element type of ToolCatalogEntry.applies_to
+export type ToolCategory = 'lint' | 'sast' | 'duplication' | 'complexity' | 'dead-code' | 'cycles' | 'coverage' | 'dep-vuln' | 'dep-outdated' | 'dep-unused' | 'dep-license' | 'secrets' | 'loc-stats' | 'typecheck'; // used by ToolCatalogEntry.category and UnavailableTool.category
+export type InstallerPrereq = 'pipx' | 'go' | 'cargo' | 'npx' | 'gem' | null; // NOTE(review): null presumably means no prerequisite installer — confirm
+export interface ToolCatalogEntry {
+    /** Stable identifier matching `tool_versions` keys and parser names. */
+    id: string;
+    /** Human-readable label for UI / logs. */
+    label: string;
+    category: ToolCategory;
+    /** Languages this tool applies to. */
+    applies_to: readonly LanguageTag[];
+    /** Bash command to detect presence. Should be safe (no side effects). */
+    probe: string;
+    /**
+     * Bash command to install the tool to user-space. null means we never
+     * attempt to install (e.g. project-local-only tools accessed via npx).
+     */
+    install: string | null;
+    /** Prerequisite installer that must itself be present. */
+    install_prereq: InstallerPrereq;
+    /**
+     * Command template. Placeholders:
+     *   %REPO_ROOT%   - absolute path to the repo
+     *   %PKG_MANAGER% - detected package manager (npm/pnpm/yarn)
+     *   %SCAN_DIR%    - per-run scratch dir for tool outputs
+     */
+    command: string;
+    timeout_minutes: number;
+    /** Parser key. NOTE(review): this package ships a single `phases/quality-benchmark/parsers` module, not a `parsers/` directory — confirm how the key is resolved. */
+    parser: string;
+    /**
+     * Subscores this tool feeds. A single tool may contribute to multiple
+     * subscores (e.g. semgrep -> security.sast + performance.n_plus_one).
+     */
+    subscores: readonly `${Dimension}.${string}`[];
+    /**
+     * Optional conditional gate that depends on detected_context — used when
+     * applies_to is too coarse (e.g. npm-audit only runs with package-lock.json).
+     */
+    requires?: {
+        file_present?: string[];
+        package_manager?: string[];
+        framework?: string[];
+    };
+    /**
+     * If non-zero exit code is expected even on success (e.g. linters that
+     * exit 1 when findings exist), tolerate it as long as stdout parses.
+     */
+    tolerate_nonzero_exit?: boolean;
+}
+export interface UnavailableTool { // element type of the unavailable_tools arrays on the payload and row types
+    name: string; // NOTE(review): unclear whether this holds ToolCatalogEntry.id or .label — confirm
+    category: ToolCategory;
+    install_command: string | null; // same type as ToolCatalogEntry.install
+    reason: 'not_found' | 'wrong_version' | 'install_failed' | 'prereq_missing' | 'install_disabled' | 'timed_out' | 'unknown';
+    /** Tail of stderr when install or probe failed (<=500 chars). */
+    detail?: string;
+}
+export interface ToolRunOutput { // value type of the tool_outputs records on the payload and row types
+    tool_id: string;
+    ran_at: string; // NOTE(review): presumably an ISO-8601 timestamp — confirm
+    duration_ms: number;
+    exit_code: number;
+    /** Count of findings parsed from stdout (post-parser). */
+    findings_count: number;
+    /** Short human-readable summary, e.g. "23 errors, 41 warnings". */
+    summary: string;
+    /** True if the tool's parser succeeded; false if output couldn't be parsed. */
+    parsed: boolean;
+    /** Tail of stderr if non-zero exit and not tolerated (<=500 chars). */
+    stderr_tail?: string;
+    /** Path on disk where the full tool output is saved for audit. */
+    raw_output_path?: string;
+}
+/**
+ * Tier 1 — counts only. Used for style/quality linters where individual
+ * findings are noisy and the score depends on volume, not specific lines.
+ * Examples: eslint, ruff, golangci-lint, clippy, rubocop, mypy, tsc.
+ */
+export interface ToolSummaryCounts {
+    tier: 'counts'; // discriminant within the ToolSummary union
+    counts: {
+        errors: number;
+        warnings: number;
+        info: number;
+    };
+}
+/**
+ * Tier 2 — counts plus the top-N most severe findings with file:line.
+ * Used for security and correctness tools where users need to see
+ * specific offending lines. Top findings feed the Verifier and the
+ * evidence section of the report.
+ * Examples: semgrep, gosec, bandit, gitleaks, npm-audit, pip-audit.
+ */
+export interface ToolSummaryFindings {
+    tier: 'findings'; // discriminant within the ToolSummary union
+    counts: {
+        critical: number;
+        high: number;
+        medium: number;
+        low: number;
+        total: number; // NOTE(review): presumably critical + high + medium + low — confirm
+    };
+    top_findings: RawFinding[]; // the value of N is not fixed by this type
+}
+/**
+ * Tier 3 — domain-specific structured metrics. Used for tools whose
+ * value is statistics or distributions, not findings.
+ * Examples: scc (LOC), lizard (complexity), radon (complexity buckets),
+ * license-checker (license breakdown), jscpd (duplication %).
+ */
+export interface ToolSummaryMetrics {
+    tier: 'metrics'; // discriminant within the ToolSummary union
+    metrics: Record<string, unknown>; // values are `unknown`, so consumers must narrow before use
+}
+export type ToolSummary = ToolSummaryCounts | ToolSummaryFindings | ToolSummaryMetrics; // each member carries a distinct literal `tier`
+export interface ParsedToolOutput { // return type of ParserFn
+    tool_id: string;
+    /** Discriminated union of the three summary tiers. */
+    summary: ToolSummary;
+    /** Short, human-readable one-liner, e.g. "23 errors, 41 warnings". */
+    oneliner: string;
+}
+export interface ParserContext { // third argument of ParserFn
+    repo_root: string;
+    /** Repo-relative paths in this set are valid (cheap existence check). */
+    files_known?: Set<string>;
+}
+export type ParserFn = (stdout: string, stderr: string, ctx: ParserContext) => ParsedToolOutput; // synchronous signature: returns the value directly, not a Promise
+export type FindingSource = `tool:${string}` | 'llm_judgment'; // either a string starting with "tool:" or the literal 'llm_judgment'
+export interface RawFinding { // element type of ToolSummaryFindings.top_findings; base of VerifiedFinding
+    /** Repo-relative path. */
+    file: string;
+    /** 1-based line number. */
+    line: number;
+    /** One-sentence description. */
+    issue: string;
+    severity: Severity;
+    /** Optional code snippet, <=200 chars. */
+    snippet?: string;
+    /** Source attribution. */
+    source: FindingSource;
+    /** Tool-specific rule id when available. */
+    rule_id?: string;
+    /** CWE identifiers when known. */
+    cwe?: string[];
+    /** Subscore this finding feeds into, e.g. "security.sast". */
+    subscore_key: `${Dimension}.${string}`; // must begin with a Dimension literal followed by a dot
+}
+export interface VerifiedFinding extends RawFinding { // element type of DimensionScore.evidence; still carries the inherited singular `source`
+    /** When the same issue is found by multiple tools, all source IDs are merged. */
+    sources: FindingSource[];
+    /** Internal stable id used for cross-referencing in recommendations. */
+    id: string;
+}
+export interface AppliedCheck {
+    /** Stable check id, e.g. "arch.circular_deps". */
+    id: string; // plain string: the "arch." prefix in the example is not a Dimension literal, unlike RawFinding.subscore_key
+    /** Tool id from catalog, or "llm_judgment". */
+    tool: string;
+    /** Weight within parent subscore. */
+    weight: number;
+    /** True if the check actually executed and produced parseable output. */
+    measured: boolean;
+    /** Reason it didn't run (when measured = false). */
+    unmeasured_reason?: string;
+}
+export type AppliedChecksByDimension = Record<Dimension, AppliedCheck[]>; // Record makes every Dimension key required
+export interface GitSignals { // every field is optional
+    authors_90d?: number; // NOTE(review): the _90d / _30d suffixes suggest day windows — confirm
+    top_churn_files?: {
+        file: string;
+        commits_30d: number;
+    }[];
+    bug_fix_commits_90d?: number;
+    commit_message_sample?: string[];
+}
+export interface GitHubIssueSignals { // every field is optional
+    open_bugs?: number;
+    open_security?: number;
+    open_regressions?: number;
+    avg_issue_age_days?: number;
+    unavailable?: string; // NOTE(review): presumably the reason these signals could not be collected — confirm
+}
+export interface SentrySignals { // every top-level field is optional
+    unresolved_7d?: number;
+    top_errors?: {
+        title: string;
+        count: number;
+        first_seen: string; // NOTE(review): first_seen / last_seen are presumably timestamps; format not specified here
+        last_seen: string;
+    }[];
+    unavailable?: string; // NOTE(review): presumably the reason these signals could not be collected — confirm
+}
+export interface ExternalSignals { // type of the external_signals field; each source may be absent
+    git?: GitSignals;
+    github_issues?: GitHubIssueSignals;
+    sentry?: SentrySignals;
+}
+export interface Subscore { // value type of DimensionScore.subscores
+    /** Aggregated 0-100 value for this subscore (or null if N/A). */
+    value: number | null;
+    /** Fraction of weighted checks that actually ran (0..1). */
+    measured_coverage: number;
+    /** Optional human-readable detail, e.g. "Limited measurement…". */
+    summary?: string;
+}
+export interface Recommendation { // element type of DimensionScore.recommendations
+    title: string;
+    effort: Effort;
+    impact: Impact;
+    description: string;
+    files?: string[];
+    /** Optional evidence IDs this recommendation resolves (presumably VerifiedFinding.id values — confirm). */
+    blocks_evidence?: string[];
+}
+export interface DimensionScore {
+    score: number | null;
+    grade: Grade;
+    summary: string;
+    subscores: Record<string, Subscore>; // keyed by plain string, not the `${Dimension}.${string}` template used by RawFinding.subscore_key
+    evidence: VerifiedFinding[];
+    recommendations: Recommendation[];
+    n_a_reason: string | null; // NOTE(review): presumably non-null only when grade is 'N/A' — confirm
+}
+export type DimensionScores = Record<Dimension, DimensionScore>; // Record makes every Dimension key required
+export interface QualityReportPayload {
+    rubric_version: typeof RUBRIC_VERSION; // the literal "v1"; QualityReportRow.rubric_version is a plain string
+    detected_context: DetectedContext;
+    tool_versions: Record<string, string>; // keys match ToolCatalogEntry.id, per that field's doc comment
+    unavailable_tools: UnavailableTool[];
+    applied_checks: AppliedChecksByDimension;
+    tool_outputs: Record<string, ToolRunOutput>;
+    external_signals: ExternalSignals;
+    dimension_scores: DimensionScores;
+    dropped_findings: number;
+    overall_score: number | null;
+    overall_grade: Grade; // required here; nullable on QualityReportRow
+    executive_summary: string; // required here; nullable on QualityReportRow
+    low_confidence: boolean;
+}
+export type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled'; // type of QualityReportRow.status
+export type RunPhase = 'detection' | 'probing' | 'installation' | 'execution' | 'external_signals' | 'verification' | 'synthesis'; // type of QualityReportRow.current_phase and .error_phase
+export interface RunProgress { // type of QualityReportRow.progress; every field is optional
+    tools_total?: number;
+    tools_completed?: number;
+    current_tool?: string;
+    current_dimension?: Dimension;
+    message?: string;
+}
+export interface QualityReportRow { // NOTE(review): presumably one row of the `quality_reports` table named in the file header — confirm
+    id: string;
+    product_id: string;
+    commit_sha: string;
+    branch: string | null;
+    repo_root: string | null;
+    rubric_version: string; // plain string here, unlike the literal type on QualityReportPayload
+    detected_context: DetectedContext | Record<string, never>; // Record<string, never> admits only an empty object
+    tool_versions: Record<string, string>;
+    unavailable_tools: UnavailableTool[];
+    applied_checks: AppliedChecksByDimension | Record<string, never>; // empty-object alternative, as for detected_context
+    tool_outputs: Record<string, ToolRunOutput>;
+    external_signals: ExternalSignals;
+    dropped_findings: number;
+    dimension_scores: DimensionScores | Record<string, never>; // empty-object alternative, as for detected_context
+    overall_score: number | null;
+    overall_grade: Grade | null; // nullable here; required on QualityReportPayload
+    executive_summary: string | null; // nullable here; required on QualityReportPayload
+    low_confidence: boolean;
+    status: RunStatus;
+    current_phase: RunPhase | null;
+    progress: RunProgress;
+    error_message: string | null;
+    error_phase: RunPhase | null;
+    started_at: string | null; // NOTE(review): the *_at fields are presumably timestamp strings; format not specified here
+    completed_at: string | null;
+    duration_seconds: number | null;
+    created_by: string | null;
+    created_at: string;
+    updated_at: string;
+}

package/dist/phases/quality-benchmark/types.js ADDED Viewed

@@ -0,0 +1,23 @@
+/**
+ * Type definitions for the quality-benchmark phase.
+ *
+ * The shapes in this file are the canonical TS counterpart to the JSON
+ * structures stored in the `quality_reports` table and emitted by the
+ * Claude Agent SDK at the end of a benchmark run. Keep them in sync with:
+ *   * supabase/migrations/20260515000000_create_quality_reports.sql
+ *   * packages/edsger/src/phases/quality-benchmark/rubric.md
+ */
+// ---------------------------------------------------------------------------
+// Rubric: dimensions, grades, severities
+// ---------------------------------------------------------------------------
+export const DIMENSIONS = [ // a plain array at runtime; the readonly tuple typing exists only in types.d.ts
+    'architecture',
+    'code_quality',
+    'test_coverage',
+    'security',
+    'performance',
+    'documentation',
+    'maintainability',
+    'dependency_health',
+];
+export const RUBRIC_VERSION = 'v1'; // declared with the literal type "v1" in types.d.ts

package/package.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "edsger",
-  "version": "0.55.4",
+  "version": "0.56.0",
   "type": "module",
   "bin": {
     "edsger": "dist/index.js"
   },
   "scripts": {
-    "build": "tsc -p tsconfig.build.json && cp -r src/phases/app-store-generation/assets dist/phases/app-store-generation/ && rm -rf dist/skills && mkdir -p dist/skills && cp -r ../edsger-skills/skills/phase dist/skills/phase",
+    "build": "tsc -p tsconfig.build.json && cp -r src/phases/app-store-generation/assets dist/phases/app-store-generation/ && cp src/phases/quality-benchmark/rubric.md dist/phases/quality-benchmark/rubric.md && rm -rf dist/skills && mkdir -p dist/skills && cp -r ../edsger-skills/skills/phase dist/skills/phase",
     "dev": "tsc -p tsconfig.build.json --watch",
     "lint": "eslint .",
     "lint:fix": "eslint . --fix",
@@ -49,8 +49,8 @@
     "commander": "^12.0.0",
     "cosmiconfig": "^9.0.0",
     "dotenv": "^16.4.5",
-    "edsger-contract": "0.2.0",
-    "edsger-tools": "0.2.0",
+    "edsger-contract": "0.3.0",
+    "edsger-tools": "0.3.0",
     "gray-matter": "^4.0.3",
     "zod": "^4.0.0"
   },