npm - cto-ai-cli - Versions diffs - 6.1.0 → 7.1.0 - Mend

cto-ai-cli 6.1.0 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/engine/index.d.ts CHANGED

@@ -247,17 +247,6 @@ interface SelectionInput {
 }
 declare function selectContext(input: SelectionInput): Promise<ContextSelection>;
-declare function scoreAllFiles(files: AnalyzedFile[], graph: ProjectGraph, weights?: RiskWeights): void;
-declare function scoreFile(file: AnalyzedFile, graph: ProjectGraph, weights?: RiskWeights): number;
-declare function calculateCoverage(targetPaths: string[], includedPaths: string[], allFiles: AnalyzedFile[], graph: ProjectGraph, depth?: number): CoverageResult;
-declare function getPruneLevelForRisk(riskScore: number): PruneLevel;
-declare function optimizeBudget(files: AnalyzedFile[], budget: number): Promise<BudgetPlan>;
-declare function pruneFile(file: AnalyzedFile, level: PruneLevel): Promise<PrunedContent>;
-declare function pruneFiles(files: AnalyzedFile[], levelFn: (file: AnalyzedFile) => PruneLevel): Promise<PrunedContent[]>;
 /**
  * TF-IDF Semantic Matching Engine
  *
@@ -326,6 +315,82 @@ declare function tokenize(text: string): string[];
  */
 declare function boostByPath(matches: SemanticMatch[], allFiles: string[], taskDescription: string): SemanticMatch[];
+/**
+ * Persistent TF-IDF Index Cache
+ *
+ * Problem: Building a TF-IDF index reads every source file and tokenizes it.
+ * For a 50K-file repo, that's 5-10 seconds per query. With 20K devs running
+ * queries concurrently, re-indexing on every call is unacceptable.
+ *
+ * Solution: Persist the index to disk with per-file mtime tracking.
+ * On subsequent queries, only re-index files that changed since last build.
+ *
+ * Storage: .cto/index-cache.json
+ *   {
+ *     version: 2,
+ *     builtAt: ISO timestamp,
+ *     files: { [relativePath]: { mtime: number, terms: { [term]: count }, length: number } },
+ *     idf: { [term]: number },
+ *     avgDocLength: number,
+ *     totalDocs: number,
+ *   }
+ *
+ * Invalidation:
+ *   - Per-file: mtime changed → re-tokenize that file
+ *   - New files: not in cache → tokenize and add
+ *   - Deleted files: in cache but not on disk → remove
+ *   - Version bump: cache format changed → full rebuild
+ *
+ * The IDF values are recomputed after any incremental update because
+ * document frequency changes affect all terms globally.
+ */
+interface IndexCacheStats {
+    /** Total files in the index */
+    totalFiles: number;
+    /** Files that were re-indexed (changed or new) */
+    updatedFiles: number;
+    /** Files removed from cache (deleted from disk) */
+    removedFiles: number;
+    /** Files reused from cache (unchanged) */
+    cachedFiles: number;
+    /** Whether the cache existed before this build */
+    cacheHit: boolean;
+    /** Time to build/update the index (ms) */
+    buildTimeMs: number;
+}
+/**
+ * Build or update a TF-IDF index with disk caching.
+ *
+ * First call: builds full index and writes cache to .cto/index-cache.json
+ * Subsequent calls: reads cache, updates only changed files, rewrites cache
+ *
+ * @param projectPath - Root of the project (for .cto/ directory)
+ * @param files - All files to index: { relativePath, absolutePath, content? }
+ *   If content is provided, it's used directly. Otherwise, the file is read from disk.
+ * @returns The TF-IDF index + stats about cache hits/misses
+ */
+declare function buildIndexCached(projectPath: string, files: {
+    relativePath: string;
+    absolutePath: string;
+    content?: string;
+}[]): {
+    index: TfIdfIndex;
+    stats: IndexCacheStats;
+};
+/**
+ * Invalidate the entire cache (force full rebuild on next call).
+ */
+declare function invalidateCache(projectPath: string): void;
+/**
+ * Get cache info (existence, file count, build timestamp) without rebuilding.
+ */
+declare function getCacheInfo(projectPath: string): {
+    exists: boolean;
+    fileCount: number;
+    builtAt: string | null;
+};
 /**
  * Usage Learner — Gets smarter with every use.
  *
@@ -403,6 +468,477 @@ declare function getLearnerStats(model: LearnerModel): {
  */
 declare function extractPattern(filePath: string): string;
+/**
+ * Multi-Repo Context Selection
+ *
+ * Discovers sibling repositories in a workspace and queries them
+ * for relevant files when selecting context for a task.
+ *
+ * How it works:
+ *   1. Discover sibling repos (scan parent dir or use explicit paths)
+ *   2. For each sibling: list source files, read contents, build TF-IDF index
+ *   3. Query each sibling's index with the task description
+ *   4. Return ranked matches with repo attribution
+ *
+ * This is NOT the cross-repo learning system (cross-repo.ts).
+ * This is actual multi-repo file discovery and querying.
+ */
+interface SiblingRepo {
+    /** Absolute path to the repo root */
+    path: string;
+    /** Short name (directory name) */
+    name: string;
+    /** Detected stack (from package.json, tsconfig, etc.) */
+    stack: string[];
+    /** Number of source files found */
+    fileCount: number;
+}
+interface SiblingMatch {
+    /** Which sibling repo this file belongs to */
+    repoName: string;
+    /** Absolute path to the repo */
+    repoPath: string;
+    /** Relative path within the sibling repo */
+    relativePath: string;
+    /** Absolute path to the file */
+    absolutePath: string;
+    /** Semantic relevance score (0-1) */
+    score: number;
+    /** File content */
+    content: string;
+    /** Estimated token count */
+    tokens: number;
+}
+interface MultiRepoResult {
+    /** Sibling repos that were discovered/used */
+    siblings: SiblingRepo[];
+    /** Top matches from sibling repos, ranked by score */
+    matches: SiblingMatch[];
+    /** Total time spent indexing + querying (ms) */
+    timeMs: number;
+}
+/**
+ * Discover sibling repositories by scanning the parent directory.
+ * A directory is a "repo" if it contains a known project marker file.
+ */
+declare function discoverSiblingRepos(projectPath: string): SiblingRepo[];
+/**
+ * Query sibling repos for files relevant to a task.
+ *
+ * For each sibling:
+ *   1. List source files
+ *   2. Build TF-IDF index from file contents
+ *   3. Query with task description
+ *   4. Return top matches with content
+ *
+ * @param siblings - Sibling repos to query (from discoverSiblingRepos, or explicit paths via parseSiblingPaths)
+ * @param task - Task description to match against
+ * @param maxPerRepo - Max matches per repo (default 5)
+ * @param minScore - Minimum semantic score to include (default 0.3)
+ */
+declare function querySiblingRepos(siblings: SiblingRepo[], task: string, maxPerRepo?: number, minScore?: number): MultiRepoResult;
+/**
+ * Parse explicit repo paths from a comma-separated string.
+ * Resolves relative paths against the current project's parent directory.
+ */
+declare function parseSiblingPaths(pathsStr: string, projectPath: string): SiblingRepo[];
+/**
+ * Render multi-repo results for CLI output.
+ */
+declare function renderMultiRepoSummary(result: MultiRepoResult): string;
+/**
+ * Shared Context Pipeline
+ *
+ * Single function that runs the full context selection pipeline:
+ *   read files → build TF-IDF index → query → boost → load learner → selectContext
+ *
+ * Used by both CLI and MCP server. No duplication.
+ */
+interface ContextPipelineInput {
+    projectPath: string;
+    task: string;
+    analysis: ProjectAnalysis;
+    budget?: number;
+    /** Optional sibling repos for cross-repo context */
+    siblingRepos?: SiblingRepo[];
+}
+interface ContextPipelineResult {
+    selection: ContextSelection;
+    taskType: string;
+    fileContentMap: Map<string, string>;
+    semanticMap: Map<string, SemanticMatch>;
+    learnerMap: Map<string, LearnerBoost>;
+    /** Cross-repo results (only present if siblingRepos were provided) */
+    multiRepo?: MultiRepoResult;
+    /** Index cache stats (how many files were cached vs rebuilt) */
+    indexCacheStats?: IndexCacheStats;
+}
+/**
+ * Run the full context selection pipeline.
+ * One function, used everywhere. No copy-paste.
+ */
+declare function runContextPipeline(input: ContextPipelineInput): Promise<ContextPipelineResult>;
+declare function scoreAllFiles(files: AnalyzedFile[], graph: ProjectGraph, weights?: RiskWeights): void;
+declare function scoreFile(file: AnalyzedFile, graph: ProjectGraph, weights?: RiskWeights): number;
+declare function calculateCoverage(targetPaths: string[], includedPaths: string[], allFiles: AnalyzedFile[], graph: ProjectGraph, depth?: number): CoverageResult;
+declare function getPruneLevelForRisk(riskScore: number): PruneLevel;
+declare function optimizeBudget(files: AnalyzedFile[], budget: number): Promise<BudgetPlan>;
+declare function pruneFile(file: AnalyzedFile, level: PruneLevel): Promise<PrunedContent>;
+declare function pruneFiles(files: AnalyzedFile[], levelFn: (file: AnalyzedFile) => PruneLevel): Promise<PrunedContent[]>;
+/**
+ * Closed-Loop A/B Testing Engine
+ *
+ * The missing piece: the feedback system records data but never closes the loop.
+ * This module adds real experimentation:
+ *
+ *   1. Define experiments with control + variant strategies
+ *   2. Assign requests to groups (deterministic hashing for consistency)
+ *   3. Collect outcomes per group
+ *   4. Compute statistical significance (z-test for proportions)
+ *   5. Auto-promote winning variants when significance threshold met
+ *
+ * Example experiment:
+ *   - Control: default composite scoring (semantic 0.55, risk 0.25, learner 0.20)
+ *   - Variant: reranker-heavy scoring (reranker 0.70, risk 0.15, learner 0.15)
+ *   - Metric: acceptance rate
+ *   - Significance: p < 0.05
+ *
+ * Storage: .cto/experiments.json
+ * Design: Pure functions. No external deps. Deterministic assignment.
+ */
+interface Experiment {
+    /** Unique experiment ID */
+    id: string;
+    /** Human-readable name */
+    name: string;
+    /** What we're testing */
+    description: string;
+    /** Current status */
+    status: 'running' | 'concluded' | 'paused';
+    /** When the experiment started */
+    startedAt: string;
+    /** When it concluded (if applicable) */
+    concludedAt?: string;
+    /** Traffic split: 0.5 = 50/50 */
+    trafficSplit: number;
+    /** Minimum observations per group before significance test */
+    minObservations: number;
+    /** P-value threshold for significance */
+    significanceThreshold: number;
+    /** Control group config */
+    control: ExperimentGroup;
+    /** Variant group config */
+    variant: ExperimentGroup;
+    /** Conclusion (when experiment ends) */
+    conclusion?: ExperimentConclusion;
+}
+interface ExperimentGroup {
+    /** Group name */
+    name: string;
+    /** Strategy parameters (passed to the engine) */
+    params: Record<string, unknown>;
+    /** Collected metrics */
+    metrics: GroupMetrics;
+}
+interface GroupMetrics {
+    /** Total observations */
+    total: number;
+    /** Successful outcomes (accepted) */
+    successes: number;
+    /** Accept rate = successes / total */
+    acceptRate: number;
+    /** Average time to accept (ms) */
+    avgTimeToAccept: number;
+    /** Compilable rate */
+    compilableRate: number;
+    /** Sum of time values (for running average) */
+    timeSum: number;
+    /** Count of compilable results */
+    compilableCount: number;
+}
+interface ExperimentConclusion {
+    /** Which group won */
+    winner: 'control' | 'variant' | 'no_difference';
+    /** Observed p-value */
+    pValue: number;
+    /** Effect size (difference in accept rates) */
+    effectSize: number;
+    /** Confidence interval for effect size */
+    confidenceInterval: [number, number];
+    /** Human-readable summary */
+    summary: string;
+}
+interface AssignmentResult {
+    /** Which group the request was assigned to */
+    group: 'control' | 'variant';
+    /** The strategy params for this group */
+    params: Record<string, unknown>;
+    /** Experiment ID for tracking */
+    experimentId: string;
+}
+declare function loadExperiments(projectPath: string): Experiment[];
+declare function saveExperiments(projectPath: string, experiments: Experiment[]): void;
+declare function createExperiment(id: string, name: string, description: string, controlParams: Record<string, unknown>, variantParams: Record<string, unknown>, options?: {
+    trafficSplit?: number;
+    minObservations?: number;
+    significanceThreshold?: number;
+}): Experiment;
+/**
+ * Assign a request to the control or variant group using deterministic hashing:
+ * same (experiment_id, task) → same group, which keeps retries in the same group.
+ * Returns null when no group is assigned.
+ */
+declare function assignGroup(experiment: Experiment, task: string): AssignmentResult | null;
+/**
+ * Record an outcome for an experiment group.
+ * Updates running statistics and checks for significance.
+ */
+declare function recordOutcome(experiment: Experiment, group: 'control' | 'variant', outcome: {
+    accepted: boolean;
+    compilable?: boolean;
+    timeToAcceptMs?: number;
+}): Experiment;
+interface SignificanceResult {
+    /** Two-sided p-value */
+    pValue: number;
+    /** Z-score */
+    zScore: number;
+    /** Effect size: variant rate - control rate */
+    effectSize: number;
+    /** 95% confidence interval for effect size */
+    confidenceInterval: [number, number];
+    /** Whether the result is significant at the experiment's threshold */
+    significant: boolean;
+}
+/**
+ * Two-proportion z-test for A/B testing.
+ *
+ * H0: p_control = p_variant
+ * H1: p_control ≠ p_variant (two-sided)
+ *
+ * This is the standard test for comparing conversion rates.
+ */
+declare function testSignificance(experiment: Experiment): SignificanceResult;
+/**
+ * Get the active experiment from the given list, or null if there is none.
+ */
+declare function getActiveExperiment(experiments: Experiment[]): Experiment | null;
+/**
+ * Get all concluded experiments with their results.
+ */
+declare function getConcludedExperiments(experiments: Experiment[]): Experiment[];
+/**
+ * Render experiment summary for CLI/dashboard.
+ */
+declare function renderExperimentSummary(experiment: Experiment): string;
+/**
+ * Polyglot Dependency Graph — Import Parsing for Python, Go, Java, Rust
+ *
+ * Problem: The existing graph.ts uses ts-morph (AST) which only handles TS/JS.
+ * For a 20K-dev org with Java, Python, Go, Rust — the dependency graph is empty.
+ * No graph → no hub detection → no risk scoring → useless context selection.
+ *
+ * Solution: Regex-based import parsers for each language. Not AST-accurate, but
+ * good enough for dependency graph construction. We don't need perfect resolution;
+ * we need to know "file A probably depends on file B" for hub/risk scoring.
+ *
+ * Each parser:
+ *   1. Extracts import specifiers from file content using regex
+ *   2. Resolves specifiers to relative file paths within the project
+ *   3. Returns edges: { from: relativePath, to: relativePath }
+ *
+ * Supported languages:
+ *   - Python: import x, from x import y, relative imports
+ *   - Go: import "pkg", import ( "pkg" ... )
+ *   - Java: import com.example.Foo, package declaration
+ *   - Rust: use crate::x, mod x, use super::x
+ *
+ * Design: Pure functions. No external deps. Deterministic.
+ */
+type SupportedLanguage = 'python' | 'go' | 'java' | 'rust' | 'typescript';
+interface ImportSpec {
+    /** The raw import specifier as written in the source */
+    raw: string;
+    /** Whether this is a relative import */
+    isRelative: boolean;
+}
+declare function detectLanguage(filePath: string): SupportedLanguage | null;
+/**
+ * Parse imports from a non-TS file and resolve to project-relative paths.
+ * Returns dependency edges for the project graph.
+ *
+ * @param filePath - Absolute path to the source file
+ * @param relativePath - Project-relative path (e.g., "src/auth/login.py")
+ * @param projectPath - Absolute path to the project root
+ * @param allRelativePaths - Set of all file paths in the project (for resolution)
+ * @param content - Optional file content (read from disk if not provided)
+ */
+declare function parseImports(filePath: string, relativePath: string, projectPath: string, allRelativePaths: Set<string>, content?: string): GraphEdge[];
+/**
+ * Parse imports for ALL non-TS files in a project.
+ * Call this alongside ts-morph's buildProjectGraph for TS files.
+ */
+declare function parseAllPolyglotImports(files: {
+    relativePath: string;
+    absolutePath: string;
+    content?: string;
+}[], projectPath: string): GraphEdge[];
+/**
+ * Estimate cyclomatic complexity from source code using regex.
+ * Not AST-accurate but good enough for risk scoring.
+ */
+declare function estimateComplexity(content: string, lang: SupportedLanguage): number;
+/**
+ * Multi-Stage Reranker
+ *
+ * The problem: BM25 retrieval gets 54% precision. Adding risk scoring drops it
+ * to 33% because high-risk irrelevant files fill the budget.
+ *
+ * The solution: a 3-stage pipeline that turns BM25 candidates into a precision-
+ * optimized selection:
+ *
+ *   Stage 1: RETRIEVE (BM25 top-K) — already done by tfidf.ts
+ *   Stage 2: RERANK (multi-signal rescoring)
+ *     - Term coverage: what fraction of UNIQUE query terms does the file match?
+ *     - Term specificity: are the matched terms rare (high IDF) or generic?
+ *     - Bigram proximity: do query terms appear near each other in the file?
+ *     - Dependency signal: is this file in the dependency cone of a top match?
+ *     - Path relevance: does the file path match query terms?
+ *   Stage 3: QUALITY GATE (adaptive cutoff)
+ *     - Hard floor: files below absolute threshold are excluded
+ *     - Elbow detection: find the natural drop-off point in scores
+ *     - Don't fill budget with noise — stop when quality degrades
+ *
+ * This is a cross-encoder-like approach using hand-crafted features instead
+ * of a neural model. No ML dependencies. Deterministic.
+ */
+interface RerankInput {
+    /** Task description */
+    task: string;
+    /** BM25 candidates from tfidf.query() */
+    candidates: SemanticMatch[];
+    /** The TF-IDF index (for IDF weights) */
+    index: TfIdfIndex;
+    /** File contents for bigram proximity analysis */
+    fileContents: Map<string, string>;
+    /** Dependency edges: from → to[] */
+    dependencies: Map<string, string[]>;
+    /** All file paths in the project */
+    allFilePaths: string[];
+}
+interface RerankResult {
+    /** Reranked and filtered files — only high-quality matches */
+    files: RerankedFile[];
+    /** Files that were cut by the quality gate */
+    filtered: FilteredFile[];
+    /** The quality threshold used */
+    qualityThreshold: number;
+    /** Telemetry data for observability and debugging */
+    telemetry: RerankTelemetry;
+}
+interface RerankTelemetry {
+    /** Total candidates received from BM25 */
+    candidatesIn: number;
+    /** Files that passed the quality gate */
+    candidatesOut: number;
+    /** Files filtered out */
+    candidatesFiltered: number;
+    /** Timing in milliseconds */
+    durationMs: number;
+    /** Signal weight configuration used */
+    weights: typeof WEIGHTS;
+    /** Quality gate thresholds used */
+    gateConfig: {
+        absoluteFloor: number;
+        elbowDropRatio: number;
+        minTermCoverage: number;
+    };
+    /** Aggregate signal statistics across all candidates (before gate) */
+    signalStats: {
+        termCoverage: {
+            min: number;
+            max: number;
+            mean: number;
+            median: number;
+        };
+        termSpecificity: {
+            min: number;
+            max: number;
+            mean: number;
+            median: number;
+        };
+        bigramProximity: {
+            min: number;
+            max: number;
+            mean: number;
+            median: number;
+        };
+        dependencySignal: {
+            min: number;
+            max: number;
+            mean: number;
+            median: number;
+        };
+        pathRelevance: {
+            min: number;
+            max: number;
+            mean: number;
+            median: number;
+        };
+    };
+    /** Filter reason breakdown: reason → count */
+    filterReasons: Record<string, number>;
+    /** Score distribution: [min, p25, p50, p75, max] across all scored candidates */
+    scoreDistribution: [number, number, number, number, number];
+    /** Number of unique query terms */
+    queryTermCount: number;
+    /** Size of the dependency relevance cone */
+    relevanceConeSize: number;
+}
+interface RerankedFile {
+    filePath: string;
+    /** Final reranked score (0-1) */
+    score: number;
+    /** Original BM25 score */
+    bm25Score: number;
+    /** Individual signal scores */
+    signals: {
+        termCoverage: number;
+        termSpecificity: number;
+        bigramProximity: number;
+        dependencySignal: number;
+        pathRelevance: number;
+    };
+}
+interface FilteredFile {
+    filePath: string;
+    score: number;
+    reason: string;
+}
+declare const WEIGHTS: {
+    termCoverage: number;
+    termSpecificity: number;
+    bigramProximity: number;
+    dependencySignal: number;
+    pathRelevance: number;
+};
+/**
+ * Rerank BM25 candidates using multi-signal scoring + quality gate.
+ * Returns only files that pass the quality threshold.
+ */
+declare function rerank(input: RerankInput): RerankResult;
 declare function countTokensTiktoken(text: string): number;
 declare function countTokensChars4(sizeInBytes: number): number;
 declare function estimateTokens(content: string, sizeInBytes: number, method?: 'chars4' | 'tiktoken'): number;
@@ -470,4 +1006,4 @@ interface AuditOptions {
 }
 declare function auditProject(projectPath: string, filePaths: string[], options?: AuditOptions): Promise<AuditResult>;
-export { CtoError, type CtoErrorCode, type DocumentVector, type LearnerBoost, type LearnerBoostInput, type LearnerModel, type LogEntry, type LogLevel, type Logger, type PatternStats, type SecretFinding, type SecretType, type SelectionInput, type SemanticMatch, type SemanticScore, type TfIdfIndex, analyzeProject, auditProject, bfsBidirectional, boostByPath, buildAdjacencyList, buildIndex, buildProjectGraph, calculateCoverage, classifyFileKind, countTokensChars4, countTokensTiktoken, createLogger, createProject, detectStack, estimateFileTokens, estimateTokens, extractPattern, freeEncoder, getLearnerBoosts, getLearnerStats, getPruneLevelForRisk, isCtoError, loadLearner, optimizeBudget, pruneFile, pruneFiles, query, recordSelection, sanitizeContent, saveLearner, scanContentForSecrets, scanFileForSecrets, scanProjectForSecrets, scoreAllFiles, scoreFile, selectContext, setJsonLogging, setLogLevel, similarity, tokenize, walkProject, wrapError };
+export { type AssignmentResult, type ContextPipelineInput, type ContextPipelineResult, CtoError, type CtoErrorCode, type DocumentVector, type Experiment, type ExperimentConclusion, type ExperimentGroup, type FilteredFile, type GroupMetrics, type ImportSpec, type IndexCacheStats, type LearnerBoost, type LearnerBoostInput, type LearnerModel, type LogEntry, type LogLevel, type Logger, type MultiRepoResult, type PatternStats, type RerankInput, type RerankResult, type RerankedFile, type SecretFinding, type SecretType, type SelectionInput, type SemanticMatch, type SemanticScore, type SiblingMatch, type SiblingRepo, type SignificanceResult, type SupportedLanguage, type TfIdfIndex, analyzeProject, assignGroup, auditProject, bfsBidirectional, boostByPath, buildAdjacencyList, buildIndex, buildIndexCached, buildProjectGraph, calculateCoverage, classifyFileKind, countTokensChars4, countTokensTiktoken, createExperiment, createLogger, createProject, detectLanguage, detectStack, discoverSiblingRepos, estimateComplexity, estimateFileTokens, estimateTokens, extractPattern, freeEncoder, getActiveExperiment, getCacheInfo, getConcludedExperiments, getLearnerBoosts, getLearnerStats, getPruneLevelForRisk, invalidateCache, isCtoError, loadExperiments, loadLearner, optimizeBudget, parseAllPolyglotImports, parseImports, parseSiblingPaths, pruneFile, pruneFiles, query, querySiblingRepos, recordOutcome, recordSelection, renderExperimentSummary, renderMultiRepoSummary, rerank, runContextPipeline, sanitizeContent, saveExperiments, saveLearner, scanContentForSecrets, scanFileForSecrets, scanProjectForSecrets, scoreAllFiles, scoreFile, selectContext, setJsonLogging, setLogLevel, similarity, testSignificance, tokenize, walkProject, wrapError };