npm - commit-analyzer - Versions diffs - 1.0.1 - Mend

commit-analyzer 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/.claude/settings.local.json +12 -0
package/README.md +243 -0
package/csv-to-report-prompt.md +97 -0
package/package.json +39 -0
package/prompt.md +69 -0
package/src/cli.ts +143 -0
package/src/csv-reader.ts +180 -0
package/src/csv.ts +40 -0
package/src/errors.ts +49 -0
package/src/git.ts +112 -0
package/src/index.ts +283 -0
package/src/llm.ts +283 -0
package/src/progress.ts +77 -0
package/src/report-generator.ts +286 -0
package/src/types.ts +24 -0
package/tsconfig.json +19 -0

package/src/llm.ts ADDED Viewed

@@ -0,0 +1,283 @@
+import { execSync } from "child_process"
+import { CommitInfo, LLMAnalysis } from "./types"
+export class LLMService {
+  private static model: string
+  /**
+   * List the supported LLM CLIs that can be resolved on this machine.
+   *
+   * Each candidate is probed with `command -v <name>`; a probe that throws
+   * means the command could not be resolved and the candidate is left out.
+   *
+   * @returns The subset of "claude", "gemini" and "codex" that was found,
+   *   in that order.
+   */
+  static detectAvailableModels(): string[] {
+    const installed: string[] = []
+    for (const cli of ["claude", "gemini", "codex"]) {
+      try {
+        execSync(`command -v ${cli}`, { stdio: "ignore" })
+        installed.push(cli)
+      } catch {
+        // Probe failed: the shell could not resolve this command, skip it.
+      }
+    }
+    return installed
+  }
+  /**
+   * Choose the command to use when no model was configured explicitly.
+   *
+   * @returns `claude --model sonnet` when the claude CLI is available,
+   *   otherwise the first available CLI name.
+   * @throws Error when none of the supported CLIs is available.
+   */
+  static detectDefaultModel(): string {
+    const candidates = this.detectAvailableModels()
+    // Claude is preferred and pinned to the sonnet model.
+    if (candidates.includes("claude")) {
+      return "claude --model sonnet"
+    }
+    const fallback = candidates[0]
+    if (fallback === undefined) {
+      throw new Error(
+        "No supported LLM models found. Please install claude, gemini, or codex.",
+      )
+    }
+    return fallback
+  }
+  /**
+   * Override the LLM command used by later calls.
+   *
+   * @param model Command line to execute; it is stored as given.
+   */
+  static setModel(model: string): void {
+    this.model = model
+  }
+  /**
+   * Return the LLM command in use.
+   *
+   * @returns The command set through setModel, or the detected default
+   *   (which is then remembered) when none has been set.
+   * @throws Error from detectDefaultModel when no supported CLI is available.
+   */
+  static getModel(): string {
+    // Resolve lazily so CLI detection only runs when nothing was configured.
+    this.model = this.model || this.detectDefaultModel()
+    return this.model
+  }
+  /**
+   * Resolve a numeric setting from environment values.
+   *
+   * The first non-empty candidate is parsed. The fallback is returned when no
+   * candidate is set, or when the chosen value does not parse to a finite
+   * number of at least `min`. Without this guard a typo such as
+   * LLM_MAX_RETRIES=abc yields NaN, and a retry loop bounded by NaN never runs.
+   *
+   * @param candidates Raw values in priority order (earlier wins).
+   * @param fallback Value used when nothing usable is configured.
+   * @param min Smallest accepted value.
+   * @param parse String-to-number conversion; base-10 parseInt by default.
+   * @returns The parsed setting or the fallback.
+   */
+  private static numberFromEnv(
+    candidates: Array<string | undefined>,
+    fallback: number,
+    min = 0,
+    parse: (raw: string) => number = (raw) => parseInt(raw, 10),
+  ): number {
+    const raw = candidates.find((value) => value)
+    if (!raw) {
+      return fallback
+    }
+    const parsed = parse(raw)
+    return Number.isFinite(parsed) && parsed >= min ? parsed : fallback
+  }
+  // Retry policy, overridable through LLM_* environment variables.
+  private static readonly MAX_RETRIES = LLMService.numberFromEnv(
+    [process.env.LLM_MAX_RETRIES],
+    3,
+    1, // at least one attempt, otherwise the LLM would never be called
+  )
+  private static readonly INITIAL_RETRY_DELAY = LLMService.numberFromEnv(
+    [process.env.LLM_INITIAL_RETRY_DELAY],
+    5000,
+  )
+  private static readonly MAX_RETRY_DELAY = LLMService.numberFromEnv(
+    [process.env.LLM_MAX_RETRY_DELAY],
+    30000,
+  )
+  private static readonly RETRY_MULTIPLIER = LLMService.numberFromEnv(
+    [process.env.LLM_RETRY_MULTIPLIER],
+    2,
+    0,
+    parseFloat,
+  )
+  // Claude-specific limits; the CLAUDE_* variables take precedence over the
+  // LLM_* names, which are still honored for backward compatibility.
+  private static readonly CLAUDE_MAX_PROMPT_LENGTH = LLMService.numberFromEnv(
+    [process.env.CLAUDE_MAX_PROMPT_LENGTH, process.env.LLM_MAX_PROMPT_LENGTH],
+    100000,
+  )
+  private static readonly CLAUDE_MAX_DIFF_LENGTH = LLMService.numberFromEnv(
+    [process.env.CLAUDE_MAX_DIFF_LENGTH, process.env.LLM_MAX_DIFF_LENGTH],
+    80000,
+  )
+  /**
+   * Categorize a commit by sending its message and diff to the configured
+   * LLM command.
+   *
+   * The prompt is written to the command's stdin and the reply is handed to
+   * parseResponse. A failed invocation or an unparseable reply counts as a
+   * failed attempt. Attempts are retried up to MAX_RETRIES times, waiting
+   * INITIAL_RETRY_DELAY scaled by RETRY_MULTIPLIER per attempt and capped at
+   * MAX_RETRY_DELAY.
+   *
+   * @param commit Commit whose message and diff are analyzed.
+   * @returns The validated analysis parsed from the LLM reply.
+   * @throws Error when every attempt fails; the message includes the last failure.
+   */
+  static async analyzeCommit(commit: CommitInfo): Promise<LLMAnalysis> {
+    const command = this.getModel()
+    const prompt = this.buildPrompt(commit.message, commit.diff, command)
+    const usesClaude = this.isClaudeModel(command)
+    // Prompt size diagnostics are only printed for Claude models.
+    if (usesClaude) {
+      console.log(`  - Prompt length: ${prompt.length} characters`)
+      if (prompt.length > this.CLAUDE_MAX_PROMPT_LENGTH) {
+        console.log(`  - Warning: Prompt exceeds Claude max length (${this.CLAUDE_MAX_PROMPT_LENGTH})`)
+      }
+    }
+    let failure: Error | null = null
+    let attempt = 0
+    while (attempt < this.MAX_RETRIES) {
+      attempt += 1
+      try {
+        const reply = execSync(command, {
+          input: prompt,
+          encoding: "utf8",
+          stdio: ["pipe", "pipe", "pipe"],
+          timeout: 60000,
+        })
+        return this.parseResponse(reply)
+      } catch (error) {
+        failure = error instanceof Error ? error : new Error("Unknown error")
+        const shortHash = commit.hash.substring(0, 8)
+        console.log(`  - Error details for commit ${shortHash}:`)
+        console.log(`    Command: ${command}`)
+        console.log(`    Error message: ${failure.message}`)
+        if (usesClaude) {
+          console.log(`    Prompt length: ${prompt.length} characters`)
+        }
+        // Errors that expose `stderr` are treated as exec failures and their
+        // process details are printed as well.
+        if (error && typeof error === "object" && "stderr" in error) {
+          const { status, signal, stderr, stdout } = error as Record<string, unknown>
+          console.log(`    Exit code: ${status || "unknown"}`)
+          console.log(`    Signal: ${signal || "none"}`)
+          console.log(`    Stderr: ${stderr || "none"}`)
+          console.log(`    Stdout: ${stdout || "none"}`)
+        }
+        if (attempt < this.MAX_RETRIES) {
+          // Exponential backoff, capped at the configured maximum.
+          const backoff = Math.min(
+            this.INITIAL_RETRY_DELAY * this.RETRY_MULTIPLIER ** (attempt - 1),
+            this.MAX_RETRY_DELAY,
+          )
+          console.log(
+            `  - Attempt ${attempt}/${this.MAX_RETRIES} failed for commit ${shortHash}. Retrying in ${backoff / 1000}s...`,
+          )
+          await this.sleep(backoff)
+        }
+      }
+    }
+    throw new Error(
+      `Failed to analyze commit ${commit.hash} after ${this.MAX_RETRIES} attempts: ${failure?.message || "Unknown error"}`,
+    )
+  }
+  /** Resolve after `ms` milliseconds; used to space out retries. */
+  private static sleep(ms: number): Promise<void> {
+    return new Promise<void>((done) => {
+      setTimeout(done, ms)
+    })
+  }
+  /** Expose the configured maximum number of LLM attempts. */
+  static getMaxRetries(): number {
+    return this.MAX_RETRIES
+  }
+  /**
+   * Tell whether a model command refers to Claude.
+   *
+   * @param model Command to inspect; the configured model is used when omitted.
+   * @returns True when the command contains "claude", ignoring case.
+   */
+  private static isClaudeModel(model?: string): boolean {
+    const command = (model || this.getModel()).toLowerCase()
+    return command.indexOf("claude") !== -1
+  }
+  /**
+   * Render the categorization prompt around an already-prepared diff text.
+   *
+   * @param commitMessage Commit message inserted under "COMMIT MESSAGE:".
+   * @param diffText Diff text inserted under "COMMIT DIFF:".
+   * @returns The complete prompt.
+   */
+  private static renderPrompt(commitMessage: string, diffText: string): string {
+    return `Analyze this git commit and provide a categorization:
+COMMIT MESSAGE:
+${commitMessage}
+COMMIT DIFF:
+${diffText}
+Based on the commit message and code changes, categorize this commit as one of:
+- "tweak": Minor adjustments, bug fixes, small improvements
+- "feature": New functionality, major additions
+- "process": Build system, CI/CD, tooling, configuration changes
+Provide:
+1. Category: [tweak|feature|process]
+2. Summary: One-line description (max 80 chars)
+3. Description: Detailed explanation (2-3 sentences)
+Format as JSON:
+\`\`\`json
+{
+  "category": "...",
+  "summary": "...",
+  "description": "..."
+}
+\`\`\``
+  }
+  /**
+   * Build the categorization prompt for a commit.
+   *
+   * For Claude models the diff is cut to CLAUDE_MAX_DIFF_LENGTH, and cut
+   * again if the whole prompt still exceeds CLAUDE_MAX_PROMPT_LENGTH. Other
+   * models receive the diff unmodified.
+   *
+   * @param commitMessage Commit message to embed.
+   * @param diff Full commit diff.
+   * @param model Model command, used to decide whether limits apply.
+   * @returns The prompt text to send to the LLM.
+   */
+  private static buildPrompt(commitMessage: string, diff: string, model: string): string {
+    // Length limits are only enforced for Claude models.
+    if (!this.isClaudeModel(model)) {
+      return this.renderPrompt(commitMessage, diff)
+    }
+    const diffText =
+      diff.length > this.CLAUDE_MAX_DIFF_LENGTH
+        ? diff.substring(0, this.CLAUDE_MAX_DIFF_LENGTH) +
+          "\n\n[DIFF TRUNCATED - Original length: " + diff.length + " characters]"
+        : diff
+    const prompt = this.renderPrompt(commitMessage, diffText)
+    if (prompt.length <= this.CLAUDE_MAX_PROMPT_LENGTH) {
+      return prompt
+    }
+    // Still too long, e.g. because of a very large commit message. Size the
+    // diff from what is really left of the prompt budget: the previous
+    // calculation started from CLAUDE_MAX_DIFF_LENGTH, so a diff shorter than
+    // that limit was not cut at all and the marker was appended regardless.
+    const heavyMarker =
+      "\n\n[DIFF HEAVILY TRUNCATED - Original length: " + diff.length + " characters]"
+    const fixedLength = prompt.length - diffText.length
+    const budget = this.CLAUDE_MAX_PROMPT_LENGTH - fixedLength - heavyMarker.length
+    const keep = Math.max(1000, budget) // always keep at least 1000 chars of diff
+    if (keep >= diff.length) {
+      // The 1000-char floor already covers the whole diff; nothing to cut.
+      return this.renderPrompt(commitMessage, diff)
+    }
+    return this.renderPrompt(commitMessage, diff.substring(0, keep) + heavyMarker)
+  }
+  /**
+   * Extract and validate the JSON verdict from an LLM reply.
+   *
+   * The reply must contain a fenced json block holding an object with a valid
+   * category and non-empty string summary and description. The summary is cut
+   * to 80 characters. On failure the raw reply (first 1000 characters) and the
+   * extracted block, if any, are logged before the error is rethrown.
+   *
+   * @param response Raw text returned by the LLM command.
+   * @returns The validated analysis.
+   * @throws Error prefixed with "Failed to parse LLM response:" on any failure.
+   */
+  private static parseResponse(response: string): LLMAnalysis {
+    // Matched once and reused by the parser and the failure diagnostics.
+    const jsonMatch = response.match(/```json\s*([\s\S]*?)\s*```/)
+    try {
+      if (!jsonMatch) {
+        throw new Error("No JSON block found in response")
+      }
+      const parsed: unknown = JSON.parse(jsonMatch[1])
+      // JSON.parse can yield null, arrays or primitives; insist on an object
+      // so the field checks below report a meaningful error.
+      if (typeof parsed !== "object" || parsed === null) {
+        throw new Error("Response JSON is not an object")
+      }
+      const { category, summary, description } = parsed as Record<string, unknown>
+      if (typeof category !== "string" || !this.isValidCategory(category)) {
+        throw new Error(`Invalid category: ${String(category)}`)
+      }
+      if (
+        typeof summary !== "string" ||
+        typeof description !== "string" ||
+        !summary ||
+        !description
+      ) {
+        throw new Error("Missing required fields in response")
+      }
+      return {
+        category,
+        summary: summary.substring(0, 80),
+        description,
+      }
+    } catch (error) {
+      // Log the raw response for debugging
+      console.log(`  - Raw LLM response (first 1000 chars): ${response.substring(0, 1000)}`)
+      if (response.length > 1000) {
+        console.log(`  - Response truncated (total length: ${response.length} chars)`)
+      }
+      // Show the extracted block too, in case it exists but is malformed
+      if (jsonMatch) {
+        console.log(`  - Extracted JSON block: ${jsonMatch[1]}`)
+      }
+      throw new Error(
+        `Failed to parse LLM response: ${error instanceof Error ? error.message : "Unknown error"}`,
+      )
+    }
+  }
+  /** Type guard: true when `category` is one of the three supported labels. */
+  private static isValidCategory(
+    category: string,
+  ): category is "tweak" | "feature" | "process" {
+    return category === "tweak" || category === "feature" || category === "process"
+  }
+}

package/src/progress.ts ADDED Viewed

@@ -0,0 +1,77 @@
+import { writeFileSync, readFileSync, existsSync, unlinkSync } from "fs"
+import { AnalyzedCommit } from "./types"
+interface ProgressState { // shape of the JSON checkpoint written by ProgressTracker.saveProgress
+  totalCommits: string[] // all commit hashes of the run; getRemainingCommits filters this list
+  processedCommits: string[] // hashes already handled; excluded by getRemainingCommits
+  analyzedCommits: AnalyzedCommit[] // presumably the analysis results collected so far; verify against caller
+  lastProcessedIndex: number // written as processedCommits.length - 1
+  startTime: string // ISO-8601 timestamp taken each time the checkpoint is written
+  outputFile: string // shown as "Output file" in the progress summary
+}
+/**
+ * Reads and writes the analysis checkpoint, a JSON file named
+ * ".commit-analyzer-progress.json" resolved relative to the working directory.
+ */
+export class ProgressTracker {
+  private static readonly CHECKPOINT_FILE = ".commit-analyzer-progress.json"
+  /**
+   * Write the current progress to the checkpoint file, replacing any
+   * previous checkpoint.
+   *
+   * @param totalCommits All commit hashes of the run.
+   * @param processedCommits Hashes handled so far.
+   * @param analyzedCommits Analysis results to persist.
+   * @param outputFile Output file recorded in the checkpoint.
+   */
+  static saveProgress(
+    totalCommits: string[],
+    processedCommits: string[],
+    analyzedCommits: AnalyzedCommit[],
+    outputFile: string,
+  ): void {
+    const state: ProgressState = {
+      totalCommits,
+      processedCommits,
+      analyzedCommits,
+      lastProcessedIndex: processedCommits.length - 1,
+      // NOTE(review): this is the time of this save, not of the first save,
+      // yet the summary prints it as "Started" - confirm the intent.
+      startTime: new Date().toISOString(),
+      outputFile,
+    }
+    writeFileSync(this.CHECKPOINT_FILE, JSON.stringify(state, null, 2))
+  }
+  /**
+   * Load the checkpoint, if there is a usable one.
+   *
+   * @returns The saved state, or null when the file is missing, unreadable,
+   *   not valid JSON, or does not have the expected structure.
+   */
+  static loadProgress(): ProgressState | null {
+    if (!existsSync(this.CHECKPOINT_FILE)) {
+      return null
+    }
+    try {
+      const parsed: unknown = JSON.parse(
+        readFileSync(this.CHECKPOINT_FILE, "utf8"),
+      )
+      // A file that parses but has the wrong shape would otherwise crash
+      // later, e.g. in getRemainingCommits; treat it like a corrupt file.
+      if (!this.isProgressState(parsed)) {
+        throw new Error("Checkpoint file has an unexpected structure")
+      }
+      return parsed
+    } catch (error) {
+      console.error("Failed to load progress file:", error)
+      return null
+    }
+  }
+  /** Check the fields of a parsed checkpoint that the readers rely on. */
+  private static isProgressState(value: unknown): value is ProgressState {
+    if (typeof value !== "object" || value === null) {
+      return false
+    }
+    const candidate = value as Record<string, unknown>
+    return (
+      Array.isArray(candidate.totalCommits) &&
+      Array.isArray(candidate.processedCommits) &&
+      Array.isArray(candidate.analyzedCommits) &&
+      typeof candidate.startTime === "string" &&
+      typeof candidate.outputFile === "string"
+    )
+  }
+  /** Tell whether a checkpoint file exists (its content is not checked). */
+  static hasProgress(): boolean {
+    return existsSync(this.CHECKPOINT_FILE)
+  }
+  /** Delete the checkpoint file if it exists. */
+  static clearProgress(): void {
+    if (existsSync(this.CHECKPOINT_FILE)) {
+      unlinkSync(this.CHECKPOINT_FILE)
+    }
+  }
+  /**
+   * List the commits that still have to be processed.
+   *
+   * @param state Loaded checkpoint.
+   * @returns Hashes from totalCommits that are not in processedCommits,
+   *   in their original order.
+   */
+  static getRemainingCommits(state: ProgressState): string[] {
+    const processedSet = new Set(state.processedCommits)
+    return state.totalCommits.filter((hash) => !processedSet.has(hash))
+  }
+  /**
+   * Format a human-readable summary of a checkpoint.
+   *
+   * @param state Loaded checkpoint.
+   * @returns Multi-line text with start time, progress, remaining count and
+   *   output file.
+   */
+  static formatProgressSummary(state: ProgressState): string {
+    const processed = state.processedCommits.length
+    const total = state.totalCommits.length
+    const remaining = total - processed
+    // An empty commit list would divide by zero and print "NaN%".
+    const percentComplete =
+      total === 0 ? 0 : Math.round((processed / total) * 100)
+    return `
+Previous session:
+  - Started: ${new Date(state.startTime).toLocaleString()}
+  - Progress: ${processed}/${total} commits (${percentComplete}%)
+  - Remaining: ${remaining} commits
+  - Output file: ${state.outputFile}
+    `.trim()
+  }
+}

package/src/report-generator.ts ADDED Viewed

@@ -0,0 +1,286 @@
+import { execSync } from "child_process"
+import { writeFileSync } from "fs"
+import { CSVReaderService, ParsedCSVRow } from "./csv-reader"
+import { LLMService } from "./llm"
+export class MarkdownReportGenerator {
+  /**
+   * Build the markdown development report for a commit-analysis CSV file.
+   *
+   * The commit counts section is computed locally; the yearly summaries are
+   * requested from the LLM. Both are written to `outputPath` under a fixed
+   * title.
+   *
+   * @param csvFilePath CSV file to read.
+   * @param outputPath File the markdown report is written to.
+   * @throws Error when the CSV contains no rows or the LLM summary fails.
+   */
+  static async generateReport(
+    csvFilePath: string,
+    outputPath: string,
+  ): Promise<void> {
+    console.log(`Reading CSV data from ${csvFilePath}...`)
+    const rows = CSVReaderService.readCSV(csvFilePath)
+    if (rows.length === 0) {
+      throw new Error("No data found in CSV file")
+    }
+    // Console overview of what was loaded.
+    const { totalRows, yearRange, categoryBreakdown } =
+      CSVReaderService.getStatistics(rows)
+    console.log(
+      `Found ${totalRows} commits spanning ${yearRange.min}-${yearRange.max}`,
+    )
+    console.log(
+      `Categories: ${categoryBreakdown.feature} features, ${categoryBreakdown.process} process, ${categoryBreakdown.tweak} tweaks`,
+    )
+    console.log("Generating condensed report...")
+    // Title, locally computed counts, then the LLM-written yearly summaries.
+    const sections = [
+      "# Development Summary Report",
+      this.generateAnalysisSection(rows),
+      await this.generateYearlySummariesWithLLM(this.convertToCSVString(rows)),
+    ]
+    writeFileSync(outputPath, sections.join("\n\n"), "utf8")
+    console.log(`Report generated: ${outputPath}`)
+  }
+  /**
+   * Serialize parsed rows back into CSV text for the LLM prompt.
+   *
+   * @param data Rows to serialize.
+   * @returns A header line followed by one line per row, joined with "\n".
+   */
+  private static convertToCSVString(data: ParsedCSVRow[]): string {
+    const lines = ["year,category,summary,description"]
+    for (const { year, category, summary, description } of data) {
+      const cells = [
+        year,
+        this.escapeCsvField(category),
+        this.escapeCsvField(summary),
+        this.escapeCsvField(description),
+      ]
+      lines.push(cells.join(","))
+    }
+    return lines.join("\n")
+  }
+  /**
+   * Quote a CSV field when its content requires it.
+   *
+   * Fields containing a comma, a double quote, or a line break (LF or CR) are
+   * wrapped in double quotes with embedded quotes doubled. Carriage returns
+   * are covered as well, since a bare CR would otherwise be written unquoted.
+   *
+   * @param field Raw field value.
+   * @returns The field, quoted and escaped when necessary.
+   */
+  private static escapeCsvField(field: string): string {
+    if (/[",\r\n]/.test(field)) {
+      return `"${field.replace(/"/g, '""')}"`
+    }
+    return field
+  }
+  /**
+   * Build the "Commit Analysis" section from counts computed locally.
+   *
+   * @param csvData Rows to count.
+   * @returns Markdown with a total line and one line per year, newest year
+   *   first; every line ends with a newline.
+   */
+  private static generateAnalysisSection(csvData: ParsedCSVRow[]): string {
+    type YearCounts = {
+      tweak: number
+      feature: number
+      process: number
+      total: number
+    }
+    const { totalRows, yearRange } = CSVReaderService.getStatistics(csvData)
+    // Tally categories per year.
+    const perYear: Record<number, YearCounts> = {}
+    for (const row of csvData) {
+      const counts =
+        perYear[row.year] ??
+        (perYear[row.year] = { tweak: 0, feature: 0, process: 0, total: 0 })
+      counts[row.category]++
+      counts.total++
+    }
+    const lines = [
+      `## Commit Analysis`,
+      `- **Total Commits**: ${totalRows} commits across ${yearRange.min}-${yearRange.max}`,
+    ]
+    // Most recent year first.
+    const yearsDescending = Object.keys(perYear)
+      .map(Number)
+      .sort((a, b) => b - a)
+    for (const year of yearsDescending) {
+      const { total, feature, process, tweak } = perYear[year]
+      lines.push(
+        `- **${year}**: ${total} commits (${feature} features, ${process} process, ${tweak} tweaks)`,
+      )
+    }
+    return lines.join("\n") + "\n"
+  }
+  /**
+   * Ask the LLM for the yearly summary sections of the report.
+   *
+   * @param csvContent CSV text embedded in the prompt.
+   * @returns The yearly sections extracted from the LLM reply.
+   * @throws Error prefixed with "Failed to generate report:" when the LLM
+   *   call or the reply extraction fails.
+   */
+  private static async generateYearlySummariesWithLLM(
+    csvContent: string,
+  ): Promise<string> {
+    const reportPrompt = this.buildReportPrompt(csvContent)
+    try {
+      const reply = await this.callLLMWithRetry(reportPrompt)
+      return this.parseReportResponse(reply)
+    } catch (cause) {
+      const reason = cause instanceof Error ? cause.message : "Unknown error"
+      throw new Error(`Failed to generate report: ${reason}`)
+    }
+  }
+  /**
+   * Build the LLM prompt for report generation.
+   *
+   * The CSV text is embedded verbatim under "CSV DATA:", followed by fixed
+   * grouping, consolidation, output-format and quality instructions. The
+   * output format asks for a fenced markdown block of yearly sections and
+   * explicitly excludes the main title and the commit analysis section.
+   *
+   * NOTE(review): instruction 2 names the category "Process Improvements"
+   * while the category mapping and the output format call it "Processes";
+   * confirm which heading is intended.
+   *
+   * @param csvContent CSV text to embed in the prompt.
+   * @returns The complete prompt string.
+   */
+  private static buildReportPrompt(csvContent: string): string {
+    return `Analyze the following CSV data containing git commit analysis results and generate a condensed markdown development summary report.
+CSV DATA:
+${csvContent}
+INSTRUCTIONS:
+1. Group the data by year (descending order, most recent first)
+2. Within each year, group by category: Features, Process Improvements, and Tweaks & Bug Fixes
+3. Consolidate similar items within each category to create readable summaries
+4. Focus on what was accomplished rather than individual commit details
+5. Use clear, professional language appropriate for stakeholders
+CATEGORY MAPPING:
+- "feature" → "Features" section
+- "process" → "Processes" section
+- "tweak" → "Tweaks & Bug Fixes" section
+CONSOLIDATION GUIDELINES:
+- Group similar features together (e.g., "authentication system improvements")
+- Combine related bug fixes (e.g., "resolved 8 authentication issues")
+- Summarize process changes by theme (e.g., "CI/CD pipeline enhancements")
+- Use bullet points for individual items within categories
+- Aim for 3-7 bullet points per category per year
+- Include specific numbers when relevant (e.g., "15 bug fixes", "3 new features")
+OUTPUT FORMAT:
+Generate yearly summary sections with this exact structure (DO NOT include the main title or commit analysis section):
+\`\`\`markdown
+## [YEAR]
+### Features
+- [Consolidated feature summary 1]
+- [Consolidated feature summary 2]
+- [Additional features as needed]
+### Processes
+- [Consolidated process improvement 1]
+- [Consolidated process improvement 2]
+- [Additional process items as needed]
+### Tweaks & Bug Fixes
+- [Consolidated tweak/fix summary 1]
+- [Consolidated tweak/fix summary 2]
+- [Additional tweaks/fixes as needed]
+## [PREVIOUS YEAR]
+[Repeat structure for each year in the data]
+\`\`\`
+QUALITY REQUIREMENTS:
+- Keep summaries concise but informative
+- Use active voice and clear language
+- Avoid technical jargon where possible
+- Ensure each bullet point represents meaningful work
+- Make the report valuable for both technical and non-technical readers
+Generate the markdown report now:`
+  }
+  /**
+   * Send the report prompt to the configured LLM command, retrying on failure.
+   *
+   * The prompt is piped to the command returned by LLMService.getModel().
+   * Failed attempts are retried up to LLMService.getMaxRetries() times with a
+   * delay that starts at 5s, doubles after each attempt and is capped at 30s.
+   *
+   * @param prompt Full prompt text written to the command's stdin.
+   * @returns The command's stdout with surrounding whitespace removed.
+   * @throws Error when every attempt fails; the message includes the last failure.
+   */
+  private static async callLLMWithRetry(prompt: string): Promise<string> {
+    const maxRetries = LLMService.getMaxRetries()
+    let lastError: Error | null = null
+    for (let attempt = 1; attempt <= maxRetries; attempt++) {
+      try {
+        const currentModel = LLMService.getModel()
+        console.log(`  - Using model: ${currentModel}`)
+        console.log(
+          `  - Processing ${prompt.split("\n").length} lines of CSV data`,
+        )
+        const output = execSync(currentModel, {
+          input: prompt,
+          encoding: "utf8",
+          stdio: ["pipe", "pipe", "pipe"],
+          timeout: 120000, // Longer timeout for report generation
+        })
+        return output.trim()
+      } catch (error) {
+        lastError = error instanceof Error ? error : new Error("Unknown error")
+        console.log(`  - Error generating report:`)
+        console.log(`    Attempt: ${attempt}/${maxRetries}`)
+        console.log(`    Error: ${lastError.message}`)
+        if (attempt < maxRetries) {
+          // Exponential backoff: 5s, 10s, 20s, ... capped at 30s.
+          const delay = Math.min(5000 * Math.pow(2, attempt - 1), 30000)
+          console.log(`    Retrying in ${delay / 1000}s...`)
+          await this.sleep(delay)
+        }
+      }
+    }
+    throw new Error(
+      `Failed to generate report after ${maxRetries} attempts: ${lastError?.message || "Unknown error"}`,
+    )
+  }
+  /**
+   * Pull the yearly summary sections out of the LLM reply.
+   *
+   * Preference order: the content of a fenced markdown block; otherwise
+   * everything from the first line starting with "## " and a four-digit
+   * year; otherwise the whole reply, with a warning logged.
+   *
+   * @param response Raw LLM reply.
+   * @returns The extracted content, trimmed.
+   */
+  private static parseReportResponse(response: string): string {
+    const fenced = /```markdown\s*([\s\S]*?)\s*```/.exec(response)
+    if (fenced !== null) {
+      return fenced[1].trim()
+    }
+    // No fence: fall back to the first yearly heading and what follows it.
+    const fromFirstYear = /^(##\s+\d{4}[\s\S]*)/m.exec(response)
+    if (fromFirstYear !== null) {
+      return fromFirstYear[1].trim()
+    }
+    // Nothing recognizable: hand back the reply as-is, but say so.
+    console.log(
+      "  - Warning: Could not find structured yearly sections in LLM response",
+    )
+    console.log(`  - Response preview: ${response.substring(0, 200)}...`)
+    return response.trim()
+  }
+  /**
+   * Resolve after `ms` milliseconds; used to space out retries.
+   */
+  private static sleep(ms: number): Promise<void> {
+    return new Promise<void>((done) => {
+      setTimeout(done, ms)
+    })
+  }
+}

package/src/types.ts ADDED Viewed

@@ -0,0 +1,24 @@
+export interface CommitInfo { // a commit as passed to LLMService.analyzeCommit
+  hash: string // commit hash; its first 8 characters identify the commit in log output
+  message: string // commit message, embedded in the LLM prompt
+  date: Date // presumably the commit date; verify against the producer
+  diff: string // commit diff, embedded in the LLM prompt (may be truncated for Claude models)
+  year: number // presumably the year of `date`; verify against the producer
+}
+export interface LLMAnalysis { // validated result of LLMService.parseResponse
+  category: "tweak" | "feature" | "process" // one of the three labels requested in the prompt
+  summary: string // one-line summary, cut to at most 80 characters by parseResponse
+  description: string // longer explanation returned by the LLM
+}
+export interface AnalyzedCommit extends CommitInfo { // a commit together with its analysis
+  analysis: LLMAnalysis // the LLM verdict for this commit
+}
+export interface CSVRow { // one row of CSV data; presumably written by csv.ts - TODO confirm
+  year: number // year column
+  category: string // category column; typed as plain string here, unlike LLMAnalysis.category
+  summary: string // summary column
+  description: string // description column
+}

package/tsconfig.json ADDED Viewed

@@ -0,0 +1,19 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "lib": ["ES2020"],
+    "module": "commonjs",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}