codecritique 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +82 -114
  2. package/package.json +10 -9
  3. package/src/content-retrieval.test.js +775 -0
  4. package/src/custom-documents.test.js +440 -0
  5. package/src/feedback-loader.test.js +529 -0
  6. package/src/llm.test.js +256 -0
  7. package/src/project-analyzer.test.js +747 -0
  8. package/src/rag-analyzer.js +12 -0
  9. package/src/rag-analyzer.test.js +1109 -0
  10. package/src/rag-review.test.js +317 -0
  11. package/src/setupTests.js +131 -0
  12. package/src/zero-shot-classifier-open.test.js +278 -0
  13. package/src/embeddings/cache-manager.js +0 -364
  14. package/src/embeddings/constants.js +0 -40
  15. package/src/embeddings/database.js +0 -921
  16. package/src/embeddings/errors.js +0 -208
  17. package/src/embeddings/factory.js +0 -447
  18. package/src/embeddings/file-processor.js +0 -851
  19. package/src/embeddings/model-manager.js +0 -337
  20. package/src/embeddings/similarity-calculator.js +0 -97
  21. package/src/embeddings/types.js +0 -113
  22. package/src/pr-history/analyzer.js +0 -579
  23. package/src/pr-history/bot-detector.js +0 -123
  24. package/src/pr-history/cli-utils.js +0 -204
  25. package/src/pr-history/comment-processor.js +0 -549
  26. package/src/pr-history/database.js +0 -819
  27. package/src/pr-history/github-client.js +0 -629
  28. package/src/technology-keywords.json +0 -753
  29. package/src/utils/command.js +0 -48
  30. package/src/utils/constants.js +0 -263
  31. package/src/utils/context-inference.js +0 -364
  32. package/src/utils/document-detection.js +0 -105
  33. package/src/utils/file-validation.js +0 -271
  34. package/src/utils/git.js +0 -232
  35. package/src/utils/language-detection.js +0 -170
  36. package/src/utils/logging.js +0 -24
  37. package/src/utils/markdown.js +0 -132
  38. package/src/utils/mobilebert-tokenizer.js +0 -141
  39. package/src/utils/pr-chunking.js +0 -276
  40. package/src/utils/string-utils.js +0 -28
@@ -1,170 +0,0 @@
1
- /**
2
- * Language Detection Module
3
- *
4
- * This module provides utilities for detecting programming languages
5
- * and file types from file extensions and content analysis.
6
- */
7
-
8
- import path from 'path';
9
- import { EXTENSION_TO_LANGUAGE_MAP, ALL_SUPPORTED_EXTENSIONS } from './constants.js';
10
-
11
/**
 * Detect programming language from file extension.
 *
 * The extension is lower-cased and given a leading dot when it has none, so
 * 'TS', 'ts' and '.ts' are all looked up as '.ts'.
 *
 * @param {string} extension - File extension, with or without the leading dot
 * @returns {string} The language mapped in EXTENSION_TO_LANGUAGE_MAP, or the string
 *   'unknown' when the input is not a string, the extension is not listed in
 *   ALL_SUPPORTED_EXTENSIONS, or it has no mapping
 *
 * @example
 * const language = detectLanguageFromExtension('.ts');
 * // Returns: 'typescript'
 */
export function detectLanguageFromExtension(extension) {
  // A non-string (undefined, null, ...) cannot name an extension; report it as
  // unknown instead of throwing on .toLowerCase().
  if (typeof extension !== 'string') {
    return 'unknown';
  }

  // Normalize to lowercase with a leading dot without reassigning the parameter
  const lowered = extension.toLowerCase();
  const normalizedExt = lowered.startsWith('.') ? lowered : `.${lowered}`;

  // Only extensions on the supported list are looked up in the mapping
  if (!ALL_SUPPORTED_EXTENSIONS.includes(normalizedExt)) {
    return 'unknown';
  }

  // Use the centralized extension-to-language mapping from constants
  return EXTENSION_TO_LANGUAGE_MAP[normalizedExt] || 'unknown';
}
38
-
39
/**
 * Pick the framework a file appears to use, based on its extension, detected
 * language and content markers. Checks run in a fixed order and the first
 * match wins (React, Vue, Angular, Express, Next.js, Django, Flask, Rails, Spring).
 *
 * @param {string} extension - File extension as returned by path.extname
 * @param {string} language - Language reported by detectLanguageFromExtension
 * @param {string} content - File content to scan for framework markers
 * @returns {string|null} Framework identifier, or null when nothing matches
 */
function detectFrameworkFromContent(extension, language, content) {
  const has = (needle) => content.includes(needle);

  if (extension === '.jsx' || extension === '.tsx' || has('import React') || has('from "react"') || has("from 'react'")) {
    return 'react';
  }
  if (extension === '.vue' || (has('<template>') && has('<script>'))) {
    return 'vue';
  }
  if (has('@Component') || has('@NgModule') || has('from "@angular/core"') || has("from '@angular/core'")) {
    return 'angular';
  }
  if (
    has('express()') ||
    has('require("express")') ||
    has("require('express')") ||
    has('from "express"') ||
    has("from 'express'")
  ) {
    return 'express';
  }
  if (has('from "next"') || has("from 'next'") || has('next/app') || has('next/document')) {
    return 'nextjs';
  }
  if (language === 'python' && (has('from django') || has('import django'))) {
    return 'django';
  }
  if (language === 'python' && (has('from flask import') || has('import flask'))) {
    return 'flask';
  }
  if (language === 'ruby' && (has('Rails') || has('ActiveRecord'))) {
    return 'rails';
  }
  if (
    language === 'java' &&
    (has('@Controller') || has('@Service') || has('@Repository') || has('@SpringBootApplication'))
  ) {
    return 'spring';
  }
  return null;
}

/**
 * Detect file type and framework from file path and content.
 *
 * The type is derived from the base name: `.d.ts` files are type definitions,
 * names containing `.test.`/`.spec.`/`_test.`/`_spec.` or starting with
 * `test`/`spec` are tests, names containing `config`/`conf`/`settings` or ending
 * in `.rc` are config files; otherwise the detected language is used as the type.
 * Framework detection only runs when non-empty content is supplied.
 *
 * @param {string} filePath - Path to the file
 * @param {string} content - Content of the file (optional)
 * @returns {Object} File type information: `path`, `extension`, `language`, `type`,
 *   `framework`, `isConfig`, `isTest`, `isTypeDefinition`. For React files whose
 *   content mentions useState/useEffect/useContext, `isHook` and `isComponent`
 *   are added as well.
 *
 * @example
 * const fileInfo = detectFileType('src/components/Button.tsx', 'import React from "react"');
 * // Returns: { path: '...', extension: '.tsx', language: 'typescript', framework: 'react', ... }
 */
export function detectFileType(filePath, content = '') {
  const extension = path.extname(filePath);
  const baseName = path.basename(filePath);
  const language = detectLanguageFromExtension(extension);

  const result = {
    path: filePath,
    extension,
    language,
    type: 'unknown',
    framework: null,
    isConfig: false,
    isTest: false,
    isTypeDefinition: false,
  };

  // Detect file type based on name patterns; the first matching rule wins
  if (baseName.endsWith('.d.ts')) {
    result.type = 'type-definition';
    result.isTypeDefinition = true;
  } else if (/\.test\.|\.spec\.|_test\.|_spec\./.test(baseName) || /^test.*\.|^spec.*\./.test(baseName)) {
    result.type = 'test';
    result.isTest = true;
  } else if (/config|conf|settings|\.rc$/.test(baseName)) {
    result.type = 'config';
    result.isConfig = true;
  } else if (language) {
    result.type = language;
  }

  // Without content there is nothing further to analyze
  if (!content || !(content.length > 0)) {
    return result;
  }

  result.framework = detectFrameworkFromContent(extension, language, content);

  const usesCoreHooks = content.includes('useState') || content.includes('useEffect') || content.includes('useContext');
  if (result.framework === 'react' && usesCoreHooks) {
    // An optional `export` / `export default` prefix is accepted so that exported
    // hooks and components (the common case in module files) are recognized too.
    result.isHook = /^\s*(?:export\s+(?:default\s+)?)?function\s+use[A-Z]/m.test(content);
    result.isComponent =
      /^\s*(?:export\s+(?:default\s+)?)?function\s+[A-Z]/m.test(content) ||
      /^\s*(?:export\s+)?const\s+[A-Z]\w+\s*=\s*\(/m.test(content);
  }

  return result;
}
@@ -1,24 +0,0 @@
1
- /**
2
- * Logging Module
3
- *
4
- * This module provides debugging and logging utilities with support
5
- * for environment-based and command-line argument-based log level control.
6
- */
7
-
8
- import chalk from 'chalk';
9
-
10
/**
 * Debug function for conditional logging based on environment variables and command line arguments.
 *
 * Logging is enabled when any of the following holds:
 * - the DEBUG environment variable is set to a non-empty value other than
 *   "false" or "0" (compared case-insensitively),
 * - the VERBOSE environment variable is exactly "true",
 * - the process was started with the --verbose flag.
 *
 * @param {string} message - Debug message to log
 *
 * @example
 * debug('Processing file: example.js');
 * // Only logs if DEBUG=true, VERBOSE=true, or --verbose flag is present
 */
export function debug(message) {
  // Environment values are always strings, so "false" and "0" are truthy;
  // treat them as an explicit opt-out rather than as "enabled".
  const debugFlag = (process.env.DEBUG ?? '').trim().toLowerCase();
  const isDebugEnabled = debugFlag !== '' && debugFlag !== 'false' && debugFlag !== '0';

  if (isDebugEnabled || process.env.VERBOSE === 'true' || process.argv.includes('--verbose')) {
    console.log(chalk.cyan(`[DEBUG] ${message}`));
  }
}
@@ -1,132 +0,0 @@
1
- /**
2
- * Markdown Processing Module
3
- *
4
- * This module provides utilities for processing markdown content,
5
- * including chunk extraction, heading analysis, and content parsing.
6
- */
7
-
8
- import path from 'path';
9
-
10
/**
 * Extracts chunks from Markdown content based on H2 and H3 headings,
 * and also extracts the first H1 heading as the document title.
 *
 * Chunking rules:
 * - Every H2/H3 heading outside a fenced code block starts a new chunk; the
 *   heading line itself is part of that chunk's content.
 * - Text before the first H2/H3 heading (including an H1 line, if any) forms a
 *   chunk whose `heading` is null.
 * - Chunks whose trimmed content is empty are dropped.
 *
 * Title rules:
 * - The H1 is searched only in the first five lines, outside fenced code
 *   blocks. `##`/`###` lines are never taken as the H1.
 * - When no (non-empty) H1 is found, the file name without its extension is used.
 *
 * @param {string} filePath - The absolute path to the file (used for the fallback title)
 * @param {string} content - The Markdown content of the file
 * @param {string} relativePath - The relative path of the file
 * @returns {Object} An object containing `chunks` (Array) and `documentH1` (string|null).
 * `documentH1` is null only when `content` is empty or not a string.
 * Each chunk object contains:
 * `content`, `heading` (H2/H3 text or null),
 * `original_document_path`, `start_line_in_doc` (1-based), `language`.
 *
 * @example
 * const result = extractMarkdownChunks('/path/to/file.md', '# Title\n## Section\nContent...', 'docs/file.md');
 * // Returns: { chunks: [{ content: '# Title', heading: null, ... }, { content: '## Section\nContent...', heading: 'Section', ... }], documentH1: 'Title' }
 */
export function extractMarkdownChunks(filePath, content, relativePath) {
  const chunks = [];
  let documentH1 = null;
  if (!content || typeof content !== 'string') return { chunks, documentH1 };

  const H1_SCAN_LINES = 5; // Only the top of the document is searched for a title
  const h1Regex = /^#(?!#)\s*(.*)/; // Exactly one '#', zero or more spaces after it
  const h2h3Regex = /^(##|###)\s+(.*)/; // Regex for H2 or H3

  const lines = content.split('\n');
  let currentChunkLines = [];
  let currentH2H3Heading = null; // Stores the H2 or H3 heading for the current chunk
  let chunkStartLine = 1;
  let inCodeBlock = false;
  let h1Found = false;

  // Emit the chunk collected so far, unless it is blank
  const flushCurrentChunk = () => {
    const chunkContent = currentChunkLines.join('\n').trim();
    if (chunkContent.length === 0) return;
    chunks.push({
      content: chunkContent,
      heading: currentH2H3Heading,
      original_document_path: relativePath,
      start_line_in_doc: chunkStartLine,
      language: 'markdown',
    });
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const trimmedLine = line.trim();

    if (trimmedLine.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
    }

    // A '# ...' line inside a fenced block is code (e.g. a shell comment), not a title
    if (!h1Found && !inCodeBlock && i < H1_SCAN_LINES) {
      const h1Match = trimmedLine.match(h1Regex);
      if (h1Match) {
        documentH1 = h1Match[1].trim();
        h1Found = true;
      }
    }

    const h2h3Match = !inCodeBlock && trimmedLine.match(h2h3Regex);

    if (h2h3Match) {
      // Found an H2 or H3 heading: finalize the previous chunk and start a new one
      flushCurrentChunk();
      currentH2H3Heading = h2h3Match[2].trim();
      currentChunkLines = [line]; // Include H2/H3 heading line in the new chunk's content
      chunkStartLine = i + 1;
    } else {
      // Any other line (including an H1 line) belongs to the current chunk
      currentChunkLines.push(line);
    }
  }

  // Add the last processed chunk if it has content. Every line ends up in some
  // chunk, so no separate whole-document fallback is needed.
  flushCurrentChunk();

  if (!documentH1) {
    // path.parse strips only the trailing extension ('x.mdx.md' -> 'x.mdx')
    documentH1 = path.parse(filePath).name;
  }

  return { chunks, documentH1 };
}
@@ -1,141 +0,0 @@
1
- /**
2
- * MobileBERT Tokenizer Utility
3
- *
4
- * Shared tokenizer functionality for MobileBERT models to handle token counting
5
- * and text truncation while staying within the 512 token limit.
6
- */
7
-
8
- import { AutoTokenizer } from '@huggingface/transformers';
9
- import chalk from 'chalk';
10
-
11
// Shared tokenizer instance and initialization state
let tokenizer = null;
// Pending initialization, shared by callers that arrive while it is in flight
let initializationPromise = null;
// Set when an initialization attempt resolved without a tokenizer, so the
// failing load is not repeated (and re-logged) on every later call
let initializationFailed = false;

/**
 * Initialize and get the MobileBERT tokenizer (singleton pattern).
 *
 * The first call starts the initialization; calls made while it is pending
 * wait for the same promise. The outcome is remembered: a loaded tokenizer is
 * returned directly afterwards, and an attempt that yielded no tokenizer makes
 * later calls return null immediately instead of retrying.
 *
 * @returns {Promise<AutoTokenizer|null>} Tokenizer instance or null if initialization fails
 */
async function getTokenizer() {
  // If already initialized, return immediately
  if (tokenizer) return tokenizer;

  // A previous attempt produced no tokenizer; callers fall back to estimation
  if (initializationFailed) return null;

  // If currently initializing, wait for the existing initialization
  if (initializationPromise) {
    return await initializationPromise;
  }

  // Start initialization
  initializationPromise = _initializeTokenizer();

  try {
    tokenizer = await initializationPromise;
    initializationFailed = !tokenizer;
    return tokenizer;
  } finally {
    initializationPromise = null;
  }
}
41
-
42
/**
 * Load the MobileBERT tokenizer, logging progress to the console.
 * Any error raised while loading is reported as a warning and turned into a
 * null result, so this function resolves rather than rejects on load failure.
 *
 * @returns {Promise<AutoTokenizer|null>} The loaded tokenizer, or null when loading failed
 */
async function _initializeTokenizer() {
  const modelId = 'Xenova/mobilebert-uncased-mnli';

  try {
    console.log(chalk.blue('Initializing MobileBERT tokenizer...'));
    const loadedTokenizer = await AutoTokenizer.from_pretrained(modelId);
    console.log(chalk.green('✓ MobileBERT tokenizer initialized successfully'));
    return loadedTokenizer;
  } catch (loadError) {
    const warning = chalk.yellow('⚠ Failed to initialize tokenizer, falling back to character estimation');
    console.warn(warning, loadError.message);
    return null;
  }
}
57
-
58
/**
 * Count tokens for the MobileBERT model.
 *
 * Uses the shared tokenizer when it is available. When no tokenizer could be
 * obtained, or encoding throws, the count is estimated as one token per three
 * characters (rounded up).
 *
 * @param {string} text - Text to count tokens for
 * @returns {Promise<number>} Number of tokens; 0 for empty or non-string input
 */
async function countTokens(text) {
  if (typeof text !== 'string' || !text) {
    return 0;
  }

  // Conservative estimate for MobileBERT, used whenever exact counting is unavailable
  const estimateFromLength = () => Math.ceil(text.length / 3);

  try {
    const activeTokenizer = await getTokenizer();
    if (activeTokenizer) {
      const tokenIds = await activeTokenizer.encode(text);
      return tokenIds.length;
    }
    return estimateFromLength();
  } catch (countError) {
    console.warn(chalk.gray('Token counting failed, using character estimation'), countError.message);
    return estimateFromLength();
  }
}
82
-
83
/**
 * Truncate text to fit within token limit while preserving important content.
 *
 * Text that already fits is returned unchanged. Otherwise the longest prefix
 * whose token count is within the limit is found by binary search over the
 * prefix length. The cut never splits a UTF-16 surrogate pair, and it is moved
 * back to the last space when that space lies in the final 20% of the prefix,
 * so the result does not end in the middle of a word.
 *
 * @param {string} text - Text to truncate
 * @param {number} maxTokens - Maximum tokens allowed (default: 450 for MobileBERT safety)
 * @returns {Promise<string>} Truncated text ('' for empty input)
 */
export async function truncateToTokenLimit(text, maxTokens = 450) {
  if (!text) return '';

  const currentTokens = await countTokens(text);
  if (currentTokens <= maxTokens) {
    return text;
  }

  // Binary search to find the right length
  let left = 0;
  let right = text.length;
  let bestLength = 0;

  while (left <= right) {
    const mid = Math.floor((left + right) / 2);
    const tokens = await countTokens(text.substring(0, mid));

    if (tokens <= maxTokens) {
      bestLength = mid;
      left = mid + 1;
    } else {
      right = mid - 1;
    }
  }

  // Lengths are UTF-16 code units: if the cut falls right after a high
  // surrogate, step back one unit so the result does not end in half a character.
  if (bestLength > 0 && bestLength < text.length) {
    const lastUnit = text.charCodeAt(bestLength - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      bestLength -= 1;
    }
  }

  // Ensure we don't cut off in the middle of a word
  let result = text.substring(0, bestLength);
  const lastSpaceIndex = result.lastIndexOf(' ');
  if (lastSpaceIndex > bestLength * 0.8) {
    result = result.substring(0, lastSpaceIndex);
  }

  return result;
}
124
-
125
/**
 * Release the shared tokenizer, if one is loaded.
 *
 * Calls the tokenizer's `dispose` method when it has one, then clears the
 * shared reference. The reference is cleared even when disposal throws; the
 * error is reported as a console warning instead of being rethrown.
 *
 * @returns {Promise<void>}
 */
export async function cleanupTokenizer() {
  // Nothing loaded, nothing to release
  if (!tokenizer) {
    return;
  }

  try {
    const canDispose = typeof tokenizer.dispose === 'function';
    if (canDispose) {
      await tokenizer.dispose();
    }
    tokenizer = null;
    console.log(chalk.green('✓ MobileBERT tokenizer resources cleaned up'));
  } catch (cleanupError) {
    console.warn(chalk.yellow('⚠ Error cleaning up tokenizer:'), cleanupError.message);
    tokenizer = null;
  }
}