codecritique 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1145 -0
- package/package.json +98 -0
- package/src/content-retrieval.js +747 -0
- package/src/custom-documents.js +597 -0
- package/src/embeddings/cache-manager.js +364 -0
- package/src/embeddings/constants.js +40 -0
- package/src/embeddings/database.js +921 -0
- package/src/embeddings/errors.js +208 -0
- package/src/embeddings/factory.js +447 -0
- package/src/embeddings/file-processor.js +851 -0
- package/src/embeddings/model-manager.js +337 -0
- package/src/embeddings/similarity-calculator.js +97 -0
- package/src/embeddings/types.js +113 -0
- package/src/feedback-loader.js +384 -0
- package/src/index.js +1418 -0
- package/src/llm.js +123 -0
- package/src/pr-history/analyzer.js +579 -0
- package/src/pr-history/bot-detector.js +123 -0
- package/src/pr-history/cli-utils.js +204 -0
- package/src/pr-history/comment-processor.js +549 -0
- package/src/pr-history/database.js +819 -0
- package/src/pr-history/github-client.js +629 -0
- package/src/project-analyzer.js +955 -0
- package/src/rag-analyzer.js +2764 -0
- package/src/rag-review.js +566 -0
- package/src/technology-keywords.json +753 -0
- package/src/utils/command.js +48 -0
- package/src/utils/constants.js +263 -0
- package/src/utils/context-inference.js +364 -0
- package/src/utils/document-detection.js +105 -0
- package/src/utils/file-validation.js +271 -0
- package/src/utils/git.js +232 -0
- package/src/utils/language-detection.js +170 -0
- package/src/utils/logging.js +24 -0
- package/src/utils/markdown.js +132 -0
- package/src/utils/mobilebert-tokenizer.js +141 -0
- package/src/utils/pr-chunking.js +276 -0
- package/src/utils/string-utils.js +28 -0
- package/src/zero-shot-classifier-open.js +392 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bot Detection Utility
|
|
3
|
+
*
|
|
4
|
+
* Detects and filters out bot comments from PR analysis.
|
|
5
|
+
* Bots provide automated feedback that isn't useful for human review pattern analysis.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Regular expressions that flag a GitHub login as an automated account.
 *
 * Every expression is case-insensitive. The first three capture generic
 * naming conventions; the remainder are prefixes used by specific services.
 */
const BOT_PATTERNS = {
  usernames: [
    // --- Generic naming conventions ---
    /\[bot\]$/i, // GitHub App suffix: sonarqubecloud[bot], dependabot[bot]
    /^bot-/i, // "bot-" prefix: bot-reviewer
    /-bot$/i, // "-bot" suffix: review-bot

    // --- Service-specific prefixes ---
    /^dependabot/i, // Dependabot
    /^renovate/i, // Renovate
    /^github-actions/i, // GitHub Actions
    /^codecov/i, // Codecov
    /^sonarcloud/i, // SonarCloud
    /^sonarqube/i, // SonarQube
    /^snyk/i, // Snyk
    /^greenkeeper/i, // Greenkeeper
    /^semantic-release/i, // semantic-release
    /^allcontributors/i, // All Contributors
    /^stale/i, // Stale
    /^mergify/i, // Mergify
    /^auto-merge/i, // Auto-merge helpers
    /^ci-bot/i, // CI helpers
    /^deploy-bot/i, // Deployment helpers
  ],
};
|
|
34
|
+
|
|
35
|
+
/**
 * Logins that are always treated as bots (exact, lowercase matches).
 */
const KNOWN_BOTS = new Set([
  // Dependency and release automation
  'dependabot[bot]',
  'renovate[bot]',
  'github-actions[bot]',
  'codecov[bot]',
  'sonarqubecloud[bot]',
  'sonarcloud[bot]',
  'snyk[bot]',
  'greenkeeper[bot]',
  'semantic-release-bot',
  'allcontributors[bot]',
  'stale[bot]',
  'mergify[bot]',
  'auto-merge-bot',
  'ci-bot',
  'deploy-bot',

  // Hosting, CI and cloud development environments
  'vercel[bot]',
  'netlify[bot]',
  'heroku[bot]',
  'circleci[bot]',
  'travis[bot]',
  'jenkins[bot]',
  'azure-pipelines[bot]',
  'gitpod[bot]',
  'codesandbox[bot]',

  // Code quality, linting and formatting
  'deepsource[bot]',
  'codeclimate[bot]',
  'codebeat[bot]',
  'codacy[bot]',
  'houndci-bot',
  'danger[bot]',
  'prettier[bot]',
  'eslint[bot]',
  'typescript[bot]',
]);
|
|
73
|
+
|
|
74
|
+
/**
 * Check if a username indicates a bot account
 *
 * The login is trimmed and lowercased once, and that normalized form is used
 * for both the exact-match lookup and the pattern scan, so surrounding
 * whitespace cannot defeat the `^`/`$` anchored patterns.
 *
 * @param {string} username - GitHub username to check
 * @returns {boolean} True if username appears to be a bot; false for
 *   non-string, empty, or whitespace-only input
 */
function isBotUsername(username) {
  if (!username || typeof username !== 'string') {
    return false;
  }

  const normalizedUsername = username.toLowerCase().trim();
  if (normalizedUsername === '') {
    return false;
  }

  // Exact matches first (most reliable). The raw value is still consulted so
  // an entry stored with different casing keeps matching as before.
  if (KNOWN_BOTS.has(username) || KNOWN_BOTS.has(normalizedUsername)) {
    return true;
  }

  // Patterns are anchored, so they must see the trimmed login rather than the
  // raw input (previously " renovate-x" or "review-bot " were missed).
  return BOT_PATTERNS.usernames.some((pattern) => pattern.test(normalizedUsername));
}
|
|
94
|
+
|
|
95
|
+
/**
 * Decide whether a comment was written by a bot, judging by its author login only.
 *
 * The login is taken from the first truthy value among `comment.user.login`,
 * `comment.author_login` and `comment.author`.
 *
 * @param {Object} comment - Comment object carrying one of the author fields above
 * @returns {boolean} True if the author login looks like a bot; false when the
 *   comment or its login is missing
 */
function isBotComment(comment) {
  const login = comment && (comment.user?.login || comment.author_login || comment.author);
  if (!login) {
    return false;
  }

  return isBotUsername(login);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Return only the comments that were not authored by a bot.
 *
 * @param {Array<Object>} comments - Array of comment objects
 * @returns {Array<Object>} New array without bot comments; an empty array when
 *   the input is not an array
 */
export function filterBotComments(comments) {
  return Array.isArray(comments) ? comments.filter((entry) => !isBotComment(entry)) : [];
}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI Utilities for PR History Analysis
|
|
3
|
+
*
|
|
4
|
+
* Provides utility functions for GitHub repository detection,
|
|
5
|
+
* project path handling, and CLI integration.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { execSync } from 'child_process';
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import chalk from 'chalk';
|
|
12
|
+
|
|
13
|
+
/**
 * Detect GitHub repository from git remote origin
 *
 * Looks for a `.git` entry directly inside `projectPath`, asks git for the
 * URL of the `origin` remote and extracts `owner/repo` from it. Both
 * `git@github.com:owner/repo.git` and `https://github.com/owner/repo` style
 * URLs are understood, with or without the `.git` suffix or a trailing slash.
 * Repository names containing dots (e.g. `next.js`) are preserved.
 *
 * Any failure (git missing, no `origin` remote, invalid path, ...) is reported
 * through `console.warn` and results in `null` rather than an exception.
 *
 * @param {string} projectPath - Project directory path
 * @returns {string|null} Repository in format "owner/repo" or null if not found
 */
function detectGitHubRepository(projectPath) {
  try {
    if (!fs.existsSync(path.join(projectPath, '.git'))) {
      return null;
    }

    const remoteUrl = execSync('git remote get-url origin', {
      cwd: projectPath,
      encoding: 'utf8',
    }).trim();

    // One pattern covers both the SSH (`github.com:owner/...`) and HTTPS
    // (`github.com/owner/...`) forms. The repo capture is lazy and excludes
    // only `/`, so dots inside the name survive while an optional `.git`
    // suffix and trailing slash are stripped.
    const match = remoteUrl.match(/github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?\/?$/);
    return match ? `${match[1]}/${match[2]}` : null;
  } catch (error) {
    console.warn(chalk.yellow(`Warning: Could not detect GitHub repository: ${error.message}`));
    return null;
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Get GitHub token from options or environment
 *
 * Lookup order: `options.token`, then the `GITHUB_TOKEN` environment
 * variable, then `GH_TOKEN`. Empty strings are skipped. A missing `options`
 * object is tolerated so the environment fallbacks still apply.
 *
 * @param {Object} [options] - CLI options
 * @returns {string|null} GitHub token or null if not found
 */
function getGitHubToken(options) {
  return options?.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Turn the CLI directory option into an absolute project path.
 *
 * @param {string} directory - Directory option from CLI; may be omitted
 * @returns {string} The resolved absolute path, or the current working
 *   directory when no directory was given
 */
function resolveProjectPath(directory) {
  if (!directory) {
    return process.cwd();
  }

  return path.resolve(directory);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Validate GitHub repository format
 *
 * Accepts exactly two non-empty segments separated by a single `/`
 * ("owner/repo"). Segments containing whitespace are rejected, because a
 * value such as " / " or "own er/repo" can never name a repository.
 *
 * @param {string} repository - Repository string
 * @returns {boolean} True if valid format
 */
function isValidRepositoryFormat(repository) {
  if (typeof repository !== 'string') {
    return false;
  }

  // owner and repo: one or more characters that are neither "/" nor whitespace.
  return /^[^\s/]+\/[^\s/]+$/.test(repository);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Work out which repository and project directory a CLI invocation targets.
 *
 * An explicit `options.repository` is validated and used as-is; otherwise the
 * repository is detected from the project's git remote. The chosen repository
 * is echoed to the console in either case.
 *
 * @param {Object} options - CLI options (`directory`, `repository`)
 * @returns {Object} Object with repository and projectPath
 * @throws {Error} If the given repository is malformed, or none was given and
 *   none could be detected
 */
export function getRepositoryAndProjectPath(options) {
  const projectPath = resolveProjectPath(options.directory);
  const requested = options.repository;

  // Explicit repository: validate and use it.
  if (requested) {
    if (!isValidRepositoryFormat(requested)) {
      throw new Error('Invalid repository format. Please use "owner/repo" format.');
    }
    console.log(chalk.cyan(`Using specified repository: ${requested}`));
    return { repository: requested, projectPath };
  }

  // No repository given: fall back to the git remote of the project.
  const detected = detectGitHubRepository(projectPath);
  if (!detected) {
    throw new Error(
      'Could not detect GitHub repository. Please specify repository with --repository option or ensure you are in a Git repository with GitHub remote.'
    );
  }
  console.log(chalk.green(`Auto-detected repository: ${detected}`));
  return { repository: detected, projectPath };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Return the GitHub token for this invocation, failing loudly when there is none.
 *
 * @param {Object} options - CLI options
 * @returns {string} GitHub token
 * @throws {Error} If no token can be found
 */
export function validateGitHubToken(options) {
  const token = getGitHubToken(options);
  if (token) {
    return token;
  }

  throw new Error('GitHub token is required. Please provide token with --token option or set GITHUB_TOKEN environment variable.');
}
|
|
126
|
+
|
|
127
|
+
/**
 * Print a one-line progress update, but only in verbose mode.
 *
 * @param {Object} progress - Progress object with `stage`, `message`, `current` and `total`
 * @param {boolean} verbose - Whether to show verbose output
 */
export function displayProgress(progress, verbose) {
  if (!verbose) {
    return;
  }

  const { stage, message, current, total } = progress;
  console.log(chalk.blue(`[${stage}] ${message} (${current}/${total})`));
}
|
|
137
|
+
|
|
138
|
+
/**
 * Print a summary of a finished analysis run.
 *
 * Always prints the duration, repository and totals. When present, up to ten
 * entries of `results.patterns` and up to five entries of
 * `results.top_authors` follow.
 *
 * @param {Object} results - Analysis results
 * @param {number} duration - Duration in seconds
 */
export function displayAnalysisResults(results, duration) {
  console.log(chalk.green(`\nAnalysis completed in ${duration}s`));
  console.log(chalk.green(`Repository: ${results.repository}`));
  console.log(chalk.green(`Total PRs: ${results.total_prs}`));
  console.log(chalk.green(`Total Comments: ${results.total_comments}`));

  const patterns = results.patterns;
  if (patterns?.length > 0) {
    console.log(chalk.blue('\nTop Patterns:'));
    patterns.slice(0, 10).forEach(({ type, name, count, percentage }) => {
      console.log(chalk.cyan(` ${type}: ${name} (${count} - ${percentage}%)`));
    });
  }

  const authors = results.top_authors;
  if (authors?.length > 0) {
    console.log(chalk.blue('\nTop Authors:'));
    authors.slice(0, 5).forEach(({ author, count }) => {
      console.log(chalk.cyan(` ${author}: ${count} comments`));
    });
  }
}
|
|
163
|
+
|
|
164
|
+
/**
 * Print the current state of an analysis.
 *
 * The repository and status are always shown. Unless the status is
 * `'not_started'`, the PR and comment counts follow, plus failed comments,
 * errors and elapsed time when those are positive or set.
 *
 * @param {Object} status - Status object
 */
export function displayStatus(status) {
  console.log(chalk.blue('\nAnalysis Status:'));
  console.log(chalk.cyan(`Repository: ${status.repository}`));
  console.log(chalk.cyan(`Status: ${status.status}`));

  // Nothing has run yet, so there are no counters to report.
  if (status.status === 'not_started') {
    return;
  }

  console.log(chalk.cyan(`PRs: ${status.prs}`));
  console.log(chalk.cyan(`Comments: ${status.comments}`));

  if (status.failed_comments > 0) {
    console.log(chalk.yellow(`Failed Comments: ${status.failed_comments}`));
  }

  if (status.errors > 0) {
    console.log(chalk.red(`Errors: ${status.errors}`));
  }

  if (status.elapsed) {
    console.log(chalk.cyan(`Elapsed Time: ${status.elapsed}`));
  }
}
|
|
190
|
+
|
|
191
|
+
/**
 * Display database statistics
 *
 * When comments exist, prints the stored comment total and the names of the
 * comment types (the keys of `stats.comment_types`). A missing
 * `comment_types` map is treated as empty instead of raising a TypeError.
 * Otherwise prints a notice that nothing is stored for the repository.
 *
 * @param {Object} stats - Database statistics (`total_comments`, `comment_types`)
 * @param {boolean} hasComments - Whether comments exist in database
 */
export function displayDatabaseStats(stats, hasComments) {
  if (!hasComments) {
    console.log(chalk.yellow('\nNo PR comments found in database for this repository.'));
    return;
  }

  // Statistics may arrive without a per-type breakdown; fall back to an empty map.
  const commentTypes = Object.keys(stats.comment_types ?? {});

  console.log(chalk.blue('\nStored Data:'));
  console.log(chalk.cyan(`Total Comments in Database: ${stats.total_comments}`));
  console.log(chalk.cyan(`Comment Types: ${commentTypes.join(', ')}`));
}
|