codecritique 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +1145 -0
  3. package/package.json +98 -0
  4. package/src/content-retrieval.js +747 -0
  5. package/src/custom-documents.js +597 -0
  6. package/src/embeddings/cache-manager.js +364 -0
  7. package/src/embeddings/constants.js +40 -0
  8. package/src/embeddings/database.js +921 -0
  9. package/src/embeddings/errors.js +208 -0
  10. package/src/embeddings/factory.js +447 -0
  11. package/src/embeddings/file-processor.js +851 -0
  12. package/src/embeddings/model-manager.js +337 -0
  13. package/src/embeddings/similarity-calculator.js +97 -0
  14. package/src/embeddings/types.js +113 -0
  15. package/src/feedback-loader.js +384 -0
  16. package/src/index.js +1418 -0
  17. package/src/llm.js +123 -0
  18. package/src/pr-history/analyzer.js +579 -0
  19. package/src/pr-history/bot-detector.js +123 -0
  20. package/src/pr-history/cli-utils.js +204 -0
  21. package/src/pr-history/comment-processor.js +549 -0
  22. package/src/pr-history/database.js +819 -0
  23. package/src/pr-history/github-client.js +629 -0
  24. package/src/project-analyzer.js +955 -0
  25. package/src/rag-analyzer.js +2764 -0
  26. package/src/rag-review.js +566 -0
  27. package/src/technology-keywords.json +753 -0
  28. package/src/utils/command.js +48 -0
  29. package/src/utils/constants.js +263 -0
  30. package/src/utils/context-inference.js +364 -0
  31. package/src/utils/document-detection.js +105 -0
  32. package/src/utils/file-validation.js +271 -0
  33. package/src/utils/git.js +232 -0
  34. package/src/utils/language-detection.js +170 -0
  35. package/src/utils/logging.js +24 -0
  36. package/src/utils/markdown.js +132 -0
  37. package/src/utils/mobilebert-tokenizer.js +141 -0
  38. package/src/utils/pr-chunking.js +276 -0
  39. package/src/utils/string-utils.js +28 -0
  40. package/src/zero-shot-classifier-open.js +392 -0
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Bot Detection Utility
3
+ *
4
+ * Detects and filters out bot comments from PR analysis.
5
+ * Bots provide automated feedback that isn't useful for human review pattern analysis.
6
+ */
7
+
8
/**
 * Regular expressions that flag a GitHub login as an automated account.
 *
 * Only usernames are matched here. Every expression is case-insensitive and
 * anchored to the start or the end of the login.
 */

// Naming conventions shared by many bots, regardless of vendor.
const GENERIC_BOT_NAME_SHAPES = [
  /\[bot\]$/i, // GitHub App suffix, e.g. sonarqubecloud[bot], dependabot[bot]
  /^bot-/i, // "bot-" prefix, e.g. bot-reviewer
  /-bot$/i, // "-bot" suffix, e.g. review-bot
];

// Logins that begin with the name of a well-known automation service.
const VENDOR_BOT_NAME_PREFIXES = [
  /^dependabot/i, // Dependabot variations
  /^renovate/i, // Renovate variations
  /^github-actions/i, // GitHub Actions
  /^codecov/i, // Codecov
  /^sonarcloud/i, // SonarCloud variations
  /^sonarqube/i, // SonarQube variations
  /^snyk/i, // Snyk security scanner
  /^greenkeeper/i, // Greenkeeper
  /^semantic-release/i, // semantic-release
  /^allcontributors/i, // All Contributors
  /^stale/i, // Stale
  /^mergify/i, // Mergify
  /^auto-merge/i, // Auto-merge bots
  /^ci-bot/i, // CI bots
  /^deploy-bot/i, // Deploy bots
];

const BOT_PATTERNS = {
  // Generic shapes come first, followed by the vendor-specific prefixes.
  usernames: [...GENERIC_BOT_NAME_SHAPES, ...VENDOR_BOT_NAME_PREFIXES],
};
34
+
35
/**
 * Logins of well-known bot accounts, looked up by exact match.
 *
 * All entries are lower-case. The groups below exist only for readability;
 * they are flattened into a single Set in the order written.
 */
const KNOWN_BOTS = new Set(
  [
    // Dependency updates
    ['dependabot[bot]', 'renovate[bot]'],
    // Workflow runner
    ['github-actions[bot]'],
    // Coverage, quality gates and security scanning
    ['codecov[bot]', 'sonarqubecloud[bot]', 'sonarcloud[bot]', 'snyk[bot]'],
    // Repository maintenance
    ['greenkeeper[bot]', 'semantic-release-bot', 'allcontributors[bot]', 'stale[bot]', 'mergify[bot]'],
    // Generic in-house automation accounts
    ['auto-merge-bot', 'ci-bot', 'deploy-bot'],
    // Hosting and deployment platforms
    ['vercel[bot]', 'netlify[bot]', 'heroku[bot]'],
    // Continuous integration services
    ['circleci[bot]', 'travis[bot]', 'jenkins[bot]', 'azure-pipelines[bot]'],
    // Online development environments
    ['gitpod[bot]', 'codesandbox[bot]'],
    // Automated code review
    ['deepsource[bot]', 'codeclimate[bot]', 'codebeat[bot]', 'codacy[bot]', 'houndci-bot', 'danger[bot]'],
    // Formatting, linting and type checking
    ['prettier[bot]', 'eslint[bot]', 'typescript[bot]'],
  ].flat()
);
73
+
74
/**
 * Check if a username indicates a bot account.
 *
 * The login is trimmed and lower-cased once. That normalized form is used for
 * the exact lookup in KNOWN_BOTS and for the BOT_PATTERNS.usernames scan, so
 * stray whitespace around the login cannot defeat the anchored (^ / $)
 * patterns.
 *
 * @param {string} username - GitHub username to check
 * @returns {boolean} True if username appears to be a bot; false for empty
 *   or non-string input
 */
function isBotUsername(username) {
  if (!username || typeof username !== 'string') {
    return false;
  }

  const normalizedUsername = username.trim().toLowerCase();

  // Exact matches first (most reliable). The raw value is still consulted so
  // the lookup keeps working for any entry that is not in normalized form.
  if (KNOWN_BOTS.has(username) || KNOWN_BOTS.has(normalizedUsername)) {
    return true;
  }

  // Patterns are anchored, so they must see the trimmed login.
  return BOT_PATTERNS.usernames.some((pattern) => pattern.test(normalizedUsername));
}
94
+
95
/**
 * Decide whether a comment was written by a bot, judging by its author only.
 *
 * The author login is taken from the first truthy value among
 * `comment.user.login`, `comment.author_login` and `comment.author`.
 *
 * @param {Object} comment - Comment object carrying one of the author fields above
 * @returns {boolean} True if the author login looks like a bot; false when the
 *   comment or its author is missing
 */
function isBotComment(comment) {
  if (!comment) {
    return false;
  }

  const authorLogin = comment.user?.login || comment.author_login || comment.author;
  if (!authorLogin) {
    return false;
  }

  return isBotUsername(authorLogin);
}
109
+
110
/**
 * Return only the comments that were not written by a bot.
 *
 * @param {Array<Object>} comments - Array of comment objects
 * @returns {Array<Object>} New array without bot comments; an empty array when
 *   the input is not an array
 */
export function filterBotComments(comments) {
  return Array.isArray(comments) ? comments.filter((comment) => !isBotComment(comment)) : [];
}
@@ -0,0 +1,204 @@
1
+ /**
2
+ * CLI Utilities for PR History Analysis
3
+ *
4
+ * Provides utility functions for GitHub repository detection,
5
+ * project path handling, and CLI integration.
6
+ */
7
+
8
+ import { execSync } from 'child_process';
9
+ import fs from 'node:fs';
10
+ import path from 'node:path';
11
+ import chalk from 'chalk';
12
+
13
/**
 * Detect the GitHub repository from the `origin` remote of a checkout.
 *
 * Returns null right away when `<projectPath>/.git` does not exist. Otherwise
 * runs `git remote get-url origin` in that directory and extracts the
 * "owner/repo" pair from the URL. SSH (`git@github.com:owner/repo.git`) and
 * URL-style (`https://github.com/owner/repo`) remotes are recognized, with or
 * without the `.git` suffix or a trailing slash. Repository names may contain
 * dots (e.g. `socketio/socket.io`).
 *
 * Any failure (git missing, no `origin` remote, invalid path, ...) is reported
 * as a console warning and yields null instead of throwing.
 *
 * @param {string} projectPath - Project directory path
 * @returns {string|null} Repository in format "owner/repo" or null if not found
 */
function detectGitHubRepository(projectPath) {
  try {
    if (!fs.existsSync(path.join(projectPath, '.git'))) {
      return null;
    }

    // Get remote origin URL
    const remoteUrl = execSync('git remote get-url origin', {
      cwd: projectPath,
      encoding: 'utf8',
    }).trim();

    // One pattern covers both "github.com:owner/repo" and "github.com/owner/repo".
    // The repo capture is lazy so an optional ".git" suffix is stripped while
    // dots inside the name itself are preserved.
    const match = remoteUrl.match(/github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?\/?$/);

    return match ? `${match[1]}/${match[2]}` : null;
  } catch (error) {
    console.warn(chalk.yellow(`Warning: Could not detect GitHub repository: ${error.message}`));
    return null;
  }
}
50
+
51
/**
 * Look up the GitHub token to use.
 *
 * Candidates are tried in order: the `token` CLI option, then the
 * GITHUB_TOKEN environment variable, then GH_TOKEN. Empty values are skipped.
 *
 * @param {Object} options - CLI options
 * @returns {string|null} GitHub token or null if not found
 */
function getGitHubToken(options) {
  const candidates = [options.token, process.env.GITHUB_TOKEN, process.env.GH_TOKEN];
  const token = candidates.find(Boolean);
  return token ?? null;
}
59
+
60
/**
 * Turn the CLI directory option into an absolute project path.
 *
 * @param {string} directory - Directory option from CLI
 * @returns {string} The absolute form of `directory`, or the current working
 *   directory when no directory was given
 */
function resolveProjectPath(directory) {
  if (!directory) {
    return process.cwd();
  }

  return path.resolve(directory);
}
68
+
69
/**
 * Validate GitHub repository format.
 *
 * A valid value is exactly two non-empty segments separated by a single
 * slash ("owner/repo"). Each segment may contain only ASCII letters, digits,
 * `.`, `_` and `-`, so whitespace, extra slashes and full URLs are rejected
 * here rather than surfacing later as a failed API request.
 *
 * @param {string} repository - Repository string
 * @returns {boolean} True if valid format; false for non-string input
 */
function isValidRepositoryFormat(repository) {
  if (typeof repository !== 'string') {
    return false;
  }

  return /^[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+$/.test(repository);
}
83
+
84
/**
 * Work out which repository and project directory a CLI invocation targets.
 *
 * The project path comes from `options.directory` via resolveProjectPath. An
 * explicitly supplied `options.repository` is validated and used as-is;
 * otherwise the repository is auto-detected from the project's git remote.
 * The chosen repository is announced on stdout in both cases.
 *
 * @param {Object} options - CLI options
 * @returns {Object} Object with repository and projectPath
 * @throws {Error} If the given repository is malformed, or none was given and
 *   none could be detected
 */
export function getRepositoryAndProjectPath(options) {
  const projectPath = resolveProjectPath(options.directory);
  const requestedRepository = options.repository;

  // Explicit --repository wins over auto-detection.
  if (requestedRepository) {
    if (!isValidRepositoryFormat(requestedRepository)) {
      throw new Error('Invalid repository format. Please use "owner/repo" format.');
    }
    console.log(chalk.cyan(`Using specified repository: ${requestedRepository}`));
    return { repository: requestedRepository, projectPath };
  }

  const detectedRepository = detectGitHubRepository(projectPath);
  if (!detectedRepository) {
    throw new Error(
      'Could not detect GitHub repository. Please specify repository with --repository option or ensure you are in a Git repository with GitHub remote.'
    );
  }
  console.log(chalk.green(`Auto-detected repository: ${detectedRepository}`));

  return { repository: detectedRepository, projectPath };
}
112
+
113
/**
 * Fetch the GitHub token for this invocation, insisting that one exists.
 *
 * @param {Object} options - CLI options
 * @returns {string} GitHub token as resolved by getGitHubToken
 * @throws {Error} If token is not found
 */
export function validateGitHubToken(options) {
  const token = getGitHubToken(options);
  if (token) {
    return token;
  }

  throw new Error('GitHub token is required. Please provide token with --token option or set GITHUB_TOKEN environment variable.');
}
126
+
127
/**
 * Print one progress line, but only in verbose mode.
 *
 * The line has the form `[stage] message (current/total)`.
 *
 * @param {Object} progress - Progress object with stage, message, current and total
 * @param {boolean} verbose - Whether to show verbose output
 */
export function displayProgress(progress, verbose) {
  if (!verbose) {
    return;
  }

  const { stage, message, current, total } = progress;
  console.log(chalk.blue(`[${stage}] ${message} (${current}/${total})`));
}
137
+
138
/**
 * Print a summary of a finished analysis run.
 *
 * Always prints the duration, repository and PR/comment totals. When present
 * and non-empty, the first 10 entries of `results.patterns` and the first 5
 * entries of `results.top_authors` are listed as well.
 *
 * @param {Object} results - Analysis results
 * @param {number} duration - Duration in seconds
 */
export function displayAnalysisResults(results, duration) {
  console.log(chalk.green(`\nAnalysis completed in ${duration}s`));
  console.log(chalk.green(`Repository: ${results.repository}`));
  console.log(chalk.green(`Total PRs: ${results.total_prs}`));
  console.log(chalk.green(`Total Comments: ${results.total_comments}`));

  const { patterns, top_authors: topAuthors } = results;

  if (patterns?.length > 0) {
    console.log(chalk.blue('\nTop Patterns:'));
    patterns.slice(0, 10).forEach(({ type, name, count, percentage }) => {
      console.log(chalk.cyan(` ${type}: ${name} (${count} - ${percentage}%)`));
    });
  }

  if (topAuthors?.length > 0) {
    console.log(chalk.blue('\nTop Authors:'));
    topAuthors.slice(0, 5).forEach(({ author, count }) => {
      console.log(chalk.cyan(` ${author}: ${count} comments`));
    });
  }
}
163
+
164
/**
 * Print the current state of an analysis run.
 *
 * The repository and status are always shown. Unless the status is
 * 'not_started', the PR and comment counts follow, plus the failed-comment
 * count, error count and elapsed time when those are set.
 *
 * @param {Object} status - Status object
 */
export function displayStatus(status) {
  console.log(chalk.blue('\nAnalysis Status:'));
  console.log(chalk.cyan(`Repository: ${status.repository}`));
  console.log(chalk.cyan(`Status: ${status.status}`));

  // Nothing has been collected yet, so there are no counters to report.
  if (status.status === 'not_started') {
    return;
  }

  console.log(chalk.cyan(`PRs: ${status.prs}`));
  console.log(chalk.cyan(`Comments: ${status.comments}`));

  const { failed_comments: failedComments, errors, elapsed } = status;

  if (failedComments > 0) {
    console.log(chalk.yellow(`Failed Comments: ${failedComments}`));
  }

  if (errors > 0) {
    console.log(chalk.red(`Errors: ${errors}`));
  }

  if (elapsed) {
    console.log(chalk.cyan(`Elapsed Time: ${elapsed}`));
  }
}
190
+
191
/**
 * Print what is stored in the database for the repository.
 *
 * With comments present, shows the total count and the comma-separated keys
 * of `stats.comment_types`; otherwise prints a notice that nothing was found.
 *
 * @param {Object} stats - Database statistics
 * @param {boolean} hasComments - Whether comments exist in database
 */
export function displayDatabaseStats(stats, hasComments) {
  if (!hasComments) {
    console.log(chalk.yellow('\nNo PR comments found in database for this repository.'));
    return;
  }

  console.log(chalk.blue('\nStored Data:'));
  console.log(chalk.cyan(`Total Comments in Database: ${stats.total_comments}`));

  const commentTypeNames = Object.keys(stats.comment_types).join(', ');
  console.log(chalk.cyan(`Comment Types: ${commentTypeNames}`));
}