codeflow-hook 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/rag.js ADDED
@@ -0,0 +1,312 @@
1
+ // RAG (Retrieval-Augmented Generation) implementation for codeflow-hook
2
+ // Provides local vector storage and semantic search capabilities
3
+
4
+ import fs from 'fs';
5
+ import path from 'path';
6
+ import axios from 'axios';
7
+ import crypto from 'crypto';
8
+
9
// Vector Store: simple file-based implementation with cosine similarity
/**
 * File-backed vector index stored under `<projectRoot>/.codeflow/index`.
 *
 * Vectors and their metadata are kept in two parallel JSON arrays
 * (`vectors.json` and `metadata.json`); entry `i` of one describes entry `i`
 * of the other.
 */
export class LocalVectorStore {
  /**
   * @param {string} projectRoot - Directory that will contain `.codeflow/index`.
   */
  constructor(projectRoot) {
    this.storePath = path.join(projectRoot, '.codeflow', 'index');
    this.ensureDirectoryExists();
  }

  /** Create the index directory (and any missing parents) if it is absent. */
  ensureDirectoryExists() {
    if (!fs.existsSync(this.storePath)) {
      fs.mkdirSync(this.storePath, { recursive: true });
    }
  }

  /**
   * Absolute paths of the two index files.
   * @returns {{ vectorFile: string, metadataFile: string }}
   */
  #indexFiles() {
    return {
      vectorFile: path.join(this.storePath, 'vectors.json'),
      metadataFile: path.join(this.storePath, 'metadata.json'),
    };
  }

  /**
   * Synchronously read the index from disk.
   *
   * Shared by `loadVectors()` (async API) and `search()` (sync API) so that
   * `search()` never has to unwrap a Promise.
   *
   * @returns {{ vectors: number[][], metadata: object[] }} Empty arrays when
   *   either index file is missing.
   */
  #readIndex() {
    const { vectorFile, metadataFile } = this.#indexFiles();

    if (!fs.existsSync(vectorFile) || !fs.existsSync(metadataFile)) {
      return { vectors: [], metadata: [] };
    }

    return {
      vectors: JSON.parse(fs.readFileSync(vectorFile, 'utf8')),
      metadata: JSON.parse(fs.readFileSync(metadataFile, 'utf8')),
    };
  }

  /**
   * Save vectors and metadata to files, replacing any previous index.
   *
   * @param {number[][]} vectors - Embedding vectors.
   * @param {object[]} metadata - Metadata entries parallel to `vectors`.
   * @returns {Promise<void>}
   */
  async saveVectors(vectors, metadata) {
    const { vectorFile, metadataFile } = this.#indexFiles();

    fs.writeFileSync(vectorFile, JSON.stringify(vectors));
    fs.writeFileSync(metadataFile, JSON.stringify(metadata));
  }

  /**
   * Load vectors and metadata from files.
   *
   * @returns {Promise<{ vectors: number[][], metadata: object[] }>} Empty
   *   arrays when no index has been written yet.
   */
  async loadVectors() {
    return this.#readIndex();
  }

  /**
   * Search for the top-k stored vectors most similar to `queryVector`,
   * ranked by cosine similarity (highest first).
   *
   * This method is synchronous, so it reads the index through the
   * synchronous reader rather than the Promise-returning `loadVectors()`.
   *
   * @param {number[]} queryVector - Query embedding.
   * @param {number} [k=3] - Maximum number of results.
   * @returns {object[]} Up to `k` objects of the form
   *   `{ similarity, ...metadataEntry }`; empty when the index is empty.
   */
  search(queryVector, k = 3) {
    const { vectors, metadata } = this.#readIndex();

    if (vectors.length === 0) {
      return [];
    }

    return vectors
      .map((vector, index) => ({
        similarity: cosineSimilarity(queryVector, vector),
        metadata: metadata[index],
      }))
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, k)
      .map((item) => ({
        similarity: item.similarity,
        ...item.metadata,
      }));
  }
}
71
+
72
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * @param {number[]} vecA
 * @param {number[]} vecB
 * @returns {number} dot(A, B) / (|A| * |B|), or 0 when either vector has
 *   zero magnitude.
 * @throws {Error} When the vectors differ in length.
 */
function cosineSimilarity(vecA, vecB) {
  const size = vecA.length;
  if (size !== vecB.length) {
    throw new Error('Vectors must be of same length');
  }

  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;

  // Single pass accumulating the dot product and both squared magnitudes.
  let idx = 0;
  while (idx < size) {
    const a = vecA[idx];
    const b = vecB[idx];
    dot += a * b;
    sumSquaresA += a * a;
    sumSquaresB += b * b;
    idx += 1;
  }

  const magnitudeA = Math.sqrt(sumSquaresA);
  const magnitudeB = Math.sqrt(sumSquaresB);

  // A zero vector has no direction; report "no similarity" instead of NaN.
  return magnitudeA === 0 || magnitudeB === 0
    ? 0
    : dot / (magnitudeA * magnitudeB);
}
97
+
98
// Embedding generation using Gemini API
/**
 * Thin client for the Gemini `text-embedding-004` `embedContent` endpoint.
 */
export class EmbeddingGenerator {
  /**
   * @param {{ apiKey: string }} config - Must carry the Gemini API key.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Request an embedding vector for a single piece of text.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} The embedding values from the response.
   * @throws {Error} `Embedding generation failed: <reason>` on a missing key,
   *   a transport/HTTP error, or a response without embedding values.
   */
  async generateEmbedding(text) {
    try {
      const apiKey = this.config?.apiKey;
      if (!apiKey) {
        throw new Error('no API key configured');
      }

      const url = 'https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent';

      // The key travels in a header rather than the query string so it does
      // not end up in URLs that get logged, and needs no URL-encoding.
      const response = await axios.post(url, {
        content: {
          parts: [{ text }],
        },
      }, {
        headers: {
          'Content-Type': 'application/json',
          'x-goog-api-key': apiKey,
        },
      });

      const values = response?.data?.embedding?.values;
      if (!Array.isArray(values)) {
        throw new Error('response did not contain embedding values');
      }
      return values;
    } catch (error) {
      // Only the message is propagated: the underlying HTTP error object
      // carries the request config (including the API key header).
      throw new Error(`Embedding generation failed: ${error.message}`);
    }
  }

  /**
   * Generate embeddings for a batch of texts, one request at a time and in
   * input order. The first failure aborts the batch.
   *
   * @param {string[]} texts
   * @returns {Promise<number[][]>} Embeddings parallel to `texts`.
   */
  async generateBatchEmbeddings(texts) {
    const embeddings = [];
    for (const text of texts) {
      embeddings.push(await this.generateEmbedding(text));
    }
    return embeddings;
  }
}
135
+
136
// Text chunking utilities
/**
 * Static helpers that split file content into overlapping chunks and build
 * the metadata record stored alongside each chunk.
 */
export class TextChunker {
  /**
   * Split `text` into chunks of at most `maxChunkSize` characters, with
   * consecutive chunks sharing roughly `overlap` characters.
   *
   * A chunk prefers to end just after a period, or just before a newline,
   * when one occurs within the last 100 characters of the window. The next
   * chunk starts `overlap` characters before the actual end of the previous
   * one, so no text is skipped when a chunk is cut short.
   *
   * @param {string} text - Text to split.
   * @param {number} [maxChunkSize=1000] - Maximum chunk length; must be > 0.
   * @param {number} [overlap=200] - Characters shared between neighbours;
   *   must satisfy `0 <= overlap < maxChunkSize`.
   * @returns {string[]} Chunks in document order; `[]` for empty text.
   * @throws {RangeError} When the size parameters cannot make progress.
   */
  static chunkText(text, maxChunkSize = 1000, overlap = 200) {
    if (!(maxChunkSize > 0)) {
      throw new RangeError('maxChunkSize must be a positive number');
    }
    if (!(overlap >= 0 && overlap < maxChunkSize)) {
      // overlap >= maxChunkSize would never advance through the text.
      throw new RangeError('overlap must be >= 0 and smaller than maxChunkSize');
    }

    const breakWindow = 100; // how far back from the window end a break may be
    const chunks = [];
    let start = 0;

    while (start < text.length) {
      let end = start + maxChunkSize;

      // Try to find a good breaking point (sentence end) for non-final chunks.
      if (end < text.length) {
        // Search from end - 1: a period at `end` itself would push the chunk
        // one character past maxChunkSize.
        const lastPeriod = text.lastIndexOf('.', end - 1);
        const lastNewline = text.lastIndexOf('\n', end);

        if (lastPeriod > start && lastPeriod > end - breakWindow) {
          end = lastPeriod + 1;
        } else if (lastNewline > start && lastNewline > end - breakWindow) {
          end = lastNewline;
        }
      }

      chunks.push(text.slice(start, end));

      // The chunk just emitted reached the end of the text; another pass
      // would only repeat its tail.
      if (end >= text.length) {
        break;
      }

      // Step back by `overlap` from the real end, but always move forward.
      start = Math.max(end - overlap, start + 1);
    }

    return chunks;
  }

  /**
   * Build the metadata record for a chunk of a file.
   *
   * @param {string} filePath - Path of the source file.
   * @param {string} content - Chunk text.
   * @returns {{ filePath: string, fileName: string, extension: string,
   *   contentLength: number, id: string }} `filePath` has the current working
   *   directory prefix removed when present; `id` is the MD5 hex digest of
   *   that path concatenated with the content (an identifier, not a security
   *   measure).
   */
  static extractMetadata(filePath, content) {
    const fileName = path.basename(filePath);
    const extension = path.extname(filePath);

    // Strip the cwd only when it is a true prefix; a plain string replace
    // would also remove a matching substring from the middle of the path.
    const cwdPrefix = process.cwd() + path.sep;
    const relativePath = filePath.startsWith(cwdPrefix)
      ? filePath.slice(cwdPrefix.length)
      : filePath;

    return {
      filePath: relativePath,
      fileName,
      extension,
      contentLength: content.length,
      id: crypto.createHash('md5').update(relativePath + content).digest('hex'),
    };
  }
}
178
+
179
// Main RAG indexer function
/**
 * Build the project's vector index: discover key files, chunk them, embed
 * every chunk and persist vectors plus metadata to the local store.
 *
 * Each metadata entry also carries the chunk text under `content`, because
 * context retrieval reads `content` from search results.
 *
 * Files that cannot be read or chunked are skipped with a warning. A failure
 * while generating embeddings or saving the index rejects the returned promise.
 *
 * @param {{ apiKey: string }} config - Passed to the embedding generator.
 * @param {string} [projectRoot=process.cwd()] - Project to index.
 * @returns {Promise<{ indexedFiles: number, totalChunks: number }>} Number of
 *   files actually indexed (skipped files excluded) and chunks stored.
 */
export async function indexProject(config, projectRoot = process.cwd()) {
  const store = new LocalVectorStore(projectRoot);
  const embedder = new EmbeddingGenerator(config);

  // Identify key project files
  const keyFiles = await findKeyFiles(projectRoot);

  const allChunks = [];
  const allMetadata = [];
  let indexedFiles = 0;

  for (const filePath of keyFiles) {
    try {
      const content = fs.readFileSync(filePath, 'utf8');
      const chunks = TextChunker.chunkText(content);

      // Build every entry for the file before touching the shared arrays, so
      // a failure part-way through cannot leave chunks and metadata misaligned.
      const entries = chunks.map((chunk, chunkIndex) => ({
        ...TextChunker.extractMetadata(filePath, chunk),
        content: chunk,
        chunkIndex,
        totalChunks: chunks.length,
      }));

      for (let i = 0; i < chunks.length; i++) {
        allChunks.push(chunks[i]);
        allMetadata.push(entries[i]);
      }
      indexedFiles += 1;
    } catch (error) {
      console.warn(`Skipping ${filePath}: ${error.message}`);
    }
  }

  // Generate embeddings
  const embeddings = await embedder.generateBatchEmbeddings(allChunks);

  // Save to vector store
  await store.saveVectors(embeddings, allMetadata);

  return {
    indexedFiles,
    totalChunks: allChunks.length,
  };
}
219
+
220
// Find key project files for indexing
/**
 * Collect the files worth indexing for a project:
 *   1. well-known documentation/config files in the project root,
 *   2. every source file under the conventional source directories,
 *   3. remaining `.ts` / `.js` / `.json` files directly in the project root.
 *
 * @param {string} projectRoot - Project directory to scan.
 * @returns {Promise<string[]>} De-duplicated list of file paths.
 */
async function findKeyFiles(projectRoot) {
  const keyPatterns = [
    'README.md',
    'ARCHITECTURE.md',
    'package.json',
    'tsconfig.json',
    'jest.config.js',
    'jest.config.cjs',
    'webpack.config.js',
    'Dockerfile',
  ];
  const sourceDirs = ['src', 'components', 'lib', 'utils', 'types'];
  const rootExtensions = ['.ts', '.js', '.json'];
  // Generated lockfile: very large and carries no project knowledge.
  const excludedRootFiles = new Set(['package-lock.json']);

  // Check for exact key files
  const keyFiles = keyPatterns
    .map((name) => path.join(projectRoot, name))
    .filter((fullPath) => fs.existsSync(fullPath));

  // Find source files
  for (const dir of sourceDirs) {
    const fullDir = path.join(projectRoot, dir);
    if (fs.existsSync(fullDir) && fs.statSync(fullDir).isDirectory()) {
      keyFiles.push(...findSourceFiles(fullDir));
    }
  }

  // Find interface/config files in root. Extensions are matched as real
  // suffixes; glob strings such as '*.ts' never match a file name.
  const rootFiles = fs.readdirSync(projectRoot, { withFileTypes: true })
    .filter((entry) => entry.isFile())
    .map((entry) => entry.name)
    .filter((name) => rootExtensions.some((ext) => name.endsWith(ext)))
    .filter((name) => !excludedRootFiles.has(name))
    .map((name) => path.join(projectRoot, name));

  keyFiles.push(...rootFiles);

  return [...new Set(keyFiles)]; // Remove duplicates
}
263
+
264
/**
 * Recursively collect source files under `dir`.
 *
 * Files are kept when their name ends in one of the recognised extensions.
 * Sub-directories are descended into unless their name starts with `.` or is
 * `node_modules`. Entries that cannot be stat'ed (for example dangling
 * symlinks) are skipped instead of aborting the walk, and a directory that
 * has already been visited under another path is not walked again, which
 * stops symlink cycles.
 *
 * @param {string} dir - Directory to scan.
 * @param {Set<string>} [visited] - Internal: real paths of directories
 *   already walked. Callers should not pass this.
 * @returns {string[]} Paths of matching files, built by joining onto `dir`.
 */
function findSourceFiles(dir, visited = new Set()) {
  const extensions = ['.ts', '.tsx', '.js', '.jsx', '.json', '.md'];

  const realDir = fs.realpathSync(dir);
  if (visited.has(realDir)) {
    return [];
  }
  visited.add(realDir);

  const files = [];

  for (const item of fs.readdirSync(dir)) {
    const fullPath = path.join(dir, item);

    let stat;
    try {
      stat = fs.statSync(fullPath);
    } catch {
      // Dangling symlink or entry removed mid-scan: nothing to index.
      continue;
    }

    if (stat.isDirectory()) {
      if (!item.startsWith('.') && item !== 'node_modules') {
        files.push(...findSourceFiles(fullPath, visited));
      }
    } else if (stat.isFile() && extensions.some((ext) => item.endsWith(ext))) {
      files.push(fullPath);
    }
  }

  return files;
}
283
+
284
// Context retrieval function
/**
 * Retrieve project context relevant to a block of code.
 *
 * Embeds `codeBlock`, looks up the `k` most similar indexed chunks and joins
 * them into one string. Retrieval is best-effort: any failure while reading
 * the index, embedding or searching is logged as a warning and yields `''`.
 *
 * @param {string} codeBlock - Code to find context for.
 * @param {{ apiKey: string }} config - Passed to the embedding generator.
 * @param {string} [projectRoot=process.cwd()] - Project whose index is used.
 * @param {number} [k=3] - Maximum number of chunks to include.
 * @returns {Promise<string>} Sections of the form `From <filePath>:\n<content>`
 *   separated by `\n\n---\n\n`, or `''` when there is no index or no usable
 *   result. Results whose metadata carries no `content` text are omitted.
 */
export async function retrieveContext(codeBlock, config, projectRoot = process.cwd(), k = 3) {
  const store = new LocalVectorStore(projectRoot);

  try {
    // Check if index exists. loadVectors() is async, so it must be awaited
    // before its result can be destructured.
    const { vectors } = await store.loadVectors();
    if (!Array.isArray(vectors) || vectors.length === 0) {
      return ''; // No context available
    }

    const embedder = new EmbeddingGenerator(config);

    // Generate embedding for the code block
    const queryVector = await embedder.generateEmbedding(codeBlock);

    // Search for similar chunks (await is harmless if search is synchronous)
    const results = await store.search(queryVector, k);

    // Combine top results into context string, skipping entries without text
    // so the literal string "undefined" is never emitted as context.
    return results
      .filter((result) => typeof result.content === 'string')
      .map((result) => `From ${result.filePath}:\n${result.content}`)
      .join('\n\n---\n\n');
  } catch (error) {
    console.warn(`Context retrieval failed: ${error.message}`);
    return '';
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeflow-hook",
3
- "version": "1.1.0",
3
+ "version": "1.3.0",
4
4
  "description": "An interactive CI/CD simulator and lightweight pre-push code reviewer using Gemini AI",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -31,6 +31,9 @@
31
31
  "chalk": "^5.3.0",
32
32
  "ora": "^7.0.1"
33
33
  },
34
+ "devDependencies": {
35
+ "@types/node": "^18.0.0"
36
+ },
34
37
  "engines": {
35
38
  "node": ">=16.0.0"
36
39
  }