prjct-cli 1.16.0 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ /**
2
+ * Git Co-Change Analyzer
3
+ *
4
+ * Analyzes git history to find files that frequently change together.
5
+ * If middleware.ts appears in 8 of 10 commits that touch auth.ts,
6
+ * they're a cluster — when one is relevant, include the other.
7
+ *
8
+ * Uses Jaccard similarity: |A ∩ B| / |A ∪ B| for each file pair.
9
+ *
10
+ * Zero API calls — pure math on git log data.
11
+ *
12
+ * @module domain/git-cochange
13
+ * @version 1.0.0
14
+ */
15
+
16
+ import { exec as execCallback } from 'node:child_process'
17
+ import { promisify } from 'node:util'
18
+ import prjctDb from '../storage/database'
19
+
20
+ const exec = promisify(execCallback)
21
+
22
// =============================================================================
// Types
// =============================================================================

/** Co-change matrix: file → { related_file: similarity_score } */
export type CoChangeMatrix = Record<string, Record<string, number>>

/** Serializable result of a co-change analysis run. */
export interface CoChangeIndex {
  /** The co-change similarity matrix (stored symmetrically: A→B and B→A) */
  matrix: CoChangeMatrix
  /** Number of commits analyzed */
  commitsAnalyzed: number
  /** Total unique files seen */
  filesAnalyzed: number
  /** Build timestamp (ISO 8601) */
  builtAt: string
}

/** One related file with its co-change similarity score. */
export interface CoChangeScore {
  path: string
  score: number
}

// =============================================================================
// Constants
// =============================================================================

/** Minimum Jaccard similarity to include in the matrix */
const MIN_SIMILARITY = 0.1

/** Minimum times a file must appear in commits to be included */
const MIN_FILE_OCCURRENCES = 2

/** Maximum number of files in a single commit to consider (skip merges/bulk) */
const MAX_FILES_PER_COMMIT = 30
57
+
58
+ // =============================================================================
59
+ // Git Log Parsing
60
+ // =============================================================================
61
+
62
+ /**
63
+ * Parse git log to extract commit → files mapping.
64
+ *
65
+ * @param projectPath - Project root path
66
+ * @param maxCommits - Maximum number of commits to analyze (default: 100)
67
+ * @returns Array of file sets, one per commit
68
+ */
69
+ async function parseGitLog(projectPath: string, maxCommits = 100): Promise<Set<string>[]> {
70
+ try {
71
+ const { stdout } = await exec(
72
+ `git log --name-only --pretty=format:'---COMMIT---' -${maxCommits}`,
73
+ { cwd: projectPath, maxBuffer: 10 * 1024 * 1024 }
74
+ )
75
+
76
+ const commits: Set<string>[] = []
77
+ let currentFiles: Set<string> | null = null
78
+
79
+ for (const line of stdout.split('\n')) {
80
+ const trimmed = line.trim()
81
+ if (trimmed === '---COMMIT---') {
82
+ if (currentFiles && currentFiles.size > 0 && currentFiles.size <= MAX_FILES_PER_COMMIT) {
83
+ commits.push(currentFiles)
84
+ }
85
+ currentFiles = new Set()
86
+ } else if (trimmed && currentFiles) {
87
+ // Only include source files (skip binaries, lockfiles, etc.)
88
+ if (isSourceFile(trimmed)) {
89
+ currentFiles.add(trimmed)
90
+ }
91
+ }
92
+ }
93
+
94
+ // Don't forget the last commit
95
+ if (currentFiles && currentFiles.size > 0 && currentFiles.size <= MAX_FILES_PER_COMMIT) {
96
+ commits.push(currentFiles)
97
+ }
98
+
99
+ return commits
100
+ } catch {
101
+ return []
102
+ }
103
+ }
104
+
105
+ /**
106
+ * Check if a path looks like a source file worth tracking.
107
+ */
108
+ function isSourceFile(filePath: string): boolean {
109
+ const sourceExtensions = /\.(ts|tsx|js|jsx|mjs|cjs|py|go|rs|java|cs|rb|php|vue|svelte)$/i
110
+ return sourceExtensions.test(filePath) && !filePath.includes('node_modules/')
111
+ }
112
+
113
+ // =============================================================================
114
+ // Co-Change Matrix
115
+ // =============================================================================
116
+
117
+ /**
118
+ * Build a co-change matrix from git history.
119
+ *
120
+ * For each pair of files that appear in the same commit,
121
+ * calculate Jaccard similarity = |commits_both| / |commits_either|.
122
+ *
123
+ * Performance target: <2 seconds for 100 commits.
124
+ */
125
+ export async function buildMatrix(projectPath: string, maxCommits = 100): Promise<CoChangeIndex> {
126
+ const commitSets = await parseGitLog(projectPath, maxCommits)
127
+
128
+ // Count how many commits each file appears in
129
+ const fileCommitCount = new Map<string, number>()
130
+ // Count how many commits each pair appears in together
131
+ const pairCount = new Map<string, number>()
132
+
133
+ for (const files of commitSets) {
134
+ const fileArray = Array.from(files)
135
+
136
+ for (const file of fileArray) {
137
+ fileCommitCount.set(file, (fileCommitCount.get(file) || 0) + 1)
138
+ }
139
+
140
+ // Count co-occurrences for each pair
141
+ for (let i = 0; i < fileArray.length; i++) {
142
+ for (let j = i + 1; j < fileArray.length; j++) {
143
+ const key = pairKey(fileArray[i], fileArray[j])
144
+ pairCount.set(key, (pairCount.get(key) || 0) + 1)
145
+ }
146
+ }
147
+ }
148
+
149
+ // Build Jaccard similarity matrix
150
+ const matrix: CoChangeMatrix = {}
151
+
152
+ for (const [key, count] of pairCount) {
153
+ const [fileA, fileB] = key.split('\0')
154
+ const countA = fileCommitCount.get(fileA) || 0
155
+ const countB = fileCommitCount.get(fileB) || 0
156
+
157
+ // Skip rare files
158
+ if (countA < MIN_FILE_OCCURRENCES || countB < MIN_FILE_OCCURRENCES) continue
159
+
160
+ // Jaccard similarity
161
+ const unionCount = countA + countB - count
162
+ const similarity = unionCount > 0 ? count / unionCount : 0
163
+
164
+ if (similarity < MIN_SIMILARITY) continue
165
+
166
+ // Store bidirectionally
167
+ if (!matrix[fileA]) matrix[fileA] = {}
168
+ if (!matrix[fileB]) matrix[fileB] = {}
169
+ matrix[fileA][fileB] = similarity
170
+ matrix[fileB][fileA] = similarity
171
+ }
172
+
173
+ return {
174
+ matrix,
175
+ commitsAnalyzed: commitSets.length,
176
+ filesAnalyzed: fileCommitCount.size,
177
+ builtAt: new Date().toISOString(),
178
+ }
179
+ }
180
+
181
+ /** Create a canonical pair key (sorted to avoid duplicates) */
182
+ function pairKey(a: string, b: string): string {
183
+ return a < b ? `${a}\0${b}` : `${b}\0${a}`
184
+ }
185
+
186
+ // =============================================================================
187
+ // Scoring
188
+ // =============================================================================
189
+
190
+ /**
191
+ * Given a set of seed files, find co-changed files and their scores.
192
+ *
193
+ * @param seedFiles - Files already identified as relevant
194
+ * @param index - The co-change index
195
+ * @returns Scored files NOT in the seed set
196
+ */
197
+ export function scoreFromSeeds(seedFiles: string[], index: CoChangeIndex): CoChangeScore[] {
198
+ const seedSet = new Set(seedFiles)
199
+ const scores = new Map<string, number>()
200
+
201
+ for (const seed of seedFiles) {
202
+ const related = index.matrix[seed]
203
+ if (!related) continue
204
+
205
+ for (const [file, similarity] of Object.entries(related)) {
206
+ if (seedSet.has(file)) continue
207
+
208
+ // Take the max similarity across all seed connections
209
+ const existing = scores.get(file) || 0
210
+ if (similarity > existing) {
211
+ scores.set(file, similarity)
212
+ }
213
+ }
214
+ }
215
+
216
+ return Array.from(scores.entries())
217
+ .map(([p, s]) => ({ path: p, score: s }))
218
+ .sort((a, b) => b.score - a.score)
219
+ }
220
+
221
// =============================================================================
// SQLite Persistence
// =============================================================================

/** Document key under which the co-change index is stored per project. */
const INDEX_KEY = 'cochange-index'

/** Persist the co-change index for a project in the document store. */
export function saveMatrix(projectId: string, index: CoChangeIndex): void {
  prjctDb.setDoc(projectId, INDEX_KEY, index)
}

/** Load a previously saved co-change index, or null if none was stored. */
export function loadMatrix(projectId: string): CoChangeIndex | null {
  return prjctDb.getDoc<CoChangeIndex>(projectId, INDEX_KEY)
}
234
+
235
// =============================================================================
// High-level API
// =============================================================================

/**
 * Build and persist the co-change matrix for a project.
 *
 * Convenience wrapper: analyzes up to `maxCommits` commits of git history
 * at `projectPath`, stores the resulting index under `projectId`, and
 * returns it to the caller.
 */
export async function indexCoChanges(
  projectPath: string,
  projectId: string,
  maxCommits = 100
): Promise<CoChangeIndex> {
  const index = await buildMatrix(projectPath, maxCommits)
  saveMatrix(projectId, index)
  return index
}
@@ -0,0 +1,315 @@
1
+ /**
2
+ * Import Graph Builder
3
+ *
4
+ * Builds a directed dependency graph from TypeScript/JavaScript imports.
5
+ * Uses the existing imports-tool parser to extract import relationships.
6
+ *
7
+ * When BM25 identifies a file as relevant, this follows the import chain
8
+ * N levels deep (default: 2) to include closely related files.
9
+ *
10
+ * Score = 1 / (depth + 1) for each reachable file.
11
+ * Direct imports get 0.5, 2nd-level imports get 0.33.
12
+ *
13
+ * @module domain/import-graph
14
+ * @version 1.0.0
15
+ */
16
+
17
+ import fs from 'node:fs/promises'
18
+ import path from 'node:path'
19
+ import prjctDb from '../storage/database'
20
+
21
// =============================================================================
// Types
// =============================================================================

/** Adjacency list: file → list of files it imports (resolved paths) */
export type ImportAdjacency = Record<string, string[]>

/** Reverse adjacency: file → list of files that import it */
export type ReverseAdjacency = Record<string, string[]>

/** Serializable result of an import-graph build. */
export interface ImportGraph {
  /** Forward edges: file imports these files */
  forward: ImportAdjacency
  /** Reverse edges: file is imported by these files */
  reverse: ReverseAdjacency
  /** Total number of files in the graph */
  fileCount: number
  /** Total number of edges */
  edgeCount: number
  /** Build timestamp (ISO 8601) */
  builtAt: string
}

/** A graph-connected file with its proximity score and BFS depth. */
export interface ImportScore {
  path: string
  score: number
  depth: number
}
49
+
50
+ // =============================================================================
51
+ // Constants
52
+ // =============================================================================
53
+
54
+ const INDEXABLE_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'])
55
+
56
+ const SKIP_DIRS = new Set([
57
+ 'node_modules',
58
+ '.git',
59
+ 'dist',
60
+ 'build',
61
+ 'out',
62
+ '.next',
63
+ 'coverage',
64
+ '.cache',
65
+ '.turbo',
66
+ '.vercel',
67
+ ])
68
+
69
+ /** Extensions to try when resolving imports */
70
+ const RESOLVE_EXTENSIONS = ['', '.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.js']
71
+
72
+ // =============================================================================
73
+ // Import Extraction (lightweight — no dep on imports-tool for build speed)
74
+ // =============================================================================
75
+
76
+ const IMPORT_REGEX = /(?:import|from)\s+['"]([^'"]+)['"]/g
77
+
78
+ /**
79
+ * Extract internal import paths from file content.
80
+ * Only resolves relative imports (starting with . or @/).
81
+ */
82
+ function extractImportSources(content: string): string[] {
83
+ const sources: string[] = []
84
+ let match: RegExpExecArray | null
85
+ const regex = new RegExp(IMPORT_REGEX.source, 'g')
86
+ while ((match = regex.exec(content)) !== null) {
87
+ const source = match[1]
88
+ if (source.startsWith('.') || source.startsWith('@/')) {
89
+ sources.push(source)
90
+ }
91
+ }
92
+ return sources
93
+ }
94
+
95
+ /**
96
+ * Try to resolve an import source to an actual file path.
97
+ */
98
+ async function resolveImport(
99
+ source: string,
100
+ fromFile: string,
101
+ projectPath: string
102
+ ): Promise<string | null> {
103
+ let basePath: string
104
+
105
+ if (source.startsWith('@/')) {
106
+ basePath = path.join(projectPath, 'src', source.slice(2))
107
+ } else {
108
+ const fromDir = path.dirname(path.join(projectPath, fromFile))
109
+ basePath = path.resolve(fromDir, source)
110
+ }
111
+
112
+ for (const ext of RESOLVE_EXTENSIONS) {
113
+ const fullPath = basePath + ext
114
+ try {
115
+ const stat = await fs.stat(fullPath)
116
+ if (stat.isFile()) {
117
+ return path.relative(projectPath, fullPath)
118
+ }
119
+ } catch {
120
+ // Extension not found, try next
121
+ }
122
+ }
123
+ return null
124
+ }
125
+
126
+ // =============================================================================
127
+ // Graph Building
128
+ // =============================================================================
129
+
130
+ /**
131
+ * Recursively list all indexable files.
132
+ */
133
+ async function listFiles(dir: string, projectPath: string): Promise<string[]> {
134
+ const files: string[] = []
135
+ const entries = await fs.readdir(dir, { withFileTypes: true })
136
+
137
+ for (const entry of entries) {
138
+ if (SKIP_DIRS.has(entry.name)) continue
139
+
140
+ const fullPath = path.join(dir, entry.name)
141
+ if (entry.isDirectory()) {
142
+ files.push(...(await listFiles(fullPath, projectPath)))
143
+ } else if (entry.isFile()) {
144
+ const ext = path.extname(entry.name).toLowerCase()
145
+ if (INDEXABLE_EXTENSIONS.has(ext)) {
146
+ files.push(path.relative(projectPath, fullPath))
147
+ }
148
+ }
149
+ }
150
+ return files
151
+ }
152
+
153
+ /**
154
+ * Build the import graph for a project.
155
+ *
156
+ * Performance target: <3 seconds for 500-file project.
157
+ */
158
+ export async function buildGraph(projectPath: string): Promise<ImportGraph> {
159
+ const files = await listFiles(projectPath, projectPath)
160
+ const forward: ImportAdjacency = {}
161
+ const reverse: ReverseAdjacency = {}
162
+ let edgeCount = 0
163
+
164
+ // Process files in parallel batches
165
+ const BATCH_SIZE = 50
166
+ for (let i = 0; i < files.length; i += BATCH_SIZE) {
167
+ const batch = files.slice(i, i + BATCH_SIZE)
168
+ const results = await Promise.all(
169
+ batch.map(async (filePath) => {
170
+ try {
171
+ const content = await fs.readFile(path.join(projectPath, filePath), 'utf-8')
172
+ const sources = extractImportSources(content)
173
+ const resolved: string[] = []
174
+
175
+ for (const source of sources) {
176
+ const target = await resolveImport(source, filePath, projectPath)
177
+ if (target && target !== filePath) {
178
+ resolved.push(target)
179
+ }
180
+ }
181
+
182
+ return { filePath, imports: resolved }
183
+ } catch {
184
+ return { filePath, imports: [] as string[] }
185
+ }
186
+ })
187
+ )
188
+
189
+ for (const { filePath, imports } of results) {
190
+ if (imports.length === 0) continue
191
+
192
+ forward[filePath] = imports
193
+ edgeCount += imports.length
194
+
195
+ for (const target of imports) {
196
+ if (!reverse[target]) reverse[target] = []
197
+ reverse[target].push(filePath)
198
+ }
199
+ }
200
+ }
201
+
202
+ return {
203
+ forward,
204
+ reverse,
205
+ fileCount: files.length,
206
+ edgeCount,
207
+ builtAt: new Date().toISOString(),
208
+ }
209
+ }
210
+
211
+ // =============================================================================
212
+ // Graph Scoring
213
+ // =============================================================================
214
+
215
+ /**
216
+ * Given a set of seed files (e.g., from BM25), follow import chains
217
+ * and score connected files by proximity.
218
+ *
219
+ * Score = 1 / (depth + 1):
220
+ * - Seed file itself: not scored (already scored by BM25)
221
+ * - Direct import/importer: 0.5 (depth=1)
222
+ * - 2nd-level: 0.33 (depth=2)
223
+ *
224
+ * Follows both forward (imports) and reverse (imported-by) edges.
225
+ *
226
+ * @param seedFiles - Files already identified as relevant
227
+ * @param graph - The import graph
228
+ * @param maxDepth - Maximum depth to follow (default: 2)
229
+ * @returns Scored files NOT in the seed set
230
+ */
231
+ export function scoreFromSeeds(
232
+ seedFiles: string[],
233
+ graph: ImportGraph,
234
+ maxDepth = 2
235
+ ): ImportScore[] {
236
+ const seedSet = new Set(seedFiles)
237
+ const visited = new Map<string, { score: number; depth: number }>()
238
+
239
+ // BFS from each seed
240
+ const queue: Array<{ file: string; depth: number }> = []
241
+
242
+ for (const seed of seedFiles) {
243
+ // Add direct neighbors at depth 1
244
+ const forwardEdges = graph.forward[seed] || []
245
+ const reverseEdges = graph.reverse[seed] || []
246
+
247
+ for (const neighbor of [...forwardEdges, ...reverseEdges]) {
248
+ if (!seedSet.has(neighbor)) {
249
+ queue.push({ file: neighbor, depth: 1 })
250
+ }
251
+ }
252
+ }
253
+
254
+ // Process queue
255
+ while (queue.length > 0) {
256
+ const { file, depth } = queue.shift()!
257
+ if (depth > maxDepth) continue
258
+
259
+ const score = 1 / (depth + 1)
260
+ const existing = visited.get(file)
261
+
262
+ if (existing) {
263
+ // Keep the better (higher) score
264
+ if (score > existing.score) {
265
+ visited.set(file, { score, depth })
266
+ }
267
+ continue
268
+ }
269
+
270
+ visited.set(file, { score, depth })
271
+
272
+ // Continue BFS for next level
273
+ if (depth < maxDepth) {
274
+ const forwardEdges = graph.forward[file] || []
275
+ const reverseEdges = graph.reverse[file] || []
276
+
277
+ for (const neighbor of [...forwardEdges, ...reverseEdges]) {
278
+ if (!seedSet.has(neighbor) && !visited.has(neighbor)) {
279
+ queue.push({ file: neighbor, depth: depth + 1 })
280
+ }
281
+ }
282
+ }
283
+ }
284
+
285
+ return Array.from(visited.entries())
286
+ .map(([p, { score, depth }]) => ({ path: p, score, depth }))
287
+ .sort((a, b) => b.score - a.score)
288
+ }
289
+
290
// =============================================================================
// SQLite Persistence
// =============================================================================

/** Document key under which the import graph is stored per project. */
const INDEX_KEY = 'import-graph'

/** Persist the import graph for a project in the document store. */
export function saveGraph(projectId: string, graph: ImportGraph): void {
  prjctDb.setDoc(projectId, INDEX_KEY, graph)
}

/** Load a previously saved import graph, or null if none was stored. */
export function loadGraph(projectId: string): ImportGraph | null {
  return prjctDb.getDoc<ImportGraph>(projectId, INDEX_KEY)
}
303
+
304
// =============================================================================
// High-level API
// =============================================================================

/**
 * Build and persist the import graph for a project.
 *
 * Convenience wrapper around buildGraph + saveGraph; returns the
 * freshly built graph.
 */
export async function indexImports(projectPath: string, projectId: string): Promise<ImportGraph> {
  const graph = await buildGraph(projectPath)
  saveGraph(projectId, graph)
  return graph
}
@@ -27,6 +27,9 @@ import {
27
27
  type ProjectContext,
28
28
  resolveToolIds,
29
29
  } from '../ai-tools'
30
+ import { indexProject } from '../domain/bm25'
31
+ import { indexCoChanges } from '../domain/git-cochange'
32
+ import { indexImports } from '../domain/import-graph'
30
33
  import { getErrorMessage } from '../errors'
31
34
  import commandInstaller from '../infrastructure/command-installer'
32
35
  import configManager from '../infrastructure/config-manager'
@@ -138,6 +141,20 @@ class SyncService {
138
141
  this.detectStack(),
139
142
  ])
140
143
 
144
+ // 3b. Build file-ranking indexes IN PARALLEL (BM25, import graph, co-change)
145
+ // These are independent and run after directory setup
146
+ try {
147
+ await Promise.all([
148
+ indexProject(this.projectPath, this.projectId!),
149
+ indexImports(this.projectPath, this.projectId!),
150
+ indexCoChanges(this.projectPath, this.projectId!),
151
+ ])
152
+ } catch (error) {
153
+ log.debug('File ranking index build failed (non-critical)', {
154
+ error: getErrorMessage(error),
155
+ })
156
+ }
157
+
141
158
  // 4. Generate all files (depends on gathered data)
142
159
  const agents = await this.generateAgents(stack, stats)
143
160
  const skills = this.configureSkills(agents)