prjct-cli 0.44.1 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,584 @@
1
+ /**
2
+ * Files Tool - Find relevant files for a task
3
+ *
4
+ * Scoring algorithm:
5
+ * - 60% Keywords in path/filename
6
+ * - 20% Domain patterns (frontend/backend/etc)
7
+ * - 15% Git recency (recently modified files)
8
+ * - 5% Import distance (proximity to entry points)
9
+ *
10
+ * @module context-tools/files-tool
11
+ * @version 1.0.0
12
+ */
13
+
14
+ import fs from 'fs/promises'
15
+ import path from 'path'
16
+ import { exec as execCallback } from 'child_process'
17
+ import { promisify } from 'util'
18
+ import type { FilesToolOutput, ScoredFile, ScoreReason } from './types'
19
+ import { isNotFoundError } from '../types/fs'
20
+
21
+ const exec = promisify(execCallback)
22
+
23
+ // =============================================================================
24
+ // Domain Keywords
25
+ // =============================================================================
26
+
27
/**
 * Domain classification table.
 *
 * Maps a domain name to the substrings that mark a file path as belonging to
 * that domain. Entry order is significant: domains are evaluated in insertion
 * order, which determines the order of the `domain:*` reasons on a scored file.
 */
const DOMAIN_KEYWORDS: Record<string, string[]> = {
  frontend: [
    'component', 'page', 'view', 'ui', 'layout',
    'style', 'css', 'scss', 'sass',
    'hook', 'context', 'store', 'redux', 'zustand',
    'react', 'vue', 'svelte', 'angular', 'next', 'nuxt',
    'app', 'client',
  ],
  backend: [
    'api', 'route', 'controller', 'service', 'middleware', 'handler', 'resolver',
    'schema', 'model', 'entity', 'repository',
    'server', 'socket', 'graphql', 'rest', 'trpc',
  ],
  database: [
    'migration', 'seed',
    'schema', 'model', 'entity', 'repository',
    'prisma', 'drizzle', 'sequelize', 'typeorm', 'mongoose', 'knex',
    'sql', 'db',
  ],
  auth: [
    'auth', 'login', 'logout', 'session', 'token', 'jwt', 'oauth', 'passport',
    'credential', 'permission', 'role', 'user', 'account',
  ],
  testing: [
    'test', 'spec', 'e2e', 'integration', 'unit',
    'mock', 'fixture', 'stub',
    'jest', 'vitest', 'cypress', 'playwright',
  ],
  config: [
    'config', 'env', 'setting', 'constant', 'option',
    'tsconfig', 'eslint', 'prettier', 'vite', 'webpack', 'rollup',
  ],
  infra: [
    'docker', 'compose', 'kubernetes', 'k8s',
    'ci', 'cd', 'github', 'gitlab', 'jenkins',
    'terraform', 'ansible', 'deploy',
  ],
  util: ['util', 'helper', 'lib', 'common', 'shared', 'core', 'base', 'abstract'],
}
148
+
149
/**
 * File extensions (lowercase, including the leading dot) that count as source
 * code. The project walk only collects files whose extension is in this set.
 */
const CODE_EXTENSIONS = new Set([
  '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
  '.py', '.go', '.rs', '.java', '.kt', '.swift', '.rb', '.php',
  '.c', '.cpp', '.h', '.hpp', '.cs',
  '.vue', '.svelte',
])
175
+
176
/**
 * Directory names the project walk never descends into
 * (dependency folders, build output, tool caches).
 */
const IGNORE_DIRS = new Set([
  'node_modules', '.git',
  'dist', 'build', '.next', '.nuxt', '.output',
  'coverage', '.cache', '__pycache__', '.pytest_cache',
  'vendor', 'target',
  '.turbo', '.vercel',
])
196
+
197
+ // =============================================================================
198
+ // Main Function
199
+ // =============================================================================
200
+
201
/**
 * Find files relevant to a task description.
 *
 * Keywords are extracted from the description, every code file found under
 * `projectPath` is scored against them, and the best matches are returned in
 * descending score order.
 *
 * @param taskDescription - Natural language description of the task
 * @param projectPath - Path to the project root
 * @param options - Configuration options
 * @param options.maxFiles - Maximum number of files to return (default 30)
 * @param options.minScore - Minimum score a file needs to be returned (default 0.1)
 * @param options.includeTests - Whether test files are eligible (default false)
 * @returns Scored files sorted by relevance, plus scan metrics
 */
export async function findRelevantFiles(
  taskDescription: string,
  projectPath: string,
  options: {
    maxFiles?: number
    minScore?: number
    includeTests?: boolean
  } = {}
): Promise<FilesToolOutput> {
  const startTime = Date.now()
  // A negative limit would make `slice` drop results from the tail instead of
  // capping the list, so clamp it to zero.
  const maxFiles = Math.max(0, options.maxFiles ?? 30)
  const minScore = options.minScore ?? 0.1
  const includeTests = options.includeTests ?? false

  // Extract keywords from task description
  const keywords = extractKeywords(taskDescription)

  // The directory walk and the git history lookup do not depend on each other,
  // and both swallow their own failures, so they can safely run concurrently.
  const [allFiles, gitRecency] = await Promise.all([
    getAllCodeFiles(projectPath),
    getGitRecency(projectPath),
  ])

  // Score each file
  const scoredFiles: ScoredFile[] = []

  for (const filePath of allFiles) {
    // Skip test files if not requested
    if (!includeTests && isTestFile(filePath)) {
      continue
    }

    const scored = scoreFile(filePath, keywords, gitRecency)

    if (scored.score >= minScore) {
      scoredFiles.push(scored)
    }
  }

  // Sort by score descending
  scoredFiles.sort((a, b) => b.score - a.score)

  // Limit results
  const topFiles = scoredFiles.slice(0, maxFiles)

  return {
    files: topFiles,
    metrics: {
      // Counts every code file discovered, including skipped test files
      filesScanned: allFiles.length,
      filesReturned: topFiles.length,
      scanDuration: Date.now() - startTime,
    },
  }
}
263
+
264
+ // =============================================================================
265
+ // Helper Functions
266
+ // =============================================================================
267
+
268
/**
 * Words that carry no signal for file matching: English function words plus
 * generic task verbs ("add", "fix", "update", ...). Built once at module load
 * instead of on every call.
 */
const STOP_WORDS: ReadonlySet<string> = new Set([
  'a', 'an', 'the', 'and', 'or', 'but',
  'is', 'are', 'was', 'were', 'be', 'been', 'being',
  'have', 'has', 'had', 'do', 'does', 'did',
  'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need',
  'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as',
  'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under',
  'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
  'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
  'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just',
  'add', 'create', 'make', 'implement', 'fix', 'update', 'change', 'modify', 'remove',
  'delete', 'new',
])

/**
 * Extract keywords from task description.
 *
 * The description is lowercased and split on every run of characters outside
 * `a-z0-9`. Stop words and words of one or two characters are dropped. Each
 * keyword is returned once, in order of first appearance, so repeating a word
 * in the description cannot inflate a file's score.
 *
 * @param description - Natural language task description
 * @returns Unique lowercase keywords
 */
function extractKeywords(description: string): string[] {
  const unique = new Set<string>()

  for (const word of description.toLowerCase().split(/[^a-z0-9]+/)) {
    if (word.length > 2 && !STOP_WORDS.has(word)) {
      unique.add(word)
    }
  }

  // A Set iterates in insertion order, so first-appearance order is preserved
  return [...unique]
}
369
+
370
/**
 * Get all code files in the project.
 *
 * Walks `projectPath` recursively, skipping every directory listed in
 * `IGNORE_DIRS` and every directory whose name starts with a dot, and collects
 * files whose lowercased extension is in `CODE_EXTENSIONS`.
 *
 * Paths are returned relative to `projectPath` and always use `/` as the
 * separator, regardless of platform, because the scoring and test-detection
 * code matches on `/` and git reports paths that way.
 *
 * The scan is best-effort: a directory that cannot be read contributes no
 * files and never aborts the walk.
 *
 * @param projectPath - Directory to scan
 * @returns Relative, `/`-separated paths of all code files found
 */
async function getAllCodeFiles(projectPath: string): Promise<string[]> {
  const files: string[] = []

  async function walk(dir: string, relativePath: string = ''): Promise<void> {
    try {
      const entries = await fs.readdir(dir, { withFileTypes: true })

      for (const entry of entries) {
        // Platform separators are needed for disk access only
        const fullPath = path.join(dir, entry.name)
        // Reported paths are kept POSIX-style so they are identical on every OS
        const relPath = path.posix.join(relativePath, entry.name)

        if (entry.isDirectory()) {
          // Skip ignored and hidden directories
          if (IGNORE_DIRS.has(entry.name) || entry.name.startsWith('.')) {
            continue
          }
          await walk(fullPath, relPath)
        } else if (entry.isFile()) {
          const ext = path.extname(entry.name).toLowerCase()
          if (CODE_EXTENSIONS.has(ext)) {
            files.push(relPath)
          }
        }
      }
    } catch (error) {
      if (isNotFoundError(error)) {
        // The directory does not exist (or vanished mid-walk): nothing to list
        return
      }
      // Any other failure (e.g. permission denied) is deliberately ignored so
      // one unreadable directory cannot fail the whole scan.
    }
  }

  await walk(projectPath)
  return files
}
407
+
408
/**
 * Get git recency information.
 *
 * Reads the last 30 commits and records, for every file they touched:
 * - `commits`: how many of those commits changed the file
 * - `daysAgo`: whole days since the most recent of those commits
 *
 * Keys are paths relative to `projectPath` (`--relative`), `/`-separated as
 * git prints them. The log is parsed here rather than through a shell pipeline
 * so the function needs nothing but `git` itself.
 *
 * Best-effort: when git is unavailable or `projectPath` is not inside a
 * repository, an empty map is returned.
 *
 * @param projectPath - Directory to run git in
 * @returns Map from relative file path to its recency data
 */
async function getGitRecency(
  projectPath: string
): Promise<Map<string, { commits: number; daysAgo: number }>> {
  const recency = new Map<string, { commits: number; daysAgo: number }>()

  try {
    // - core.quotePath=false keeps non-ASCII file names unescaped
    // - --relative reports paths relative to cwd instead of the repository root
    // - %x20 is a literal space, which avoids any shell quoting in the format
    const { stdout } = await exec(
      'git -c core.quotePath=false log -30 --relative --name-only --pretty=format:%H%x20%ct',
      { cwd: projectPath, maxBuffer: 10 * 1024 * 1024 }
    )

    const now = Math.floor(Date.now() / 1000)
    // "<hash> <unix timestamp>"; 40 hex chars for SHA-1 repos, 64 for SHA-256
    const commitHeader = /^[0-9a-f]{40,64} (\d+)$/
    let commitTimestamp = now

    for (const line of stdout.split(/\r?\n/)) {
      if (line.trim() === '') {
        continue
      }

      const header = commitHeader.exec(line)
      if (header) {
        commitTimestamp = Number.parseInt(header[1], 10)
        continue
      }

      // Anything else is a file name belonging to the current commit
      const seen = recency.get(line)
      if (seen) {
        seen.commits += 1
      } else {
        // git log lists newest commits first, so the first sighting of a file
        // is its most recent change. Clamp to guard against clock skew.
        const daysAgo = Math.max(0, Math.floor((now - commitTimestamp) / 86400))
        recency.set(line, { commits: 1, daysAgo })
      }
    }
  } catch (error) {
    // Git not available or not a git repo: recency simply contributes nothing
  }

  return recency
}
446
+
447
/**
 * Score a file based on relevance.
 *
 * Four component scores, each in [0, 1], are combined with fixed weights:
 * - 60% keyword: +0.3 when the path contains a keyword, +0.15 when a single
 *   path token contains the keyword or the keyword contains the token
 * - 20% domain: +0.4 per domain that both the path and the task relate to
 * - 15% recency: tiered by days since last change, +0.2 for 5 or more commits
 * - 5% import: 0.5 for entry-point-like file names, 0.3 for files under a
 *   `core`, `shared` or `lib` directory
 *
 * Path separators are normalised to `/` for matching and for the recency
 * lookup; the returned `path` is the input, unchanged.
 *
 * @param filePath - File path relative to the project root
 * @param keywords - Lowercase task keywords
 * @param gitRecency - Recency data keyed by `/`-separated relative path
 * @returns The file's path, its final score (capped at 1) and the reasons behind it
 */
function scoreFile(
  filePath: string,
  keywords: string[],
  gitRecency: Map<string, { commits: number; daysAgo: number }>
): ScoredFile {
  const reasons: ScoreReason[] = []
  let keywordScore = 0
  let domainScore = 0
  let recencyScore = 0
  let importScore = 0

  const normalizedPath = filePath.replace(/\\/g, '/')
  const pathLower = normalizedPath.toLowerCase()
  // Drop empty tokens (produced by paths such as `__init__.py`): an empty
  // string is "contained" in every keyword and would match everything.
  const pathParts = pathLower.split(/[^a-z0-9]+/).filter(Boolean)

  // Keyword matching (60% weight)
  for (const keyword of keywords) {
    if (pathLower.includes(keyword)) {
      keywordScore += 0.3
      reasons.push(`keyword:${keyword}`)
    }
    // Partial match in path parts. The reverse direction (keyword contains
    // the token) is limited to tokens of 3+ characters, otherwise tokens such
    // as the `ts` extension match unrelated keywords like "settings".
    const partialMatch = pathParts.some(
      (part) => part.includes(keyword) || (part.length > 2 && keyword.includes(part))
    )
    if (partialMatch) {
      keywordScore += 0.15
    }
  }
  keywordScore = Math.min(1, keywordScore)

  // Domain matching (20% weight)
  for (const [domain, domainKeywords] of Object.entries(DOMAIN_KEYWORDS)) {
    // Whether the task relates to this domain does not depend on the path,
    // so it is evaluated once per domain.
    const taskRelatesToDomain = keywords.some(
      (k) => domainKeywords.includes(k) || k.includes(domain) || domain.includes(k)
    )
    if (!taskRelatesToDomain) {
      continue
    }
    if (domainKeywords.some((domainKw) => pathLower.includes(domainKw))) {
      domainScore += 0.4
      reasons.push(`domain:${domain}`)
    }
  }
  domainScore = Math.min(1, domainScore)

  // Git recency (15% weight)
  const recencyData = gitRecency.get(normalizedPath) ?? gitRecency.get(filePath)
  if (recencyData) {
    // More recent = higher score
    if (recencyData.daysAgo <= 1) {
      recencyScore = 1.0
      reasons.push('recent:1d')
    } else if (recencyData.daysAgo <= 3) {
      recencyScore = 0.8
      reasons.push('recent:3d')
    } else if (recencyData.daysAgo <= 7) {
      recencyScore = 0.6
      reasons.push('recent:1w')
    } else if (recencyData.daysAgo <= 30) {
      recencyScore = 0.3
      reasons.push('recent:1m')
    }

    // Bonus for frequently changed files
    if (recencyData.commits >= 5) {
      recencyScore = Math.min(1, recencyScore + 0.2)
    }
  }

  // Import distance - simplified heuristic (5% weight)
  // Entry points (index, main, app, entry) get bonus
  const filename = path.posix.basename(pathLower)
  if (['index', 'main', 'app', 'entry'].some((marker) => filename.includes(marker))) {
    importScore = 0.5
    reasons.push('import:0')
  }
  // Core/shared files get some bonus. Directory segments are compared rather
  // than searching for "/core/", because paths are relative and a top-level
  // `core/...` has no leading slash.
  const directories = pathLower.split('/').slice(0, -1)
  if (directories.some((dir) => dir === 'core' || dir === 'shared' || dir === 'lib')) {
    importScore = Math.max(importScore, 0.3)
    if (!reasons.some((r) => r.startsWith('import:'))) {
      reasons.push('import:1')
    }
  }

  // Calculate weighted score
  const score =
    keywordScore * 0.6 +
    domainScore * 0.2 +
    recencyScore * 0.15 +
    importScore * 0.05

  return {
    path: filePath,
    score: Math.min(1, score),
    reasons: [...new Set(reasons)], // Dedupe
  }
}
562
+
563
/**
 * Check if a file is a test file.
 *
 * A path counts as a test file when any of the following holds
 * (case-insensitive, `\` treated as `/`):
 * - it contains `.test.`, `.spec.`, `__tests__` or `__mocks__`
 * - any of its directories is named `test` or `tests`, including a top-level
 *   one, since relative paths have no leading slash
 * - the file name ends with `_test.go` or `_test.py`, or matches `test_*.py`
 *
 * @param filePath - Relative or absolute file path
 * @returns `true` when the path looks like a test file
 */
function isTestFile(filePath: string): boolean {
  const normalized = filePath.toLowerCase().replace(/\\/g, '/')
  const directories = normalized.split('/')
  // After pop(), `directories` holds only the directory segments
  const fileName = directories.pop() ?? ''

  return (
    normalized.includes('.test.') ||
    normalized.includes('.spec.') ||
    normalized.includes('__tests__') ||
    normalized.includes('__mocks__') ||
    directories.some((dir) => dir === 'test' || dir === 'tests') ||
    fileName.endsWith('_test.go') ||
    fileName.endsWith('_test.py') ||
    (fileName.startsWith('test_') && fileName.endsWith('.py'))
  )
}
579
+
580
+ // =============================================================================
581
+ // Exports
582
+ // =============================================================================
583
+
584
+ export default { findRelevantFiles }