skill-any-code 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +48 -0
  2. package/dist/cli.js +319 -0
  3. package/dist/index.js +22 -0
  4. package/jest.config.js +27 -0
  5. package/package.json +59 -0
  6. package/src/adapters/command.schemas.ts +21 -0
  7. package/src/application/analysis.app.service.ts +272 -0
  8. package/src/application/bootstrap.ts +35 -0
  9. package/src/application/services/llm.analysis.service.ts +237 -0
  10. package/src/cli.ts +297 -0
  11. package/src/common/config.ts +209 -0
  12. package/src/common/constants.ts +8 -0
  13. package/src/common/errors.ts +34 -0
  14. package/src/common/logger.ts +82 -0
  15. package/src/common/types.ts +385 -0
  16. package/src/common/ui.ts +228 -0
  17. package/src/common/utils.ts +81 -0
  18. package/src/domain/index.ts +1 -0
  19. package/src/domain/interfaces.ts +188 -0
  20. package/src/domain/services/analysis.service.ts +735 -0
  21. package/src/domain/services/incremental.service.ts +50 -0
  22. package/src/index.ts +6 -0
  23. package/src/infrastructure/blacklist.service.ts +37 -0
  24. package/src/infrastructure/cache/file.hash.cache.ts +119 -0
  25. package/src/infrastructure/git/git.service.ts +120 -0
  26. package/src/infrastructure/git.service.ts +121 -0
  27. package/src/infrastructure/index.service.ts +94 -0
  28. package/src/infrastructure/llm/llm.usage.tracker.ts +65 -0
  29. package/src/infrastructure/llm/openai.client.ts +162 -0
  30. package/src/infrastructure/llm/prompt.template.ts +175 -0
  31. package/src/infrastructure/llm.service.ts +70 -0
  32. package/src/infrastructure/skill/skill.generator.ts +53 -0
  33. package/src/infrastructure/skill/templates/resolve.script.ts +97 -0
  34. package/src/infrastructure/skill/templates/skill.md.template.ts +45 -0
  35. package/src/infrastructure/splitter/code.splitter.ts +176 -0
  36. package/src/infrastructure/storage.service.ts +413 -0
  37. package/src/infrastructure/worker-pool/parse.worker.impl.ts +135 -0
  38. package/src/infrastructure/worker-pool/parse.worker.ts +9 -0
  39. package/src/infrastructure/worker-pool/worker-pool.service.ts +173 -0
  40. package/tsconfig.json +24 -0
  41. package/tsconfig.test.json +5 -0
@@ -0,0 +1,735 @@
1
+ import * as fs from 'fs-extra'
2
+ import * as path from 'path'
3
+ import { createHash } from 'crypto'
4
+ import { IAnalysisService, IGitService, IStorageService, IBlacklistService } from '../interfaces'
5
+ import {
6
+ FullAnalysisParams,
7
+ IncrementalAnalysisParams,
8
+ ResumeAnalysisParams,
9
+ AnalysisParams,
10
+ AnalysisResult,
11
+ FileAnalysis,
12
+ DirectoryAnalysis,
13
+ LLMConfig,
14
+ AnalysisObject,
15
+ ObjectResultMeta,
16
+ TokenUsageStats,
17
+ } from '../../common/types'
18
+ import { AppError, ErrorCode } from '../../common/errors'
19
+ import { logger } from '../../common/logger'
20
+ import { getFileOutputPath, getDirOutputPath, mapLimit } from '../../common/utils'
21
+ import { OpenAIClient } from '../../infrastructure/llm/openai.client'
22
+ import { LLMUsageTracker } from '../../infrastructure/llm/llm.usage.tracker'
23
+ import { CodeSplitter } from '../../infrastructure/splitter/code.splitter'
24
+ import { FileHashCache } from '../../infrastructure/cache/file.hash.cache'
25
+ import { LLMAnalysisService } from '../../application/services/llm.analysis.service'
26
+ import { WorkerPoolService } from '../../infrastructure/worker-pool/worker-pool.service'
27
+ import os from 'os'
28
+
29
+ type DirNode = {
30
+ absPath: string
31
+ relPath: string
32
+ depth: number
33
+ childDirs: string[]
34
+ childFiles: string[]
35
+ }
36
+
37
+ export class AnalysisService implements IAnalysisService {
38
+ private llmAnalysisService: LLMAnalysisService
39
+
40
+ private tracker: LLMUsageTracker
41
+
42
+ constructor(
43
+ private gitService: IGitService,
44
+ private storageService: IStorageService,
45
+ private blacklistService: IBlacklistService,
46
+ private projectSlug: string,
47
+ private currentCommit: string,
48
+ private llmConfig: LLMConfig,
49
+ private readonly onTokenUsageSnapshot?: (stats: TokenUsageStats) => void,
50
+ ) {
51
+ this.tracker = new LLMUsageTracker(this.onTokenUsageSnapshot)
52
+ const llmClient = new OpenAIClient(llmConfig, this.tracker);
53
+ const fileSplitter = new CodeSplitter(llmClient);
54
+ const homeDir = os.homedir()
55
+ const resolvedCacheDir = llmConfig.cache_dir.replace(/^~(?=\/|\\|$)/, homeDir)
56
+ const cache = new FileHashCache({
57
+ cacheDir: resolvedCacheDir,
58
+ maxSizeMb: llmConfig.cache_max_size_mb,
59
+ })
60
+ this.llmAnalysisService = new LLMAnalysisService(llmClient, fileSplitter, cache, llmConfig);
61
+ }
62
+
63
+ getTokenUsage() {
64
+ return this.tracker.getStats()
65
+ }
66
+
67
+ /**
68
+ * 统计将参与解析的对象总数(文件+目录),用于进度条 total。
69
+ * 与 fullAnalysis 使用相同的深度与黑名单规则。
70
+ */
71
+ async countObjects(projectRoot: string, depth: number = -1): Promise<number> {
72
+ const rootStat = await fs.stat(projectRoot)
73
+ if (rootStat.isFile()) return 1
74
+ let count = 0
75
+
76
+ const walk = async (dirPath: string, currentDepth: number): Promise<boolean> => {
77
+ if (depth >= 1 && currentDepth > depth) {
78
+ return false
79
+ }
80
+
81
+ const entries = await fs.readdir(dirPath, { withFileTypes: true })
82
+ const valid = entries.filter(entry => {
83
+ const fullPath = path.join(dirPath, entry.name)
84
+ const relativePath = path.relative(projectRoot, fullPath)
85
+ const key = entry.isDirectory() ? `${relativePath}/` : relativePath
86
+ return !this.blacklistService.isIgnored(key)
87
+ })
88
+
89
+ let hasContent = false
90
+ for (const entry of valid) {
91
+ if (entry.isFile()) {
92
+ count++
93
+ hasContent = true
94
+ continue
95
+ }
96
+ if (entry.isDirectory()) {
97
+ const childHas = await walk(path.join(dirPath, entry.name), currentDepth + 1)
98
+ if (childHas) {
99
+ hasContent = true
100
+ }
101
+ }
102
+ }
103
+
104
+ if (hasContent) {
105
+ count++
106
+ }
107
+ return hasContent
108
+ }
109
+
110
+ await walk(projectRoot, 1)
111
+ return count
112
+ }
113
+
114
+ // ---------------------------------------------------------------------------
115
+ // 私有工具方法
116
+ // ---------------------------------------------------------------------------
117
+
118
+ private async getFileGitMeta(projectRoot: string, relPath: string) {
119
+ const hasGitMeta =
120
+ typeof (this.gitService as any).getFileLastCommit === 'function' &&
121
+ typeof (this.gitService as any).isFileDirty === 'function'
122
+ const fileGitCommitId = hasGitMeta
123
+ ? await this.gitService.getFileLastCommit(projectRoot, relPath)
124
+ : null
125
+ const isDirty = hasGitMeta
126
+ ? await this.gitService.isFileDirty(projectRoot, relPath)
127
+ : false
128
+ return { fileGitCommitId, isDirty }
129
+ }
130
+
131
+ /**
132
+ * Phase 1:遍历目录树,构建完整的任务图。
133
+ * 全量和增量共享此扫描逻辑,黑名单和深度限制在此阶段统一应用。
134
+ */
135
+ private async scanProjectTree(
136
+ projectRoot: string,
137
+ depth: number | undefined,
138
+ onScanProgress?: (scanned: number) => void,
139
+ ) {
140
+ const depthEnabled = depth !== undefined && depth >= 1
141
+ const maxDepth = depthEnabled ? (depth as number) : Number.POSITIVE_INFINITY
142
+
143
+ const dirNodes = new Map<string, DirNode>()
144
+ const fileAbsByRel = new Map<string, string>()
145
+ let scannedObjectCount = 0
146
+
147
+ const rootRel = '.'
148
+ dirNodes.set(rootRel, {
149
+ absPath: projectRoot,
150
+ relPath: rootRel,
151
+ depth: 1,
152
+ childDirs: [],
153
+ childFiles: [],
154
+ })
155
+
156
+ const queue: Array<{ rel: string; abs: string; depth: number }> = [
157
+ { rel: rootRel, abs: projectRoot, depth: 1 },
158
+ ]
159
+
160
+ const scanConcurrency = Math.max(1, Math.min(8, os.cpus()?.length || 4))
161
+
162
+ const processDir = async (current: { rel: string; abs: string; depth: number }) => {
163
+ const node = dirNodes.get(current.rel)
164
+ if (!node) return
165
+ if (current.depth > maxDepth) return
166
+
167
+ const entries = await fs.readdir(current.abs, { withFileTypes: true })
168
+ const validEntries = entries
169
+ .filter(entry => {
170
+ const fullPath = path.join(current.abs, entry.name)
171
+ const relativePath = path.relative(projectRoot, fullPath)
172
+ const key = entry.isDirectory() ? `${relativePath}/` : relativePath
173
+ return !this.blacklistService.isIgnored(key)
174
+ })
175
+ .sort((a, b) => a.name.localeCompare(b.name))
176
+
177
+ for (const entry of validEntries) {
178
+ const fullPath = path.join(current.abs, entry.name)
179
+ const relPath = path.relative(projectRoot, fullPath) || entry.name
180
+
181
+ if (entry.isFile()) {
182
+ node.childFiles.push(relPath)
183
+ fileAbsByRel.set(relPath, fullPath)
184
+ scannedObjectCount++
185
+ if (scannedObjectCount % 10 === 0) onScanProgress?.(scannedObjectCount)
186
+ } else if (entry.isDirectory()) {
187
+ node.childDirs.push(relPath)
188
+ const childDepth = current.depth + 1
189
+ dirNodes.set(relPath, {
190
+ absPath: fullPath,
191
+ relPath: relPath,
192
+ depth: childDepth,
193
+ childDirs: [],
194
+ childFiles: [],
195
+ })
196
+ queue.push({ rel: relPath, abs: fullPath, depth: childDepth })
197
+ }
198
+ }
199
+ }
200
+
201
+ const runScanQueue = async () => {
202
+ const workers = Array.from({ length: scanConcurrency }, async () => {
203
+ // eslint-disable-next-line no-constant-condition
204
+ while (true) {
205
+ const current = queue.shift()
206
+ if (!current) return
207
+ await processDir(current)
208
+ }
209
+ })
210
+ await Promise.all(workers)
211
+ }
212
+
213
+ await runScanQueue()
214
+
215
+ // 目录剪枝:自底向上移除空目录
216
+ const allScannedDirs = Array.from(dirNodes.values())
217
+ const scannedDirsByDepthDesc = allScannedDirs
218
+ .slice()
219
+ .sort((a, b) => {
220
+ if (b.depth !== a.depth) return b.depth - a.depth
221
+ return a.relPath.localeCompare(b.relPath)
222
+ })
223
+
224
+ const keptDirs = new Set<string>()
225
+ for (const d of scannedDirsByDepthDesc) {
226
+ const node = dirNodes.get(d.relPath)
227
+ if (!node) continue
228
+ node.childDirs = node.childDirs.filter(child => keptDirs.has(child))
229
+ const hasContent = node.childFiles.length > 0 || node.childDirs.length > 0
230
+ if (hasContent) {
231
+ keptDirs.add(d.relPath)
232
+ scannedObjectCount++
233
+ if (scannedObjectCount % 10 === 0) onScanProgress?.(scannedObjectCount)
234
+ }
235
+ }
236
+
237
+ if (scannedObjectCount > 0 && scannedObjectCount % 10 !== 0) {
238
+ onScanProgress?.(scannedObjectCount)
239
+ }
240
+
241
+ return { dirNodes, fileAbsByRel, keptDirs }
242
+ }
243
+
244
+ /**
245
+ * 增量专用:扫描存储目录,找出"有解析结果但对应源文件/目录已不存在"的孤立条目并清理。
246
+ */
247
+ private async cleanOrphanedResults(
248
+ storageRoot: string,
249
+ projectRoot: string,
250
+ currentFileRels: Map<string, string>,
251
+ keptDirs: Set<string>,
252
+ removedSourcePaths: string[],
253
+ ) {
254
+ logger.info('Scanning storage directory for orphaned result files...')
255
+
256
+ // 构建当前源码树的预期结果路径集合
257
+ const expectedResultPaths = new Set<string>()
258
+ for (const relPath of currentFileRels.keys()) {
259
+ expectedResultPaths.add(path.resolve(getFileOutputPath(storageRoot, relPath)))
260
+ }
261
+ for (const dirRel of keptDirs) {
262
+ expectedResultPaths.add(path.resolve(getDirOutputPath(storageRoot, dirRel)))
263
+ }
264
+
265
+ const orphaned: string[] = []
266
+
267
+ const walk = async (dirAbs: string) => {
268
+ if (!(await fs.pathExists(dirAbs))) return
269
+ const entries = await fs.readdir(dirAbs, { withFileTypes: true })
270
+ for (const entry of entries) {
271
+ const fullPath = path.join(dirAbs, entry.name)
272
+ if (entry.isDirectory()) {
273
+ if (entry.name.startsWith('.')) continue
274
+ await walk(fullPath)
275
+ } else if (entry.isFile() && entry.name.endsWith('.md')) {
276
+ if (!expectedResultPaths.has(path.resolve(fullPath))) {
277
+ orphaned.push(fullPath)
278
+ }
279
+ }
280
+ }
281
+ }
282
+
283
+ await walk(storageRoot)
284
+
285
+ if (orphaned.length > 0) {
286
+ logger.info(`Found ${orphaned.length} orphaned result file(s). Cleaning up...`)
287
+ for (const p of orphaned) {
288
+ try {
289
+ const content = await fs.readFile(p, 'utf-8')
290
+ const match = content.match(/(?:^|\n)-\s*(?:Path|路径)\s*[::]\s*(.+)\s*$/m)
291
+ const sourcePath = match?.[1]?.trim()
292
+ if (sourcePath) {
293
+ removedSourcePaths.push(path.resolve(projectRoot, sourcePath))
294
+ }
295
+ } catch { /* 无法读取则仅删除 */ }
296
+ await fs.remove(p)
297
+ }
298
+
299
+ // 自底向上清理因删除 .md 而变为空的存储子目录
300
+ const resolvedStorageRoot = path.resolve(storageRoot)
301
+ const removeEmptyDirs = async (dirAbs: string): Promise<boolean> => {
302
+ if (!(await fs.pathExists(dirAbs))) return true
303
+ const entries = await fs.readdir(dirAbs, { withFileTypes: true })
304
+ for (const entry of entries) {
305
+ const fullPath = path.join(dirAbs, entry.name)
306
+ if (entry.isDirectory() && !entry.name.startsWith('.')) {
307
+ await removeEmptyDirs(fullPath)
308
+ }
309
+ }
310
+ // 重新读取:子目录可能刚被删除
311
+ const remaining = await fs.readdir(dirAbs)
312
+ if (remaining.length === 0 && path.resolve(dirAbs) !== resolvedStorageRoot) {
313
+ await fs.remove(dirAbs)
314
+ return true
315
+ }
316
+ return false
317
+ }
318
+ await removeEmptyDirs(storageRoot)
319
+ } else {
320
+ logger.info('No orphaned result files found')
321
+ }
322
+ }
323
+
324
+ // ---------------------------------------------------------------------------
325
+ // 统一解析入口
326
+ // ---------------------------------------------------------------------------
327
+
328
+ async analyze(params: AnalysisParams): Promise<AnalysisResult> {
329
+ const startTime = Date.now()
330
+ const errors: Array<{ path: string; message: string }> = []
331
+ const completedFiles: string[] = []
332
+ const completedDirs: string[] = []
333
+ const indexEntries: Array<{ sourcePath: string; resultPath: string; type: 'file' | 'directory' }> = []
334
+ const removedSourcePaths: string[] = []
335
+ const storageRoot = this.storageService.getStoragePath(this.projectSlug)
336
+
337
+ // --- 单文件特殊处理 ---
338
+ const rootStat = await fs.stat(params.projectRoot)
339
+ if (rootStat.isFile()) {
340
+ try {
341
+ const content = await fs.readFile(params.projectRoot, 'utf-8')
342
+ const fileHash = createHash('sha256').update(content).digest('hex')
343
+ const parseResult = await this.llmAnalysisService.analyzeFile(params.projectRoot, content, fileHash)
344
+
345
+ const relativePath = path.basename(params.projectRoot)
346
+ const fileResult: FileAnalysis = {
347
+ ...parseResult,
348
+ path: relativePath,
349
+ commitHash: params.commitHash,
350
+ }
351
+
352
+ await this.storageService.saveFileAnalysis(this.projectSlug, relativePath, fileResult)
353
+ completedFiles.push(relativePath)
354
+ params.onTotalKnown?.(1)
355
+ params.onObjectPlanned?.({ type: 'file', path: relativePath })
356
+ params.onObjectCompleted?.({ type: 'file', path: relativePath }, { status: 'parsed' })
357
+
358
+ const sourceAbsPath = path.resolve(params.projectRoot)
359
+ const resultAbsPath = path.resolve(storageRoot, getFileOutputPath(storageRoot, relativePath))
360
+ indexEntries.push({ sourcePath: sourceAbsPath, resultPath: resultAbsPath, type: 'file' })
361
+ } catch (e: unknown) {
362
+ errors.push({ path: params.projectRoot, message: (e as Error).message })
363
+ }
364
+
365
+ return {
366
+ success: errors.length === 0,
367
+ analyzedFilesCount: completedFiles.length,
368
+ analyzedDirsCount: 0,
369
+ duration: Date.now() - startTime,
370
+ errors,
371
+ projectSlug: this.projectSlug,
372
+ summaryPath: path.join(storageRoot, 'index.md'),
373
+ indexEntries,
374
+ removedSourcePaths: [],
375
+ }
376
+ }
377
+
378
+ // ===================================================================
379
+ // Phase 1:统一遍历目录树
380
+ // ===================================================================
381
+ logger.debug('Phase 1: scanning directory tree...')
382
+ const { dirNodes, fileAbsByRel, keptDirs } = await this.scanProjectTree(
383
+ params.projectRoot,
384
+ params.depth,
385
+ params.onScanProgress,
386
+ )
387
+ logger.debug(`Scan completed: ${fileAbsByRel.size} file(s), ${keptDirs.size} non-empty directory(ies)`)
388
+
389
+ // 清理被剪枝(空)目录的残留结果文件
390
+ for (const d of dirNodes.values()) {
391
+ if (!keptDirs.has(d.relPath) && d.relPath !== '.') {
392
+ const out = getDirOutputPath(storageRoot, d.relPath)
393
+ if (await fs.pathExists(out)) {
394
+ await fs.remove(out)
395
+ }
396
+ }
397
+ }
398
+
399
+ // ===================================================================
400
+ // Phase 2:应用文件过滤策略,构建文件任务队列
401
+ // ===================================================================
402
+ logger.debug(`Phase 2: applying file filter (mode=${params.mode})...`)
403
+ const includedFiles = new Set<string>()
404
+ const filterConcurrency = Math.max(1, Math.min(8, os.cpus()?.length || 4))
405
+ await mapLimit(Array.from(fileAbsByRel.entries()), filterConcurrency, async ([relPath, absPath]) => {
406
+ if (await params.fileFilter(relPath, absPath)) {
407
+ includedFiles.add(relPath)
408
+ }
409
+ })
410
+ logger.debug(`File filtering done: ${includedFiles.size}/${fileAbsByRel.size} file(s) queued`)
411
+
412
+ // ===================================================================
413
+ // Phase 2.5:增量模式 — 清理孤立的解析结果
414
+ // ===================================================================
415
+ if (params.mode === 'incremental') {
416
+ await this.cleanOrphanedResults(storageRoot, params.projectRoot, fileAbsByRel, keptDirs, removedSourcePaths)
417
+ }
418
+
419
+ // ===================================================================
420
+ // Phase 3:构建目录任务队列
421
+ // 规则:
422
+ // - 至少有 1 个子项(文件或子目录)在任务队列中 → 目录进入队列
423
+ // - 目录自身的结果 md 缺失 → 目录进入队列
424
+ // - 自底向上传播:底层文件变更会驱动整条祖先链重新聚合
425
+ // ===================================================================
426
+ logger.debug('Phase 3: building directory task queue...')
427
+ const includedDirs = new Set<string>()
428
+ const allKeptDirsSorted = Array.from(dirNodes.values())
429
+ .filter(d => keptDirs.has(d.relPath))
430
+ .sort((a, b) => {
431
+ if (b.depth !== a.depth) return b.depth - a.depth
432
+ return a.relPath.localeCompare(b.relPath)
433
+ })
434
+
435
+ for (const dir of allKeptDirsSorted) {
436
+ let shouldInclude =
437
+ dir.childFiles.some(f => includedFiles.has(f)) ||
438
+ dir.childDirs.some(d => includedDirs.has(d))
439
+
440
+ if (!shouldInclude) {
441
+ const dirMdPath = getDirOutputPath(storageRoot, dir.relPath)
442
+ shouldInclude = !(await fs.pathExists(dirMdPath))
443
+ }
444
+
445
+ if (shouldInclude) {
446
+ includedDirs.add(dir.relPath)
447
+ }
448
+ }
449
+ logger.debug(`Directory filtering done: ${includedDirs.size}/${keptDirs.size} directory(ies) queued`)
450
+
451
+ // ===================================================================
452
+ // Phase 4:排序 + 通知总数
453
+ // ===================================================================
454
+ const plannedFiles = Array.from(includedFiles).sort((a, b) => a.localeCompare(b))
455
+ const plannedDirs = allKeptDirsSorted
456
+ .filter(d => includedDirs.has(d.relPath))
457
+ .map(d => d.relPath)
458
+
459
+ const totalObjects = plannedFiles.length + plannedDirs.length
460
+ params.onTotalKnown?.(totalObjects)
461
+
462
+ for (const f of plannedFiles) {
463
+ params.onObjectPlanned?.({ type: 'file', path: f })
464
+ }
465
+ for (const d of plannedDirs) {
466
+ params.onObjectPlanned?.({ type: 'directory', path: d })
467
+ }
468
+
469
+ if (totalObjects === 0) {
470
+ logger.info('No objects to (re)analyze')
471
+ return {
472
+ success: true,
473
+ analyzedFilesCount: 0,
474
+ analyzedDirsCount: 0,
475
+ duration: Date.now() - startTime,
476
+ errors: [],
477
+ projectSlug: this.projectSlug,
478
+ summaryPath: path.join(storageRoot, 'index.md'),
479
+ indexEntries: [],
480
+ removedSourcePaths,
481
+ }
482
+ }
483
+
484
+ // ===================================================================
485
+ // Phase 5:统一执行管线 — 文件解析 + 目录聚合
486
+ // ===================================================================
487
+ const workerPool = new WorkerPoolService(this.llmConfig, params.concurrency)
488
+ const fileResults = new Map<string, FileAnalysis>()
489
+ const dirResults = new Map<string, DirectoryAnalysis>()
490
+
491
+ try {
492
+ // --- 5a:文件解析 ---
493
+ await mapLimit(plannedFiles, params.concurrency, async (relPath) => {
494
+ const fileObj: AnalysisObject = { type: 'file', path: relPath }
495
+ try {
496
+ const absPath = fileAbsByRel.get(relPath)!
497
+ const content = await fs.readFile(absPath, 'utf-8')
498
+ const fileHash = createHash('sha256').update(content).digest('hex')
499
+ const { fileGitCommitId, isDirty } = await this.getFileGitMeta(params.projectRoot, relPath)
500
+
501
+ params.onObjectStarted?.(fileObj)
502
+ const workerRes: any = await workerPool.submitFileAnalysisTask(absPath, content, fileHash)
503
+ const parseResult: FileAnalysis = workerRes?.analysis ?? workerRes
504
+ if (workerRes?.usage) {
505
+ this.tracker.addTotals(workerRes.usage)
506
+ }
507
+
508
+ const fileResult: FileAnalysis = {
509
+ ...parseResult,
510
+ path: relPath,
511
+ commitHash: params.commitHash,
512
+ fileGitCommitId: fileGitCommitId ?? undefined,
513
+ isDirtyWhenAnalyzed: isDirty,
514
+ fileHashWhenAnalyzed: fileHash,
515
+ }
516
+
517
+ await this.storageService.saveFileAnalysis(this.projectSlug, relPath, fileResult)
518
+ completedFiles.push(relPath)
519
+ fileResults.set(relPath, fileResult)
520
+ params.onObjectCompleted?.(fileObj, { status: 'parsed' })
521
+
522
+ const sourceAbsPath = path.resolve(params.projectRoot, relPath)
523
+ const resultAbsPath = path.resolve(storageRoot, getFileOutputPath(storageRoot, relPath))
524
+ indexEntries.push({ sourcePath: sourceAbsPath, resultPath: resultAbsPath, type: 'file' })
525
+ } catch (e: any) {
526
+ errors.push({ path: relPath, message: (e as Error).message })
527
+ params.onObjectCompleted?.(fileObj, { status: 'failed', reason: (e as Error).message })
528
+ }
529
+ })
530
+
531
+ // --- 5b:目录聚合(叶子优先,按深度分组) ---
532
+ const dirsByDepth = new Map<number, string[]>()
533
+ for (const dirRel of plannedDirs) {
534
+ const node = dirNodes.get(dirRel)
535
+ if (!node) continue
536
+ const arr = dirsByDepth.get(node.depth) ?? []
537
+ arr.push(dirRel)
538
+ dirsByDepth.set(node.depth, arr)
539
+ }
540
+
541
+ const sortedDepths = Array.from(dirsByDepth.keys()).sort((a, b) => b - a)
542
+
543
+ for (const depth of sortedDepths) {
544
+ const batch = dirsByDepth.get(depth)!
545
+ await mapLimit(batch, params.concurrency, async (dirRel) => {
546
+ const dirObj: AnalysisObject = { type: 'directory', path: dirRel }
547
+ const node = dirNodes.get(dirRel)
548
+ if (!node) {
549
+ params.onObjectCompleted?.(dirObj, { status: 'skipped', reason: 'dir node not found' })
550
+ return
551
+ }
552
+
553
+ // 收集子项结果:优先从内存 Map 读取(刚解析的),回退到存储层(未变更的)
554
+ const childResults: Array<FileAnalysis | DirectoryAnalysis> = []
555
+ for (const f of node.childFiles) {
556
+ const fr = fileResults.get(f)
557
+ if (fr) {
558
+ childResults.push(fr)
559
+ } else {
560
+ const stored = await this.storageService.getFileAnalysis(this.projectSlug, f, 'summary')
561
+ if (stored) childResults.push(stored)
562
+ }
563
+ }
564
+ for (const d of node.childDirs) {
565
+ const dr = dirResults.get(d)
566
+ if (dr) {
567
+ childResults.push(dr)
568
+ } else {
569
+ const stored = await this.storageService.getDirectoryAnalysis(this.projectSlug, d, 'summary')
570
+ if (stored) childResults.push(stored)
571
+ }
572
+ }
573
+
574
+ const fileChildren = childResults.filter(c => c.type === 'file') as FileAnalysis[]
575
+ const dirChildren = childResults.filter(c => c.type === 'directory') as DirectoryAnalysis[]
576
+
577
+ if (fileChildren.length === 0 && dirChildren.length === 0) {
578
+ const out = getDirOutputPath(storageRoot, dirRel)
579
+ if (await fs.pathExists(out)) {
580
+ await fs.remove(out)
581
+ }
582
+ params.onObjectCompleted?.(dirObj, { status: 'skipped', reason: 'empty directory' })
583
+ return
584
+ }
585
+
586
+ const childrenDirsPayload = dirChildren.map(d => ({
587
+ name: d.name,
588
+ summary: d.summary,
589
+ description: d.description,
590
+ }))
591
+ const childrenFilesPayload = fileChildren.map(f => ({
592
+ name: f.name,
593
+ summary: f.summary,
594
+ description: f.description ?? f.summary,
595
+ }))
596
+
597
+ params.onObjectStarted?.(dirObj)
598
+
599
+ let description = ''
600
+ let summary = ''
601
+ try {
602
+ const llmRes: any = await workerPool.submitDirectoryAggregationTask(node.absPath, {
603
+ childrenDirs: childrenDirsPayload,
604
+ childrenFiles: childrenFilesPayload,
605
+ })
606
+ if (llmRes?.usage) {
607
+ this.tracker.addTotals(llmRes.usage)
608
+ }
609
+ description = llmRes.description
610
+ summary = llmRes.summary
611
+ } catch (e: any) {
612
+ const dirName = path.basename(dirRel)
613
+ const fileCount = fileChildren.length
614
+ const dirCount = dirChildren.length
615
+ const fallback = `The "${dirName}" directory contains ${fileCount} file(s) and ${dirCount} subdirectory(ies) and helps organize related source code and modules.`
616
+ description = fallback
617
+ summary = fallback
618
+ }
619
+
620
+ const dirResult: DirectoryAnalysis = {
621
+ type: 'directory',
622
+ path: dirRel,
623
+ name: path.basename(dirRel),
624
+ description,
625
+ summary,
626
+ childrenDirsCount: dirChildren.length,
627
+ childrenFilesCount: fileChildren.length,
628
+ structure: childResults.map(child => ({
629
+ name: child.name,
630
+ type: child.type,
631
+ description: child.summary,
632
+ })),
633
+ lastAnalyzedAt: new Date().toISOString(),
634
+ commitHash: params.commitHash,
635
+ }
636
+
637
+ try {
638
+ await this.storageService.saveDirectoryAnalysis(this.projectSlug, dirRel, dirResult)
639
+ dirResults.set(dirRel, dirResult)
640
+ completedDirs.push(dirRel)
641
+ const dirSourceAbsPath = path.resolve(params.projectRoot, dirRel)
642
+ const dirResultAbsPath = path.resolve(storageRoot, getDirOutputPath(storageRoot, dirRel))
643
+ indexEntries.push({ sourcePath: dirSourceAbsPath, resultPath: dirResultAbsPath, type: 'directory' })
644
+ params.onObjectCompleted?.(dirObj, { status: 'parsed' })
645
+ } catch (e: any) {
646
+ const msg = (e as Error)?.message ?? String(e)
647
+ errors.push({ path: dirRel, message: msg })
648
+ params.onObjectCompleted?.(dirObj, { status: 'failed', reason: msg })
649
+ return
650
+ }
651
+
652
+ // 内存回收:子项结果在上层目录仅需 summary/description,可安全释放
653
+ for (const f of node.childFiles) {
654
+ fileResults.delete(f)
655
+ }
656
+ for (const d of node.childDirs) {
657
+ dirResults.delete(d)
658
+ }
659
+ })
660
+ }
661
+ } catch (e: any) {
662
+ if (typeof (workerPool as any).terminate === 'function') {
663
+ await (workerPool as any).terminate(true).catch(() => {})
664
+ } else {
665
+ workerPool.cancelAll()
666
+ }
667
+ throw e
668
+ } finally {
669
+ if (typeof (workerPool as any).terminate === 'function') {
670
+ await (workerPool as any).terminate(true).catch(() => {})
671
+ } else {
672
+ workerPool.cancelAll()
673
+ }
674
+ }
675
+
676
+ const duration = Date.now() - startTime
677
+ const summaryPath = path.join(storageRoot, 'index.md')
678
+
679
+ return {
680
+ success: errors.length === 0,
681
+ analyzedFilesCount: completedFiles.length,
682
+ analyzedDirsCount: completedDirs.length,
683
+ duration,
684
+ errors,
685
+ projectSlug: this.projectSlug,
686
+ summaryPath,
687
+ indexEntries,
688
+ removedSourcePaths,
689
+ }
690
+ }
691
+
692
+ // ---------------------------------------------------------------------------
693
+ // 向后兼容包装
694
+ // ---------------------------------------------------------------------------
695
+
696
+ async fullAnalysis(params: FullAnalysisParams): Promise<AnalysisResult> {
697
+ return this.analyze({
698
+ projectRoot: params.projectRoot,
699
+ depth: params.depth,
700
+ concurrency: params.concurrency,
701
+ mode: 'full',
702
+ commitHash: this.currentCommit,
703
+ fileFilter: async () => true,
704
+ onObjectPlanned: params.onObjectPlanned,
705
+ onObjectStarted: params.onObjectStarted,
706
+ onObjectCompleted: params.onObjectCompleted,
707
+ onScanProgress: params.onScanProgress,
708
+ })
709
+ }
710
+
711
+ async incrementalAnalysis(params: IncrementalAnalysisParams): Promise<AnalysisResult> {
712
+ const changedFilesSet = new Set(params.changedFiles || [])
713
+ const storageRoot = this.storageService.getStoragePath(this.projectSlug)
714
+ const fileFilter = async (relPath: string, _absPath: string): Promise<boolean> => {
715
+ if (changedFilesSet.has(relPath)) return true
716
+ const resultPath = getFileOutputPath(storageRoot, relPath)
717
+ return !(await fs.pathExists(resultPath))
718
+ }
719
+ return this.analyze({
720
+ projectRoot: params.projectRoot,
721
+ concurrency: params.concurrency,
722
+ mode: 'incremental',
723
+ commitHash: params.targetCommit,
724
+ fileFilter,
725
+ onObjectPlanned: params.onObjectPlanned,
726
+ onObjectStarted: params.onObjectStarted,
727
+ onObjectCompleted: params.onObjectCompleted,
728
+ onScanProgress: params.onScanProgress,
729
+ })
730
+ }
731
+
732
+ async resumeAnalysis(params: ResumeAnalysisParams): Promise<AnalysisResult> {
733
+ throw new AppError(ErrorCode.ANALYSIS_EXCEPTION, 'Resume/checkpoint feature is not implemented yet')
734
+ }
735
+ }