skill-any-code 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/dist/cli.js +319 -0
- package/dist/index.js +22 -0
- package/jest.config.js +27 -0
- package/package.json +59 -0
- package/src/adapters/command.schemas.ts +21 -0
- package/src/application/analysis.app.service.ts +272 -0
- package/src/application/bootstrap.ts +35 -0
- package/src/application/services/llm.analysis.service.ts +237 -0
- package/src/cli.ts +297 -0
- package/src/common/config.ts +209 -0
- package/src/common/constants.ts +8 -0
- package/src/common/errors.ts +34 -0
- package/src/common/logger.ts +82 -0
- package/src/common/types.ts +385 -0
- package/src/common/ui.ts +228 -0
- package/src/common/utils.ts +81 -0
- package/src/domain/index.ts +1 -0
- package/src/domain/interfaces.ts +188 -0
- package/src/domain/services/analysis.service.ts +735 -0
- package/src/domain/services/incremental.service.ts +50 -0
- package/src/index.ts +6 -0
- package/src/infrastructure/blacklist.service.ts +37 -0
- package/src/infrastructure/cache/file.hash.cache.ts +119 -0
- package/src/infrastructure/git/git.service.ts +120 -0
- package/src/infrastructure/git.service.ts +121 -0
- package/src/infrastructure/index.service.ts +94 -0
- package/src/infrastructure/llm/llm.usage.tracker.ts +65 -0
- package/src/infrastructure/llm/openai.client.ts +162 -0
- package/src/infrastructure/llm/prompt.template.ts +175 -0
- package/src/infrastructure/llm.service.ts +70 -0
- package/src/infrastructure/skill/skill.generator.ts +53 -0
- package/src/infrastructure/skill/templates/resolve.script.ts +97 -0
- package/src/infrastructure/skill/templates/skill.md.template.ts +45 -0
- package/src/infrastructure/splitter/code.splitter.ts +176 -0
- package/src/infrastructure/storage.service.ts +413 -0
- package/src/infrastructure/worker-pool/parse.worker.impl.ts +135 -0
- package/src/infrastructure/worker-pool/parse.worker.ts +9 -0
- package/src/infrastructure/worker-pool/worker-pool.service.ts +173 -0
- package/tsconfig.json +24 -0
- package/tsconfig.test.json +5 -0
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
import * as fs from 'fs-extra'
|
|
2
|
+
import * as path from 'path'
|
|
3
|
+
import { createHash } from 'crypto'
|
|
4
|
+
import { IAnalysisService, IGitService, IStorageService, IBlacklistService } from '../interfaces'
|
|
5
|
+
import {
|
|
6
|
+
FullAnalysisParams,
|
|
7
|
+
IncrementalAnalysisParams,
|
|
8
|
+
ResumeAnalysisParams,
|
|
9
|
+
AnalysisParams,
|
|
10
|
+
AnalysisResult,
|
|
11
|
+
FileAnalysis,
|
|
12
|
+
DirectoryAnalysis,
|
|
13
|
+
LLMConfig,
|
|
14
|
+
AnalysisObject,
|
|
15
|
+
ObjectResultMeta,
|
|
16
|
+
TokenUsageStats,
|
|
17
|
+
} from '../../common/types'
|
|
18
|
+
import { AppError, ErrorCode } from '../../common/errors'
|
|
19
|
+
import { logger } from '../../common/logger'
|
|
20
|
+
import { getFileOutputPath, getDirOutputPath, mapLimit } from '../../common/utils'
|
|
21
|
+
import { OpenAIClient } from '../../infrastructure/llm/openai.client'
|
|
22
|
+
import { LLMUsageTracker } from '../../infrastructure/llm/llm.usage.tracker'
|
|
23
|
+
import { CodeSplitter } from '../../infrastructure/splitter/code.splitter'
|
|
24
|
+
import { FileHashCache } from '../../infrastructure/cache/file.hash.cache'
|
|
25
|
+
import { LLMAnalysisService } from '../../application/services/llm.analysis.service'
|
|
26
|
+
import { WorkerPoolService } from '../../infrastructure/worker-pool/worker-pool.service'
|
|
27
|
+
import os from 'os'
|
|
28
|
+
|
|
29
|
+
// A node in the in-memory directory tree built during the scan phase
// (see scanProjectTree); pruned bottom-up before directory aggregation.
type DirNode = {
  // Absolute filesystem path of this directory.
  absPath: string
  // Path relative to the project root ('.' for the root itself).
  relPath: string
  // 1-based depth: the project root is depth 1, its children depth 2, ...
  depth: number
  // Relative paths of non-blacklisted child directories (pruned to kept dirs later).
  childDirs: string[]
  // Relative paths of non-blacklisted child files.
  childFiles: string[]
}
|
|
36
|
+
|
|
37
|
+
/**
 * Orchestrates project analysis: scans the source tree, parses files through
 * the LLM worker pipeline, aggregates directories bottom-up, and persists
 * results via the storage service. Implements both full and incremental modes.
 */
export class AnalysisService implements IAnalysisService {
  // Per-file LLM analysis pipeline (client + code splitter + hash cache).
  private llmAnalysisService: LLMAnalysisService

  // Accumulates token usage across all LLM calls; snapshots are forwarded to
  // `onTokenUsageSnapshot` when provided.
  private tracker: LLMUsageTracker

  /**
   * @param gitService           Git metadata provider (last commit, dirty state).
   * @param storageService       Persistence layer for analysis results.
   * @param blacklistService     Decides which files/directories are ignored.
   * @param projectSlug          Identifier used to namespace stored results.
   * @param currentCommit        Commit hash recorded on full-analysis results.
   * @param llmConfig            LLM endpoint and cache configuration.
   * @param onTokenUsageSnapshot Optional callback receiving usage snapshots.
   */
  constructor(
    private gitService: IGitService,
    private storageService: IStorageService,
    private blacklistService: IBlacklistService,
    private projectSlug: string,
    private currentCommit: string,
    private llmConfig: LLMConfig,
    private readonly onTokenUsageSnapshot?: (stats: TokenUsageStats) => void,
  ) {
    this.tracker = new LLMUsageTracker(this.onTokenUsageSnapshot)
    const llmClient = new OpenAIClient(llmConfig, this.tracker);
    const fileSplitter = new CodeSplitter(llmClient);
    // Expand a leading '~' in the configured cache dir to the user's home
    // directory (covers '~', '~/x' and Windows-style '~\x').
    const homeDir = os.homedir()
    const resolvedCacheDir = llmConfig.cache_dir.replace(/^~(?=\/|\\|$)/, homeDir)
    const cache = new FileHashCache({
      cacheDir: resolvedCacheDir,
      maxSizeMb: llmConfig.cache_max_size_mb,
    })
    this.llmAnalysisService = new LLMAnalysisService(llmClient, fileSplitter, cache, llmConfig);
  }
|
|
62
|
+
|
|
63
|
+
getTokenUsage() {
|
|
64
|
+
return this.tracker.getStats()
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* 统计将参与解析的对象总数(文件+目录),用于进度条 total。
|
|
69
|
+
* 与 fullAnalysis 使用相同的深度与黑名单规则。
|
|
70
|
+
*/
|
|
71
|
+
async countObjects(projectRoot: string, depth: number = -1): Promise<number> {
|
|
72
|
+
const rootStat = await fs.stat(projectRoot)
|
|
73
|
+
if (rootStat.isFile()) return 1
|
|
74
|
+
let count = 0
|
|
75
|
+
|
|
76
|
+
const walk = async (dirPath: string, currentDepth: number): Promise<boolean> => {
|
|
77
|
+
if (depth >= 1 && currentDepth > depth) {
|
|
78
|
+
return false
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
const entries = await fs.readdir(dirPath, { withFileTypes: true })
|
|
82
|
+
const valid = entries.filter(entry => {
|
|
83
|
+
const fullPath = path.join(dirPath, entry.name)
|
|
84
|
+
const relativePath = path.relative(projectRoot, fullPath)
|
|
85
|
+
const key = entry.isDirectory() ? `${relativePath}/` : relativePath
|
|
86
|
+
return !this.blacklistService.isIgnored(key)
|
|
87
|
+
})
|
|
88
|
+
|
|
89
|
+
let hasContent = false
|
|
90
|
+
for (const entry of valid) {
|
|
91
|
+
if (entry.isFile()) {
|
|
92
|
+
count++
|
|
93
|
+
hasContent = true
|
|
94
|
+
continue
|
|
95
|
+
}
|
|
96
|
+
if (entry.isDirectory()) {
|
|
97
|
+
const childHas = await walk(path.join(dirPath, entry.name), currentDepth + 1)
|
|
98
|
+
if (childHas) {
|
|
99
|
+
hasContent = true
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
if (hasContent) {
|
|
105
|
+
count++
|
|
106
|
+
}
|
|
107
|
+
return hasContent
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
await walk(projectRoot, 1)
|
|
111
|
+
return count
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// Private helper methods
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
private async getFileGitMeta(projectRoot: string, relPath: string) {
|
|
119
|
+
const hasGitMeta =
|
|
120
|
+
typeof (this.gitService as any).getFileLastCommit === 'function' &&
|
|
121
|
+
typeof (this.gitService as any).isFileDirty === 'function'
|
|
122
|
+
const fileGitCommitId = hasGitMeta
|
|
123
|
+
? await this.gitService.getFileLastCommit(projectRoot, relPath)
|
|
124
|
+
: null
|
|
125
|
+
const isDirty = hasGitMeta
|
|
126
|
+
? await this.gitService.isFileDirty(projectRoot, relPath)
|
|
127
|
+
: false
|
|
128
|
+
return { fileGitCommitId, isDirty }
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Phase 1:遍历目录树,构建完整的任务图。
|
|
133
|
+
* 全量和增量共享此扫描逻辑,黑名单和深度限制在此阶段统一应用。
|
|
134
|
+
*/
|
|
135
|
+
private async scanProjectTree(
|
|
136
|
+
projectRoot: string,
|
|
137
|
+
depth: number | undefined,
|
|
138
|
+
onScanProgress?: (scanned: number) => void,
|
|
139
|
+
) {
|
|
140
|
+
const depthEnabled = depth !== undefined && depth >= 1
|
|
141
|
+
const maxDepth = depthEnabled ? (depth as number) : Number.POSITIVE_INFINITY
|
|
142
|
+
|
|
143
|
+
const dirNodes = new Map<string, DirNode>()
|
|
144
|
+
const fileAbsByRel = new Map<string, string>()
|
|
145
|
+
let scannedObjectCount = 0
|
|
146
|
+
|
|
147
|
+
const rootRel = '.'
|
|
148
|
+
dirNodes.set(rootRel, {
|
|
149
|
+
absPath: projectRoot,
|
|
150
|
+
relPath: rootRel,
|
|
151
|
+
depth: 1,
|
|
152
|
+
childDirs: [],
|
|
153
|
+
childFiles: [],
|
|
154
|
+
})
|
|
155
|
+
|
|
156
|
+
const queue: Array<{ rel: string; abs: string; depth: number }> = [
|
|
157
|
+
{ rel: rootRel, abs: projectRoot, depth: 1 },
|
|
158
|
+
]
|
|
159
|
+
|
|
160
|
+
const scanConcurrency = Math.max(1, Math.min(8, os.cpus()?.length || 4))
|
|
161
|
+
|
|
162
|
+
const processDir = async (current: { rel: string; abs: string; depth: number }) => {
|
|
163
|
+
const node = dirNodes.get(current.rel)
|
|
164
|
+
if (!node) return
|
|
165
|
+
if (current.depth > maxDepth) return
|
|
166
|
+
|
|
167
|
+
const entries = await fs.readdir(current.abs, { withFileTypes: true })
|
|
168
|
+
const validEntries = entries
|
|
169
|
+
.filter(entry => {
|
|
170
|
+
const fullPath = path.join(current.abs, entry.name)
|
|
171
|
+
const relativePath = path.relative(projectRoot, fullPath)
|
|
172
|
+
const key = entry.isDirectory() ? `${relativePath}/` : relativePath
|
|
173
|
+
return !this.blacklistService.isIgnored(key)
|
|
174
|
+
})
|
|
175
|
+
.sort((a, b) => a.name.localeCompare(b.name))
|
|
176
|
+
|
|
177
|
+
for (const entry of validEntries) {
|
|
178
|
+
const fullPath = path.join(current.abs, entry.name)
|
|
179
|
+
const relPath = path.relative(projectRoot, fullPath) || entry.name
|
|
180
|
+
|
|
181
|
+
if (entry.isFile()) {
|
|
182
|
+
node.childFiles.push(relPath)
|
|
183
|
+
fileAbsByRel.set(relPath, fullPath)
|
|
184
|
+
scannedObjectCount++
|
|
185
|
+
if (scannedObjectCount % 10 === 0) onScanProgress?.(scannedObjectCount)
|
|
186
|
+
} else if (entry.isDirectory()) {
|
|
187
|
+
node.childDirs.push(relPath)
|
|
188
|
+
const childDepth = current.depth + 1
|
|
189
|
+
dirNodes.set(relPath, {
|
|
190
|
+
absPath: fullPath,
|
|
191
|
+
relPath: relPath,
|
|
192
|
+
depth: childDepth,
|
|
193
|
+
childDirs: [],
|
|
194
|
+
childFiles: [],
|
|
195
|
+
})
|
|
196
|
+
queue.push({ rel: relPath, abs: fullPath, depth: childDepth })
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
const runScanQueue = async () => {
|
|
202
|
+
const workers = Array.from({ length: scanConcurrency }, async () => {
|
|
203
|
+
// eslint-disable-next-line no-constant-condition
|
|
204
|
+
while (true) {
|
|
205
|
+
const current = queue.shift()
|
|
206
|
+
if (!current) return
|
|
207
|
+
await processDir(current)
|
|
208
|
+
}
|
|
209
|
+
})
|
|
210
|
+
await Promise.all(workers)
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
await runScanQueue()
|
|
214
|
+
|
|
215
|
+
// 目录剪枝:自底向上移除空目录
|
|
216
|
+
const allScannedDirs = Array.from(dirNodes.values())
|
|
217
|
+
const scannedDirsByDepthDesc = allScannedDirs
|
|
218
|
+
.slice()
|
|
219
|
+
.sort((a, b) => {
|
|
220
|
+
if (b.depth !== a.depth) return b.depth - a.depth
|
|
221
|
+
return a.relPath.localeCompare(b.relPath)
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
const keptDirs = new Set<string>()
|
|
225
|
+
for (const d of scannedDirsByDepthDesc) {
|
|
226
|
+
const node = dirNodes.get(d.relPath)
|
|
227
|
+
if (!node) continue
|
|
228
|
+
node.childDirs = node.childDirs.filter(child => keptDirs.has(child))
|
|
229
|
+
const hasContent = node.childFiles.length > 0 || node.childDirs.length > 0
|
|
230
|
+
if (hasContent) {
|
|
231
|
+
keptDirs.add(d.relPath)
|
|
232
|
+
scannedObjectCount++
|
|
233
|
+
if (scannedObjectCount % 10 === 0) onScanProgress?.(scannedObjectCount)
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
if (scannedObjectCount > 0 && scannedObjectCount % 10 !== 0) {
|
|
238
|
+
onScanProgress?.(scannedObjectCount)
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
return { dirNodes, fileAbsByRel, keptDirs }
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
  /**
   * Incremental-mode helper: scan the storage directory for "orphaned" result
   * files — analysis results whose corresponding source file/directory no
   * longer exists — and delete them, recording the recovered source paths in
   * `removedSourcePaths` (mutated in place).
   *
   * @param storageRoot        Root of the stored analysis results.
   * @param projectRoot        Project root used to resolve recovered source paths.
   * @param currentFileRels    Current source files (relative path -> absolute path).
   * @param keptDirs           Relative paths of directories kept after pruning.
   * @param removedSourcePaths Output array; absolute source paths of cleaned results.
   */
  private async cleanOrphanedResults(
    storageRoot: string,
    projectRoot: string,
    currentFileRels: Map<string, string>,
    keptDirs: Set<string>,
    removedSourcePaths: string[],
  ) {
    logger.info('Scanning storage directory for orphaned result files...')

    // Build the set of result paths the current source tree is expected to have.
    const expectedResultPaths = new Set<string>()
    for (const relPath of currentFileRels.keys()) {
      expectedResultPaths.add(path.resolve(getFileOutputPath(storageRoot, relPath)))
    }
    for (const dirRel of keptDirs) {
      expectedResultPaths.add(path.resolve(getDirOutputPath(storageRoot, dirRel)))
    }

    const orphaned: string[] = []

    // Walk the storage tree and collect every .md result not in the expected set.
    const walk = async (dirAbs: string) => {
      if (!(await fs.pathExists(dirAbs))) return
      const entries = await fs.readdir(dirAbs, { withFileTypes: true })
      for (const entry of entries) {
        const fullPath = path.join(dirAbs, entry.name)
        if (entry.isDirectory()) {
          // Skip hidden directories (e.g. internal metadata folders).
          if (entry.name.startsWith('.')) continue
          await walk(fullPath)
        } else if (entry.isFile() && entry.name.endsWith('.md')) {
          if (!expectedResultPaths.has(path.resolve(fullPath))) {
            orphaned.push(fullPath)
          }
        }
      }
    }

    await walk(storageRoot)

    if (orphaned.length > 0) {
      logger.info(`Found ${orphaned.length} orphaned result file(s). Cleaning up...`)
      for (const p of orphaned) {
        try {
          // Best effort: recover the original source path from the result's
          // "- Path: ..." (or localized "- 路径: ...") metadata line.
          const content = await fs.readFile(p, 'utf-8')
          const match = content.match(/(?:^|\n)-\s*(?:Path|路径)\s*[::]\s*(.+)\s*$/m)
          const sourcePath = match?.[1]?.trim()
          if (sourcePath) {
            removedSourcePaths.push(path.resolve(projectRoot, sourcePath))
          }
        } catch { /* unreadable — just delete it */ }
        await fs.remove(p)
      }

      // Bottom-up: remove storage subdirectories left empty by the .md deletions.
      const resolvedStorageRoot = path.resolve(storageRoot)
      const removeEmptyDirs = async (dirAbs: string): Promise<boolean> => {
        if (!(await fs.pathExists(dirAbs))) return true
        const entries = await fs.readdir(dirAbs, { withFileTypes: true })
        for (const entry of entries) {
          const fullPath = path.join(dirAbs, entry.name)
          if (entry.isDirectory() && !entry.name.startsWith('.')) {
            await removeEmptyDirs(fullPath)
          }
        }
        // Re-read: child directories may have just been removed above.
        const remaining = await fs.readdir(dirAbs)
        if (remaining.length === 0 && path.resolve(dirAbs) !== resolvedStorageRoot) {
          await fs.remove(dirAbs)
          return true
        }
        return false
      }
      await removeEmptyDirs(storageRoot)
    } else {
      logger.info('No orphaned result files found')
    }
  }
|
|
323
|
+
|
|
324
|
+
// ---------------------------------------------------------------------------
|
|
325
|
+
// Unified analysis entry point
|
|
326
|
+
// ---------------------------------------------------------------------------
|
|
327
|
+
|
|
328
|
+
async analyze(params: AnalysisParams): Promise<AnalysisResult> {
|
|
329
|
+
const startTime = Date.now()
|
|
330
|
+
const errors: Array<{ path: string; message: string }> = []
|
|
331
|
+
const completedFiles: string[] = []
|
|
332
|
+
const completedDirs: string[] = []
|
|
333
|
+
const indexEntries: Array<{ sourcePath: string; resultPath: string; type: 'file' | 'directory' }> = []
|
|
334
|
+
const removedSourcePaths: string[] = []
|
|
335
|
+
const storageRoot = this.storageService.getStoragePath(this.projectSlug)
|
|
336
|
+
|
|
337
|
+
// --- 单文件特殊处理 ---
|
|
338
|
+
const rootStat = await fs.stat(params.projectRoot)
|
|
339
|
+
if (rootStat.isFile()) {
|
|
340
|
+
try {
|
|
341
|
+
const content = await fs.readFile(params.projectRoot, 'utf-8')
|
|
342
|
+
const fileHash = createHash('sha256').update(content).digest('hex')
|
|
343
|
+
const parseResult = await this.llmAnalysisService.analyzeFile(params.projectRoot, content, fileHash)
|
|
344
|
+
|
|
345
|
+
const relativePath = path.basename(params.projectRoot)
|
|
346
|
+
const fileResult: FileAnalysis = {
|
|
347
|
+
...parseResult,
|
|
348
|
+
path: relativePath,
|
|
349
|
+
commitHash: params.commitHash,
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
await this.storageService.saveFileAnalysis(this.projectSlug, relativePath, fileResult)
|
|
353
|
+
completedFiles.push(relativePath)
|
|
354
|
+
params.onTotalKnown?.(1)
|
|
355
|
+
params.onObjectPlanned?.({ type: 'file', path: relativePath })
|
|
356
|
+
params.onObjectCompleted?.({ type: 'file', path: relativePath }, { status: 'parsed' })
|
|
357
|
+
|
|
358
|
+
const sourceAbsPath = path.resolve(params.projectRoot)
|
|
359
|
+
const resultAbsPath = path.resolve(storageRoot, getFileOutputPath(storageRoot, relativePath))
|
|
360
|
+
indexEntries.push({ sourcePath: sourceAbsPath, resultPath: resultAbsPath, type: 'file' })
|
|
361
|
+
} catch (e: unknown) {
|
|
362
|
+
errors.push({ path: params.projectRoot, message: (e as Error).message })
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
return {
|
|
366
|
+
success: errors.length === 0,
|
|
367
|
+
analyzedFilesCount: completedFiles.length,
|
|
368
|
+
analyzedDirsCount: 0,
|
|
369
|
+
duration: Date.now() - startTime,
|
|
370
|
+
errors,
|
|
371
|
+
projectSlug: this.projectSlug,
|
|
372
|
+
summaryPath: path.join(storageRoot, 'index.md'),
|
|
373
|
+
indexEntries,
|
|
374
|
+
removedSourcePaths: [],
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
// ===================================================================
|
|
379
|
+
// Phase 1:统一遍历目录树
|
|
380
|
+
// ===================================================================
|
|
381
|
+
logger.debug('Phase 1: scanning directory tree...')
|
|
382
|
+
const { dirNodes, fileAbsByRel, keptDirs } = await this.scanProjectTree(
|
|
383
|
+
params.projectRoot,
|
|
384
|
+
params.depth,
|
|
385
|
+
params.onScanProgress,
|
|
386
|
+
)
|
|
387
|
+
logger.debug(`Scan completed: ${fileAbsByRel.size} file(s), ${keptDirs.size} non-empty directory(ies)`)
|
|
388
|
+
|
|
389
|
+
// 清理被剪枝(空)目录的残留结果文件
|
|
390
|
+
for (const d of dirNodes.values()) {
|
|
391
|
+
if (!keptDirs.has(d.relPath) && d.relPath !== '.') {
|
|
392
|
+
const out = getDirOutputPath(storageRoot, d.relPath)
|
|
393
|
+
if (await fs.pathExists(out)) {
|
|
394
|
+
await fs.remove(out)
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
// ===================================================================
|
|
400
|
+
// Phase 2:应用文件过滤策略,构建文件任务队列
|
|
401
|
+
// ===================================================================
|
|
402
|
+
logger.debug(`Phase 2: applying file filter (mode=${params.mode})...`)
|
|
403
|
+
const includedFiles = new Set<string>()
|
|
404
|
+
const filterConcurrency = Math.max(1, Math.min(8, os.cpus()?.length || 4))
|
|
405
|
+
await mapLimit(Array.from(fileAbsByRel.entries()), filterConcurrency, async ([relPath, absPath]) => {
|
|
406
|
+
if (await params.fileFilter(relPath, absPath)) {
|
|
407
|
+
includedFiles.add(relPath)
|
|
408
|
+
}
|
|
409
|
+
})
|
|
410
|
+
logger.debug(`File filtering done: ${includedFiles.size}/${fileAbsByRel.size} file(s) queued`)
|
|
411
|
+
|
|
412
|
+
// ===================================================================
|
|
413
|
+
// Phase 2.5:增量模式 — 清理孤立的解析结果
|
|
414
|
+
// ===================================================================
|
|
415
|
+
if (params.mode === 'incremental') {
|
|
416
|
+
await this.cleanOrphanedResults(storageRoot, params.projectRoot, fileAbsByRel, keptDirs, removedSourcePaths)
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
// ===================================================================
|
|
420
|
+
// Phase 3:构建目录任务队列
|
|
421
|
+
// 规则:
|
|
422
|
+
// - 至少有 1 个子项(文件或子目录)在任务队列中 → 目录进入队列
|
|
423
|
+
// - 目录自身的结果 md 缺失 → 目录进入队列
|
|
424
|
+
// - 自底向上传播:底层文件变更会驱动整条祖先链重新聚合
|
|
425
|
+
// ===================================================================
|
|
426
|
+
logger.debug('Phase 3: building directory task queue...')
|
|
427
|
+
const includedDirs = new Set<string>()
|
|
428
|
+
const allKeptDirsSorted = Array.from(dirNodes.values())
|
|
429
|
+
.filter(d => keptDirs.has(d.relPath))
|
|
430
|
+
.sort((a, b) => {
|
|
431
|
+
if (b.depth !== a.depth) return b.depth - a.depth
|
|
432
|
+
return a.relPath.localeCompare(b.relPath)
|
|
433
|
+
})
|
|
434
|
+
|
|
435
|
+
for (const dir of allKeptDirsSorted) {
|
|
436
|
+
let shouldInclude =
|
|
437
|
+
dir.childFiles.some(f => includedFiles.has(f)) ||
|
|
438
|
+
dir.childDirs.some(d => includedDirs.has(d))
|
|
439
|
+
|
|
440
|
+
if (!shouldInclude) {
|
|
441
|
+
const dirMdPath = getDirOutputPath(storageRoot, dir.relPath)
|
|
442
|
+
shouldInclude = !(await fs.pathExists(dirMdPath))
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
if (shouldInclude) {
|
|
446
|
+
includedDirs.add(dir.relPath)
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
logger.debug(`Directory filtering done: ${includedDirs.size}/${keptDirs.size} directory(ies) queued`)
|
|
450
|
+
|
|
451
|
+
// ===================================================================
|
|
452
|
+
// Phase 4:排序 + 通知总数
|
|
453
|
+
// ===================================================================
|
|
454
|
+
const plannedFiles = Array.from(includedFiles).sort((a, b) => a.localeCompare(b))
|
|
455
|
+
const plannedDirs = allKeptDirsSorted
|
|
456
|
+
.filter(d => includedDirs.has(d.relPath))
|
|
457
|
+
.map(d => d.relPath)
|
|
458
|
+
|
|
459
|
+
const totalObjects = plannedFiles.length + plannedDirs.length
|
|
460
|
+
params.onTotalKnown?.(totalObjects)
|
|
461
|
+
|
|
462
|
+
for (const f of plannedFiles) {
|
|
463
|
+
params.onObjectPlanned?.({ type: 'file', path: f })
|
|
464
|
+
}
|
|
465
|
+
for (const d of plannedDirs) {
|
|
466
|
+
params.onObjectPlanned?.({ type: 'directory', path: d })
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
if (totalObjects === 0) {
|
|
470
|
+
logger.info('No objects to (re)analyze')
|
|
471
|
+
return {
|
|
472
|
+
success: true,
|
|
473
|
+
analyzedFilesCount: 0,
|
|
474
|
+
analyzedDirsCount: 0,
|
|
475
|
+
duration: Date.now() - startTime,
|
|
476
|
+
errors: [],
|
|
477
|
+
projectSlug: this.projectSlug,
|
|
478
|
+
summaryPath: path.join(storageRoot, 'index.md'),
|
|
479
|
+
indexEntries: [],
|
|
480
|
+
removedSourcePaths,
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
// ===================================================================
|
|
485
|
+
// Phase 5:统一执行管线 — 文件解析 + 目录聚合
|
|
486
|
+
// ===================================================================
|
|
487
|
+
const workerPool = new WorkerPoolService(this.llmConfig, params.concurrency)
|
|
488
|
+
const fileResults = new Map<string, FileAnalysis>()
|
|
489
|
+
const dirResults = new Map<string, DirectoryAnalysis>()
|
|
490
|
+
|
|
491
|
+
try {
|
|
492
|
+
// --- 5a:文件解析 ---
|
|
493
|
+
await mapLimit(plannedFiles, params.concurrency, async (relPath) => {
|
|
494
|
+
const fileObj: AnalysisObject = { type: 'file', path: relPath }
|
|
495
|
+
try {
|
|
496
|
+
const absPath = fileAbsByRel.get(relPath)!
|
|
497
|
+
const content = await fs.readFile(absPath, 'utf-8')
|
|
498
|
+
const fileHash = createHash('sha256').update(content).digest('hex')
|
|
499
|
+
const { fileGitCommitId, isDirty } = await this.getFileGitMeta(params.projectRoot, relPath)
|
|
500
|
+
|
|
501
|
+
params.onObjectStarted?.(fileObj)
|
|
502
|
+
const workerRes: any = await workerPool.submitFileAnalysisTask(absPath, content, fileHash)
|
|
503
|
+
const parseResult: FileAnalysis = workerRes?.analysis ?? workerRes
|
|
504
|
+
if (workerRes?.usage) {
|
|
505
|
+
this.tracker.addTotals(workerRes.usage)
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
const fileResult: FileAnalysis = {
|
|
509
|
+
...parseResult,
|
|
510
|
+
path: relPath,
|
|
511
|
+
commitHash: params.commitHash,
|
|
512
|
+
fileGitCommitId: fileGitCommitId ?? undefined,
|
|
513
|
+
isDirtyWhenAnalyzed: isDirty,
|
|
514
|
+
fileHashWhenAnalyzed: fileHash,
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
await this.storageService.saveFileAnalysis(this.projectSlug, relPath, fileResult)
|
|
518
|
+
completedFiles.push(relPath)
|
|
519
|
+
fileResults.set(relPath, fileResult)
|
|
520
|
+
params.onObjectCompleted?.(fileObj, { status: 'parsed' })
|
|
521
|
+
|
|
522
|
+
const sourceAbsPath = path.resolve(params.projectRoot, relPath)
|
|
523
|
+
const resultAbsPath = path.resolve(storageRoot, getFileOutputPath(storageRoot, relPath))
|
|
524
|
+
indexEntries.push({ sourcePath: sourceAbsPath, resultPath: resultAbsPath, type: 'file' })
|
|
525
|
+
} catch (e: any) {
|
|
526
|
+
errors.push({ path: relPath, message: (e as Error).message })
|
|
527
|
+
params.onObjectCompleted?.(fileObj, { status: 'failed', reason: (e as Error).message })
|
|
528
|
+
}
|
|
529
|
+
})
|
|
530
|
+
|
|
531
|
+
// --- 5b:目录聚合(叶子优先,按深度分组) ---
|
|
532
|
+
const dirsByDepth = new Map<number, string[]>()
|
|
533
|
+
for (const dirRel of plannedDirs) {
|
|
534
|
+
const node = dirNodes.get(dirRel)
|
|
535
|
+
if (!node) continue
|
|
536
|
+
const arr = dirsByDepth.get(node.depth) ?? []
|
|
537
|
+
arr.push(dirRel)
|
|
538
|
+
dirsByDepth.set(node.depth, arr)
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
const sortedDepths = Array.from(dirsByDepth.keys()).sort((a, b) => b - a)
|
|
542
|
+
|
|
543
|
+
for (const depth of sortedDepths) {
|
|
544
|
+
const batch = dirsByDepth.get(depth)!
|
|
545
|
+
await mapLimit(batch, params.concurrency, async (dirRel) => {
|
|
546
|
+
const dirObj: AnalysisObject = { type: 'directory', path: dirRel }
|
|
547
|
+
const node = dirNodes.get(dirRel)
|
|
548
|
+
if (!node) {
|
|
549
|
+
params.onObjectCompleted?.(dirObj, { status: 'skipped', reason: 'dir node not found' })
|
|
550
|
+
return
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
// 收集子项结果:优先从内存 Map 读取(刚解析的),回退到存储层(未变更的)
|
|
554
|
+
const childResults: Array<FileAnalysis | DirectoryAnalysis> = []
|
|
555
|
+
for (const f of node.childFiles) {
|
|
556
|
+
const fr = fileResults.get(f)
|
|
557
|
+
if (fr) {
|
|
558
|
+
childResults.push(fr)
|
|
559
|
+
} else {
|
|
560
|
+
const stored = await this.storageService.getFileAnalysis(this.projectSlug, f, 'summary')
|
|
561
|
+
if (stored) childResults.push(stored)
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
for (const d of node.childDirs) {
|
|
565
|
+
const dr = dirResults.get(d)
|
|
566
|
+
if (dr) {
|
|
567
|
+
childResults.push(dr)
|
|
568
|
+
} else {
|
|
569
|
+
const stored = await this.storageService.getDirectoryAnalysis(this.projectSlug, d, 'summary')
|
|
570
|
+
if (stored) childResults.push(stored)
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
const fileChildren = childResults.filter(c => c.type === 'file') as FileAnalysis[]
|
|
575
|
+
const dirChildren = childResults.filter(c => c.type === 'directory') as DirectoryAnalysis[]
|
|
576
|
+
|
|
577
|
+
if (fileChildren.length === 0 && dirChildren.length === 0) {
|
|
578
|
+
const out = getDirOutputPath(storageRoot, dirRel)
|
|
579
|
+
if (await fs.pathExists(out)) {
|
|
580
|
+
await fs.remove(out)
|
|
581
|
+
}
|
|
582
|
+
params.onObjectCompleted?.(dirObj, { status: 'skipped', reason: 'empty directory' })
|
|
583
|
+
return
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
const childrenDirsPayload = dirChildren.map(d => ({
|
|
587
|
+
name: d.name,
|
|
588
|
+
summary: d.summary,
|
|
589
|
+
description: d.description,
|
|
590
|
+
}))
|
|
591
|
+
const childrenFilesPayload = fileChildren.map(f => ({
|
|
592
|
+
name: f.name,
|
|
593
|
+
summary: f.summary,
|
|
594
|
+
description: f.description ?? f.summary,
|
|
595
|
+
}))
|
|
596
|
+
|
|
597
|
+
params.onObjectStarted?.(dirObj)
|
|
598
|
+
|
|
599
|
+
let description = ''
|
|
600
|
+
let summary = ''
|
|
601
|
+
try {
|
|
602
|
+
const llmRes: any = await workerPool.submitDirectoryAggregationTask(node.absPath, {
|
|
603
|
+
childrenDirs: childrenDirsPayload,
|
|
604
|
+
childrenFiles: childrenFilesPayload,
|
|
605
|
+
})
|
|
606
|
+
if (llmRes?.usage) {
|
|
607
|
+
this.tracker.addTotals(llmRes.usage)
|
|
608
|
+
}
|
|
609
|
+
description = llmRes.description
|
|
610
|
+
summary = llmRes.summary
|
|
611
|
+
} catch (e: any) {
|
|
612
|
+
const dirName = path.basename(dirRel)
|
|
613
|
+
const fileCount = fileChildren.length
|
|
614
|
+
const dirCount = dirChildren.length
|
|
615
|
+
const fallback = `The "${dirName}" directory contains ${fileCount} file(s) and ${dirCount} subdirectory(ies) and helps organize related source code and modules.`
|
|
616
|
+
description = fallback
|
|
617
|
+
summary = fallback
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
const dirResult: DirectoryAnalysis = {
|
|
621
|
+
type: 'directory',
|
|
622
|
+
path: dirRel,
|
|
623
|
+
name: path.basename(dirRel),
|
|
624
|
+
description,
|
|
625
|
+
summary,
|
|
626
|
+
childrenDirsCount: dirChildren.length,
|
|
627
|
+
childrenFilesCount: fileChildren.length,
|
|
628
|
+
structure: childResults.map(child => ({
|
|
629
|
+
name: child.name,
|
|
630
|
+
type: child.type,
|
|
631
|
+
description: child.summary,
|
|
632
|
+
})),
|
|
633
|
+
lastAnalyzedAt: new Date().toISOString(),
|
|
634
|
+
commitHash: params.commitHash,
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
try {
|
|
638
|
+
await this.storageService.saveDirectoryAnalysis(this.projectSlug, dirRel, dirResult)
|
|
639
|
+
dirResults.set(dirRel, dirResult)
|
|
640
|
+
completedDirs.push(dirRel)
|
|
641
|
+
const dirSourceAbsPath = path.resolve(params.projectRoot, dirRel)
|
|
642
|
+
const dirResultAbsPath = path.resolve(storageRoot, getDirOutputPath(storageRoot, dirRel))
|
|
643
|
+
indexEntries.push({ sourcePath: dirSourceAbsPath, resultPath: dirResultAbsPath, type: 'directory' })
|
|
644
|
+
params.onObjectCompleted?.(dirObj, { status: 'parsed' })
|
|
645
|
+
} catch (e: any) {
|
|
646
|
+
const msg = (e as Error)?.message ?? String(e)
|
|
647
|
+
errors.push({ path: dirRel, message: msg })
|
|
648
|
+
params.onObjectCompleted?.(dirObj, { status: 'failed', reason: msg })
|
|
649
|
+
return
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
// 内存回收:子项结果在上层目录仅需 summary/description,可安全释放
|
|
653
|
+
for (const f of node.childFiles) {
|
|
654
|
+
fileResults.delete(f)
|
|
655
|
+
}
|
|
656
|
+
for (const d of node.childDirs) {
|
|
657
|
+
dirResults.delete(d)
|
|
658
|
+
}
|
|
659
|
+
})
|
|
660
|
+
}
|
|
661
|
+
} catch (e: any) {
|
|
662
|
+
if (typeof (workerPool as any).terminate === 'function') {
|
|
663
|
+
await (workerPool as any).terminate(true).catch(() => {})
|
|
664
|
+
} else {
|
|
665
|
+
workerPool.cancelAll()
|
|
666
|
+
}
|
|
667
|
+
throw e
|
|
668
|
+
} finally {
|
|
669
|
+
if (typeof (workerPool as any).terminate === 'function') {
|
|
670
|
+
await (workerPool as any).terminate(true).catch(() => {})
|
|
671
|
+
} else {
|
|
672
|
+
workerPool.cancelAll()
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
const duration = Date.now() - startTime
|
|
677
|
+
const summaryPath = path.join(storageRoot, 'index.md')
|
|
678
|
+
|
|
679
|
+
return {
|
|
680
|
+
success: errors.length === 0,
|
|
681
|
+
analyzedFilesCount: completedFiles.length,
|
|
682
|
+
analyzedDirsCount: completedDirs.length,
|
|
683
|
+
duration,
|
|
684
|
+
errors,
|
|
685
|
+
projectSlug: this.projectSlug,
|
|
686
|
+
summaryPath,
|
|
687
|
+
indexEntries,
|
|
688
|
+
removedSourcePaths,
|
|
689
|
+
}
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
// ---------------------------------------------------------------------------
|
|
693
|
+
// Backward-compatibility wrappers
|
|
694
|
+
// ---------------------------------------------------------------------------
|
|
695
|
+
|
|
696
|
+
async fullAnalysis(params: FullAnalysisParams): Promise<AnalysisResult> {
|
|
697
|
+
return this.analyze({
|
|
698
|
+
projectRoot: params.projectRoot,
|
|
699
|
+
depth: params.depth,
|
|
700
|
+
concurrency: params.concurrency,
|
|
701
|
+
mode: 'full',
|
|
702
|
+
commitHash: this.currentCommit,
|
|
703
|
+
fileFilter: async () => true,
|
|
704
|
+
onObjectPlanned: params.onObjectPlanned,
|
|
705
|
+
onObjectStarted: params.onObjectStarted,
|
|
706
|
+
onObjectCompleted: params.onObjectCompleted,
|
|
707
|
+
onScanProgress: params.onScanProgress,
|
|
708
|
+
})
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
async incrementalAnalysis(params: IncrementalAnalysisParams): Promise<AnalysisResult> {
|
|
712
|
+
const changedFilesSet = new Set(params.changedFiles || [])
|
|
713
|
+
const storageRoot = this.storageService.getStoragePath(this.projectSlug)
|
|
714
|
+
const fileFilter = async (relPath: string, _absPath: string): Promise<boolean> => {
|
|
715
|
+
if (changedFilesSet.has(relPath)) return true
|
|
716
|
+
const resultPath = getFileOutputPath(storageRoot, relPath)
|
|
717
|
+
return !(await fs.pathExists(resultPath))
|
|
718
|
+
}
|
|
719
|
+
return this.analyze({
|
|
720
|
+
projectRoot: params.projectRoot,
|
|
721
|
+
concurrency: params.concurrency,
|
|
722
|
+
mode: 'incremental',
|
|
723
|
+
commitHash: params.targetCommit,
|
|
724
|
+
fileFilter,
|
|
725
|
+
onObjectPlanned: params.onObjectPlanned,
|
|
726
|
+
onObjectStarted: params.onObjectStarted,
|
|
727
|
+
onObjectCompleted: params.onObjectCompleted,
|
|
728
|
+
onScanProgress: params.onScanProgress,
|
|
729
|
+
})
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
async resumeAnalysis(params: ResumeAnalysisParams): Promise<AnalysisResult> {
|
|
733
|
+
throw new AppError(ErrorCode.ANALYSIS_EXCEPTION, 'Resume/checkpoint feature is not implemented yet')
|
|
734
|
+
}
|
|
735
|
+
}
|