spec-agent 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/README.md +256 -0
  2. package/bin/spec-agent.js +14 -0
  3. package/dist/commands/analyze.d.ts +16 -0
  4. package/dist/commands/analyze.d.ts.map +1 -0
  5. package/dist/commands/analyze.js +283 -0
  6. package/dist/commands/analyze.js.map +1 -0
  7. package/dist/commands/clean.d.ts +9 -0
  8. package/dist/commands/clean.d.ts.map +1 -0
  9. package/dist/commands/clean.js +109 -0
  10. package/dist/commands/clean.js.map +1 -0
  11. package/dist/commands/dispatch.d.ts +12 -0
  12. package/dist/commands/dispatch.d.ts.map +1 -0
  13. package/dist/commands/dispatch.js +232 -0
  14. package/dist/commands/dispatch.js.map +1 -0
  15. package/dist/commands/doctor.d.ts +9 -0
  16. package/dist/commands/doctor.d.ts.map +1 -0
  17. package/dist/commands/doctor.js +153 -0
  18. package/dist/commands/doctor.js.map +1 -0
  19. package/dist/commands/learn.d.ts +13 -0
  20. package/dist/commands/learn.d.ts.map +1 -0
  21. package/dist/commands/learn.js +234 -0
  22. package/dist/commands/learn.js.map +1 -0
  23. package/dist/commands/merge.d.ts +11 -0
  24. package/dist/commands/merge.d.ts.map +1 -0
  25. package/dist/commands/merge.js +335 -0
  26. package/dist/commands/merge.js.map +1 -0
  27. package/dist/commands/pipeline.d.ts +19 -0
  28. package/dist/commands/pipeline.d.ts.map +1 -0
  29. package/dist/commands/pipeline.js +266 -0
  30. package/dist/commands/pipeline.js.map +1 -0
  31. package/dist/commands/plan.d.ts +13 -0
  32. package/dist/commands/plan.d.ts.map +1 -0
  33. package/dist/commands/plan.js +314 -0
  34. package/dist/commands/plan.js.map +1 -0
  35. package/dist/commands/scan.d.ts +28 -0
  36. package/dist/commands/scan.d.ts.map +1 -0
  37. package/dist/commands/scan.js +488 -0
  38. package/dist/commands/scan.js.map +1 -0
  39. package/dist/commands/status.d.ts +8 -0
  40. package/dist/commands/status.d.ts.map +1 -0
  41. package/dist/commands/status.js +146 -0
  42. package/dist/commands/status.js.map +1 -0
  43. package/dist/index.d.ts +2 -0
  44. package/dist/index.d.ts.map +1 -0
  45. package/dist/index.js +126 -0
  46. package/dist/index.js.map +1 -0
  47. package/dist/services/document-parser.d.ts +49 -0
  48. package/dist/services/document-parser.d.ts.map +1 -0
  49. package/dist/services/document-parser.js +499 -0
  50. package/dist/services/document-parser.js.map +1 -0
  51. package/dist/services/llm.d.ts +61 -0
  52. package/dist/services/llm.d.ts.map +1 -0
  53. package/dist/services/llm.js +716 -0
  54. package/dist/services/llm.js.map +1 -0
  55. package/dist/types.d.ts +159 -0
  56. package/dist/types.d.ts.map +1 -0
  57. package/dist/types.js +4 -0
  58. package/dist/types.js.map +1 -0
  59. package/dist/utils/file.d.ts +10 -0
  60. package/dist/utils/file.d.ts.map +1 -0
  61. package/dist/utils/file.js +96 -0
  62. package/dist/utils/file.js.map +1 -0
  63. package/dist/utils/logger.d.ts +13 -0
  64. package/dist/utils/logger.d.ts.map +1 -0
  65. package/dist/utils/logger.js +55 -0
  66. package/dist/utils/logger.js.map +1 -0
  67. package/package.json +48 -0
  68. package/scripts/publish-npm.js +174 -0
  69. package/spec-agent-implementation.md +750 -0
  70. package/src/commands/analyze.ts +322 -0
  71. package/src/commands/clean.ts +88 -0
  72. package/src/commands/dispatch.ts +250 -0
  73. package/src/commands/doctor.ts +136 -0
  74. package/src/commands/learn.ts +261 -0
  75. package/src/commands/merge.ts +377 -0
  76. package/src/commands/pipeline.ts +306 -0
  77. package/src/commands/plan.ts +331 -0
  78. package/src/commands/scan.ts +568 -0
  79. package/src/commands/status.ts +129 -0
  80. package/src/index.ts +137 -0
  81. package/src/services/document-parser.ts +548 -0
  82. package/src/services/llm.ts +857 -0
  83. package/src/types.ts +161 -0
  84. package/src/utils/file.ts +60 -0
  85. package/src/utils/logger.ts +58 -0
  86. package/tsconfig.json +19 -0
@@ -0,0 +1,568 @@
1
+ import * as path from 'path';
2
+ import { Command } from 'commander';
3
+ import { Logger } from '../utils/logger';
4
+ import {
5
+ ensureDir,
6
+ fileExists,
7
+ getFileSize,
8
+ formatSize,
9
+ parseSize,
10
+ findFiles,
11
+ writeJson,
12
+ readFileContent
13
+ } from '../utils/file';
14
+ import { parseDocument, analyzeBase64Images } from '../services/document-parser';
15
+ import {
16
+ analyzeDocumentStructure,
17
+ splitByLLMStructure,
18
+ describeEmbeddedImages,
19
+ getLLMConfigForPurpose,
20
+ validateLLMConfig
21
+ } from '../services/llm';
22
+ import { Manifest, Chunk } from '../types';
23
+
24
/** CLI options for `spec-agent scan` (populated by commander). */
interface ScanOptions {
  /** Input file or directory of documents to scan. */
  input?: string;
  /** Read a newline-separated list of input files from stdin instead of --input. */
  stdin?: boolean;
  /** Path where the manifest JSON is written. */
  output: string;
  /** Maximum chunk size as a human-readable string (parsed by parseSize, e.g. "200KB"). */
  chunkSize: string;
  // NOTE(review): declared but not read by scanCommand in this file — confirm against index.ts wiring.
  format: string;
  /** Preview the resulting chunks without writing the manifest. */
  dryRun?: boolean;
  // NOTE(review): declared but not read by scanCommand in this file — presumably skips confirmations; verify.
  yes?: boolean;
  /**
   * Minimum chunk size - chunks smaller than this will be merged with neighbors
   * Default: 10KB
   */
  minChunkSize?: string;
  /**
   * Use LLM to analyze document structure for intelligent chunking
   * Default: true
   */
  llmChunking?: boolean;
  /**
   * Fail immediately if LLM chunking fails (no fallback)
   * Default: false
   */
  strictLlm?: boolean;
}
48
+
49
// Process exit codes for the scan command, so shell scripts can tell failure
// classes apart.
const SCAN_EXIT_CODE = {
  INPUT_ERROR: 1,      // missing/unreadable input path, or no input files found
  CONFIG_ERROR: 2,     // LLM configuration failed validation
  LLM_STRICT_ERROR: 3, // --strict-llm set and LLM chunking failed
  RUNTIME_ERROR: 10,   // any other unexpected failure
} as const;
55
+
56
+ export async function scanCommand(options: ScanOptions, command: Command): Promise<void> {
57
+ const logger = new Logger();
58
+
59
+ try {
60
+ // Validate input
61
+ if (!options.input && !options.stdin) {
62
+ logger.error('[E_SCAN_INPUT] --input is required (or use --stdin)');
63
+ logger.info(' spec-agent scan --input <path> --output <path>');
64
+ logger.info(" Run 'spec-agent scan --help' for details.");
65
+ process.exit(SCAN_EXIT_CODE.INPUT_ERROR);
66
+ }
67
+
68
+ let inputFiles: string[] = [];
69
+
70
+ if (options.stdin) {
71
+ // Read from stdin
72
+ const stdinContent = await readStdin();
73
+ inputFiles = stdinContent.split('\n').filter(f => f.trim());
74
+ } else if (options.input) {
75
+ const inputPath = path.resolve(options.input);
76
+
77
+ if (!(await fileExists(inputPath))) {
78
+ logger.error(`[E_SCAN_INPUT] Input path not found: ${options.input}`);
79
+ process.exit(SCAN_EXIT_CODE.INPUT_ERROR);
80
+ }
81
+
82
+ const stats = await ensureDir(inputPath).then(() =>
83
+ require('fs').promises.stat(inputPath)
84
+ ).catch(() => null);
85
+
86
+ if (!stats) {
87
+ logger.error(`[E_SCAN_INPUT] Cannot access path: ${options.input}`);
88
+ process.exit(SCAN_EXIT_CODE.INPUT_ERROR);
89
+ }
90
+
91
+ if (stats.isDirectory()) {
92
+ // Find all supported files in directory
93
+ const patterns = ['**/*.md', '**/*.pdf', '**/*.docx', '**/*.html', '**/*.txt'];
94
+ for (const pattern of patterns) {
95
+ const files = await findFiles(pattern, inputPath);
96
+ inputFiles.push(...files);
97
+ }
98
+ } else {
99
+ inputFiles = [inputPath];
100
+ }
101
+ }
102
+
103
+ if (inputFiles.length === 0) {
104
+ logger.error('[E_SCAN_INPUT] No input files found');
105
+ process.exit(SCAN_EXIT_CODE.INPUT_ERROR);
106
+ }
107
+
108
+ const chunkSizeBytes = parseSize(options.chunkSize);
109
+ const totalSize = (await Promise.all(
110
+ inputFiles.map(f => getFileSize(f))
111
+ )).reduce((sum, s) => sum + s, 0);
112
+
113
+ logger.info(`Found ${inputFiles.length} files, total size: ${formatSize(totalSize)}`);
114
+
115
+ // Parse documents and create content-based chunks
116
+ let rawChunks: Array<{ content: string; sourceFiles: string[]; title?: string }> = [];
117
+ const useLLMChunking = options.llmChunking !== false; // Default true
118
+ const llmConfig = useLLMChunking ? getLLMConfigForPurpose('scan') : null;
119
+ let llmChunkingSuccessFiles = 0;
120
+ let llmChunkingFallbackFiles = 0;
121
+ let imageAssetsDetected = 0;
122
+ let imageAssetsDescribed = 0;
123
+ if (useLLMChunking && llmConfig) {
124
+ // LLM chunking is enabled by default; fail fast so we don't silently degrade
125
+ // to regex-only behavior on complex documents.
126
+ try {
127
+ validateLLMConfig(llmConfig);
128
+ } catch (error) {
129
+ logger.error(`[E_SCAN_CONFIG] ${error instanceof Error ? error.message : String(error)}`);
130
+ process.exit(SCAN_EXIT_CODE.CONFIG_ERROR);
131
+ }
132
+ }
133
+
134
+ for (const filePath of inputFiles) {
135
+ logger.info(`Parsing ${path.basename(filePath)}...`);
136
+
137
+ try {
138
+ const parsed = await parseDocument(filePath);
139
+ let contentForChunking = parsed.content;
140
+ imageAssetsDetected += parsed.images?.length || 0;
141
+ let imageSummaries: Record<string, string> = {};
142
+
143
+ if (parsed.images && parsed.images.length > 0) {
144
+ logger.info(` 检测到 ${parsed.images.length} 张嵌入图片`);
145
+ if (llmConfig && llmConfig.apiKey) {
146
+ imageSummaries = await describeEmbeddedImages(parsed.images, llmConfig, logger);
147
+ imageAssetsDescribed += Object.keys(imageSummaries).length;
148
+ } else {
149
+ logger.warn(' 未配置 LLM,图片仅保留占位信息,不含语义摘要');
150
+ }
151
+ }
152
+
153
+ // Log if base64 images were found and removed
154
+ const base64Info = analyzeBase64Images(contentForChunking);
155
+ if (base64Info.count > 0) {
156
+ logger.info(` Remaining base64 refs: ${base64Info.count} (~${formatSize(base64Info.estimatedSize)})`);
157
+ }
158
+
159
+ // Try LLM-driven chunking first
160
+ let contentChunks: Array<{ content: string; title?: string }> = [];
161
+
162
+ if (useLLMChunking && llmConfig) {
163
+ try {
164
+ logger.info(` 使用 LLM 分析文档结构...`);
165
+ const structure = await analyzeDocumentStructure(contentForChunking, llmConfig, logger);
166
+
167
+ logger.info(` 识别到 ${structure.sections.length} 个章节,建议分组: ${structure.suggestedGroups.map(g => g.name).join(', ')}`);
168
+
169
+ const llmChunks = splitByLLMStructure(contentForChunking, structure, chunkSizeBytes);
170
+ contentChunks = llmChunks.map(c => ({ content: c.content, title: c.title }));
171
+ llmChunkingSuccessFiles++;
172
+
173
+ logger.info(` LLM 智能切分: ${contentChunks.length} 个 chunks`);
174
+ } catch (llmError) {
175
+ if (options.strictLlm) {
176
+ throw new Error(`[E_SCAN_LLM_STRICT] LLM 严格模式下切分失败: ${llmError instanceof Error ? llmError.message : String(llmError)}`);
177
+ }
178
+ logger.warn(` LLM 分析失败,回退到大小切分: ${llmError instanceof Error ? llmError.message : String(llmError)}`);
179
+ // Keep a safe fallback when LLM transiently fails. We avoid heading-regex
180
+ // fallback here because it is brittle on complex mixed-format documents.
181
+ const sizeChunks = forceSplitBySize(contentForChunking, chunkSizeBytes);
182
+ contentChunks = sizeChunks.map(c => ({ content: c }));
183
+ llmChunkingFallbackFiles++;
184
+ }
185
+ } else {
186
+ // Rule-based chunking
187
+ const ruleChunks = splitByStructure(contentForChunking, chunkSizeBytes);
188
+ contentChunks = ruleChunks.map(c => ({ content: c }));
189
+ }
190
+
191
+ for (const chunk of contentChunks) {
192
+ const enrichedContent = appendImageSummariesToChunk(chunk.content, imageSummaries);
193
+ rawChunks.push({
194
+ content: enrichedContent,
195
+ sourceFiles: [filePath],
196
+ title: chunk.title,
197
+ });
198
+ }
199
+ } catch (error) {
200
+ logger.warn(`Failed to parse ${path.basename(filePath)}, using fallback`);
201
+ rawChunks.push({
202
+ content: `[Error parsing file: ${path.basename(filePath)}]`,
203
+ sourceFiles: [filePath],
204
+ });
205
+ }
206
+ }
207
+
208
+ // Merge small chunks to reduce total count
209
+ const minChunkSizeBytes = options.minChunkSize ? parseSize(options.minChunkSize) : 10 * 1024; // Default 10KB
210
+ rawChunks = mergeSmallChunks(rawChunks, chunkSizeBytes, minChunkSizeBytes);
211
+
212
+ logger.info(`最终: ${rawChunks.length} 个 chunks (最大: ${options.chunkSize}, 最小合并: ${formatSize(minChunkSizeBytes)})`);
213
+ if (useLLMChunking) {
214
+ logger.info(`LLM 切分统计: 成功 ${llmChunkingSuccessFiles} 文件, 回退 ${llmChunkingFallbackFiles} 文件`);
215
+ }
216
+ if (imageAssetsDetected > 0) {
217
+ logger.info(`图片语义统计: 检测 ${imageAssetsDetected} 张, 已摘要 ${imageAssetsDescribed} 张`);
218
+ }
219
+
220
+ // Prepare chunks directory
221
+ const outputDir = path.dirname(path.resolve(options.output));
222
+ const chunksDir = path.join(outputDir, 'chunks');
223
+ await ensureDir(chunksDir);
224
+
225
+ // Write chunks to files
226
+ const chunks: Chunk[] = [];
227
+ for (let i = 0; i < rawChunks.length; i++) {
228
+ const rawChunk = rawChunks[i];
229
+ const chunkFileName = `chunk_${i}.txt`;
230
+ const chunkFilePath = path.join(chunksDir, chunkFileName);
231
+
232
+ await require('fs-extra').writeFile(chunkFilePath, rawChunk.content, 'utf-8');
233
+
234
+ chunks.push({
235
+ id: i,
236
+ sourceFiles: rawChunk.sourceFiles,
237
+ size: Buffer.byteLength(rawChunk.content, 'utf-8'),
238
+ contentPath: chunkFilePath,
239
+ });
240
+ }
241
+
242
+ // Preview mode
243
+ if (options.dryRun) {
244
+ logger.info('Dry run mode - manifest preview:');
245
+ for (const chunk of chunks.slice(0, 20)) {
246
+ const preview = chunk.content
247
+ ? `${path.basename(chunk.sourceFiles[0])} (${formatSize(chunk.size)})`
248
+ : `${chunk.sourceFiles.length} files, ${formatSize(chunk.size)}`;
249
+ logger.info(` Chunk ${chunk.id}: ${preview}`);
250
+ }
251
+ if (chunks.length > 20) {
252
+ logger.info(` ... and ${chunks.length - 20} more chunks`);
253
+ }
254
+ return;
255
+ }
256
+
257
+ // Create manifest
258
+ const manifest: Manifest = {
259
+ version: '1.0.0',
260
+ createdAt: new Date().toISOString(),
261
+ totalFiles: inputFiles.length,
262
+ totalSize: totalSize,
263
+ chunkSize: chunkSizeBytes,
264
+ chunks: chunks,
265
+ };
266
+
267
+ // Write manifest
268
+ const outputPath = path.resolve(options.output);
269
+ await ensureDir(path.dirname(outputPath));
270
+ await writeJson(outputPath, manifest);
271
+
272
+ logger.success(`Manifest created: ${outputPath}`);
273
+ logger.json({
274
+ status: 'success',
275
+ totalFiles: inputFiles.length,
276
+ totalSize: formatSize(totalSize),
277
+ chunks: chunks.length,
278
+ chunkSize: options.chunkSize,
279
+ llmChunkingEnabled: useLLMChunking,
280
+ llmChunkingSuccessFiles,
281
+ llmChunkingFallbackFiles,
282
+ imageAssetsDetected,
283
+ imageAssetsDescribed,
284
+ manifestPath: outputPath,
285
+ });
286
+
287
+ } catch (error) {
288
+ const message = error instanceof Error ? error.message : String(error);
289
+ logger.error(`Scan failed: ${message}`);
290
+ if (message.includes('[E_SCAN_LLM_STRICT]')) {
291
+ process.exit(SCAN_EXIT_CODE.LLM_STRICT_ERROR);
292
+ }
293
+ process.exit(SCAN_EXIT_CODE.RUNTIME_ERROR);
294
+ }
295
+ }
296
+
297
+ /**
298
+ * Split Markdown content by document structure (headers, sections)
299
+ * Optimized for Markdown format with hierarchical heading structure
300
+ */
301
+ function splitByStructure(content: string, maxSize: number): string[] {
302
+ // Parse document structure to identify sections
303
+ const sections = parseMarkdownStructure(content);
304
+
305
+ const chunks: string[] = [];
306
+ let currentChunk: string[] = [];
307
+ let currentSize = 0;
308
+
309
+ for (const section of sections) {
310
+ const sectionSize = Buffer.byteLength(section.content, 'utf-8');
311
+
312
+ // If a single section is larger than maxSize, we need to split it
313
+ if (sectionSize > maxSize) {
314
+ // First, flush current chunk if any
315
+ if (currentChunk.length > 0) {
316
+ chunks.push(currentChunk.join('\n\n'));
317
+ currentChunk = [];
318
+ currentSize = 0;
319
+ }
320
+
321
+ // Split the large section by paragraphs
322
+ const subChunks = splitLargeSection(section, maxSize);
323
+ chunks.push(...subChunks);
324
+ continue;
325
+ }
326
+
327
+ // Check if adding this section would exceed maxSize
328
+ if (currentSize + sectionSize > maxSize && currentChunk.length > 0) {
329
+ // Start a new chunk
330
+ chunks.push(currentChunk.join('\n\n'));
331
+ currentChunk = [section.content];
332
+ currentSize = sectionSize;
333
+ } else {
334
+ currentChunk.push(section.content);
335
+ currentSize += sectionSize;
336
+ }
337
+ }
338
+
339
+ // Don't forget the last chunk
340
+ if (currentChunk.length > 0) {
341
+ chunks.push(currentChunk.join('\n\n'));
342
+ }
343
+
344
+ return chunks.length > 0 ? chunks : [content];
345
+ }
346
+
347
+ /**
348
+ * Parse Markdown document into structured sections
349
+ * Each section starts with a heading and includes all content until the next heading
350
+ */
351
+ interface MarkdownSection {
352
+ level: number;
353
+ title: string;
354
+ content: string;
355
+ }
356
+
357
+ function parseMarkdownStructure(content: string): MarkdownSection[] {
358
+ const lines = content.split('\n');
359
+ const sections: MarkdownSection[] = [];
360
+
361
+ let currentSection: MarkdownSection | null = null;
362
+ let currentContent: string[] = [];
363
+
364
+ for (const line of lines) {
365
+ // Check if this line is a heading
366
+ const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
367
+
368
+ if (headingMatch) {
369
+ // Save previous section
370
+ if (currentSection) {
371
+ currentSection.content = currentContent.join('\n').trim();
372
+ sections.push(currentSection);
373
+ } else if (currentContent.length > 0) {
374
+ // Content before first heading - treat as intro section
375
+ sections.push({
376
+ level: 0,
377
+ title: '',
378
+ content: currentContent.join('\n').trim(),
379
+ });
380
+ }
381
+
382
+ // Start new section
383
+ const level = headingMatch[1].length;
384
+ const title = headingMatch[2].trim();
385
+ currentSection = {
386
+ level,
387
+ title,
388
+ content: '',
389
+ };
390
+ currentContent = [line];
391
+ } else {
392
+ currentContent.push(line);
393
+ }
394
+ }
395
+
396
+ // Don't forget the last section
397
+ if (currentSection) {
398
+ currentSection.content = currentContent.join('\n').trim();
399
+ sections.push(currentSection);
400
+ } else if (currentContent.length > 0) {
401
+ sections.push({
402
+ level: 0,
403
+ title: '',
404
+ content: currentContent.join('\n').trim(),
405
+ });
406
+ }
407
+
408
+ return sections;
409
+ }
410
+
411
+ /**
412
+ * Split a large section by paragraphs while trying to preserve context
413
+ */
414
+ function splitLargeSection(section: MarkdownSection, maxSize: number): string[] {
415
+ const chunks: string[] = [];
416
+ const paragraphs = section.content.split(/\n\n+/);
417
+
418
+ let currentChunk: string[] = [];
419
+ let currentSize = 0;
420
+
421
+ // Include the heading in each chunk for context
422
+ const headingPrefix = section.level > 0 ? `${'#'.repeat(section.level)} ${section.title}\n\n` : '';
423
+ const prefixSize = Buffer.byteLength(headingPrefix, 'utf-8');
424
+
425
+ for (const paragraph of paragraphs) {
426
+ const paraSize = Buffer.byteLength(paragraph, 'utf-8');
427
+
428
+ if (currentSize + paraSize + prefixSize > maxSize && currentChunk.length > 0) {
429
+ // Start new chunk with heading prefix
430
+ chunks.push(headingPrefix + currentChunk.join('\n\n'));
431
+ currentChunk = [paragraph];
432
+ currentSize = paraSize;
433
+ } else {
434
+ currentChunk.push(paragraph);
435
+ currentSize += paraSize;
436
+ }
437
+ }
438
+
439
+ if (currentChunk.length > 0) {
440
+ chunks.push(headingPrefix + currentChunk.join('\n\n'));
441
+ }
442
+
443
+ return chunks;
444
+ }
445
+
446
+ /**
447
+ * Force split content by size only (no structure awareness)
448
+ */
449
+ function forceSplitBySize(content: string, maxSize: number): string[] {
450
+ const chunks: string[] = [];
451
+ let position = 0;
452
+
453
+ while (position < content.length) {
454
+ let end = Math.min(position + maxSize, content.length);
455
+
456
+ // Try to find a newline to split at
457
+ if (end < content.length) {
458
+ const nextNewline = content.indexOf('\n', end - 100);
459
+ if (nextNewline !== -1 && nextNewline < end + 100) {
460
+ end = nextNewline + 1;
461
+ }
462
+ }
463
+
464
+ chunks.push(content.slice(position, end));
465
+ position = end;
466
+ }
467
+
468
+ return chunks;
469
+ }
470
+
471
+ /**
472
+ * Merge small chunks with their neighbors to reduce total chunk count
473
+ * - Chunks smaller than minSize are candidates for merging
474
+ * - They will be merged with the next chunk if possible
475
+ * - If the next chunk would exceed maxSize, merge with previous instead
476
+ */
477
+ function mergeSmallChunks(
478
+ chunks: Array<{ content: string; sourceFiles: string[]; title?: string }>,
479
+ maxSize: number,
480
+ minSize: number
481
+ ): Array<{ content: string; sourceFiles: string[]; title?: string }> {
482
+ if (chunks.length <= 1) return chunks;
483
+
484
+ const merged: Array<{ content: string; sourceFiles: string[]; title?: string }> = [];
485
+ let current = chunks[0];
486
+
487
+ for (let i = 1; i < chunks.length; i++) {
488
+ const next = chunks[i];
489
+ const currentSize = Buffer.byteLength(current.content, 'utf-8');
490
+ const nextSize = Buffer.byteLength(next.content, 'utf-8');
491
+
492
+ // If current chunk is small, try to merge with next
493
+ if (currentSize < minSize) {
494
+ const combinedSize = currentSize + nextSize + 2; // +2 for separator
495
+
496
+ if (combinedSize <= maxSize) {
497
+ // Merge with next chunk
498
+ current.content = current.content + '\n\n' + next.content;
499
+ current.sourceFiles = [...new Set([...current.sourceFiles, ...next.sourceFiles])];
500
+ // Combine titles if both have them
501
+ if (current.title && next.title && current.title !== next.title) {
502
+ current.title = `${current.title} + ${next.title}`;
503
+ } else if (next.title && !current.title) {
504
+ current.title = next.title;
505
+ }
506
+ // Continue to next iteration - might merge more
507
+ } else {
508
+ // Can't merge with next, keep current as-is and move to next
509
+ merged.push(current);
510
+ current = next;
511
+ }
512
+ } else {
513
+ // Current chunk is big enough, keep it
514
+ merged.push(current);
515
+ current = next;
516
+ }
517
+ }
518
+
519
+ // Don't forget the last chunk
520
+ merged.push(current);
521
+
522
+ return merged;
523
+ }
524
+
525
+ function appendImageSummariesToChunk(content: string, imageSummaries: Record<string, string>): string {
526
+ if (!imageSummaries || Object.keys(imageSummaries).length === 0) {
527
+ return content;
528
+ }
529
+
530
+ const idMatches = content.match(/图片引用\s+((?:IMG|PDFIMG)\d{4})/g) || [];
531
+ const imageIds = Array.from(new Set(idMatches.map(item => item.replace(/.*\s+/, '').trim())));
532
+ if (imageIds.length === 0) {
533
+ return content;
534
+ }
535
+
536
+ const summaryLines: string[] = [];
537
+ for (const imageId of imageIds) {
538
+ const summary = imageSummaries[imageId];
539
+ if (summary) {
540
+ summaryLines.push(`- ${imageId}: ${summary}`);
541
+ }
542
+ }
543
+
544
+ if (summaryLines.length === 0) {
545
+ return content;
546
+ }
547
+
548
+ return `${content}\n\n### 图片语义补充\n${summaryLines.join('\n')}`;
549
+ }
550
+
551
+ function readStdin(): Promise<string> {
552
+ return new Promise((resolve) => {
553
+ let data = '';
554
+ process.stdin.setEncoding('utf8');
555
+ process.stdin.on('data', (chunk) => {
556
+ data += chunk;
557
+ });
558
+ process.stdin.on('end', () => {
559
+ resolve(data);
560
+ });
561
+ // If stdin is empty (not piped), resolve with empty string after short delay
562
+ setTimeout(() => {
563
+ if (data === '') {
564
+ resolve('');
565
+ }
566
+ }, 100);
567
+ });
568
+ }
@@ -0,0 +1,129 @@
1
+ import * as path from 'path';
2
+ import { Command } from 'commander';
3
+ import { Logger } from '../utils/logger';
4
+ import { fileExists, readJson } from '../utils/file';
5
+ import { WorkspaceStatus } from '../types';
6
+
7
/** CLI options for `spec-agent status`. */
interface StatusOptions {
  /** Workspace directory to inspect. */
  workspace: string;
  /** Output format: 'json' for machine-readable output, anything else for text. */
  format: string;
}

// Pipeline phases in execution order; the order drives the resume logic.
const PHASES = ['scan', 'analyze', 'merge', 'plan', 'dispatch'] as const;
type Phase = typeof PHASES[number];
14
+
15
+ export async function statusCommand(options: StatusOptions, command: Command): Promise<void> {
16
+ const logger = new Logger();
17
+ const workspacePath = path.resolve(options.workspace);
18
+
19
+ try {
20
+ const status = await checkWorkspaceStatus(workspacePath);
21
+
22
+ if (options.format === 'json') {
23
+ logger.json(status);
24
+ return;
25
+ }
26
+
27
+ // Text output
28
+ logger.info(`Workspace: ${workspacePath}`);
29
+ logger.info('');
30
+ logger.info('Phases:');
31
+
32
+ for (const phase of PHASES) {
33
+ const phaseStatus = status.phases[phase];
34
+ const icon = phaseStatus.completed ? '✔' : '○';
35
+ let details = '';
36
+ if (phase === 'analyze' && 'chunksAnalyzed' in phaseStatus && phaseStatus.chunksAnalyzed !== undefined) {
37
+ details = `- ${phaseStatus.chunksAnalyzed} chunks analyzed`;
38
+ } else if ('output' in phaseStatus && phaseStatus.output) {
39
+ details = `- ${phaseStatus.output}`;
40
+ }
41
+ logger.info(` ${icon} ${phase.padEnd(10)} ${details}`);
42
+ }
43
+
44
+ if (status.canResumeFrom) {
45
+ logger.info('');
46
+ logger.info(`Can resume from: ${status.canResumeFrom}`);
47
+ logger.info(` spec-agent pipeline --from ${status.canResumeFrom} --output ${options.workspace}`);
48
+ }
49
+
50
+ } catch (error) {
51
+ logger.error(`Status check failed: ${error instanceof Error ? error.message : String(error)}`);
52
+ process.exit(1);
53
+ }
54
+ }
55
+
56
+ async function checkWorkspaceStatus(workspacePath: string): Promise<WorkspaceStatus> {
57
+ const status: WorkspaceStatus = {
58
+ workspace: workspacePath,
59
+ phases: {
60
+ scan: { completed: false },
61
+ analyze: { completed: false },
62
+ merge: { completed: false },
63
+ plan: { completed: false },
64
+ dispatch: { completed: false },
65
+ },
66
+ };
67
+
68
+ // Check each phase
69
+ for (const phase of PHASES) {
70
+ const doneFile = path.join(workspacePath, `.${phase}_done`);
71
+ const phaseOutput = getPhaseOutput(phase);
72
+ const outputPath = path.join(workspacePath, phaseOutput);
73
+ const phaseState = status.phases[phase];
74
+
75
+ if (await fileExists(doneFile)) {
76
+ phaseState.completed = true;
77
+ if (phase !== 'analyze') {
78
+ (phaseState as { completed: boolean; output?: string }).output = phaseOutput;
79
+ }
80
+
81
+ // Additional details for specific phases
82
+ if (phase === 'analyze') {
83
+ try {
84
+ const summariesDir = path.join(workspacePath, 'summaries');
85
+ const files = await import('../utils/file').then(m => m.findFiles('chunk_*_summary.json', summariesDir));
86
+ (phaseState as { completed: boolean; chunksAnalyzed?: number }).chunksAnalyzed = files.length;
87
+ } catch {
88
+ // Ignore errors
89
+ }
90
+ }
91
+ } else if (await fileExists(outputPath)) {
92
+ // Output exists but not marked done
93
+ phaseState.completed = true;
94
+ if (phase !== 'analyze') {
95
+ (phaseState as { completed: boolean; output?: string }).output = phaseOutput;
96
+ }
97
+ }
98
+ }
99
+
100
+ // Determine where we can resume from
101
+ for (let i = PHASES.length - 1; i >= 0; i--) {
102
+ const phase = PHASES[i];
103
+ if (status.phases[phase].completed) {
104
+ // Can resume from next phase
105
+ if (i < PHASES.length - 1) {
106
+ status.canResumeFrom = PHASES[i + 1];
107
+ }
108
+ break;
109
+ }
110
+ }
111
+
112
+ // If nothing done, can start from scan
113
+ if (!status.phases.scan.completed) {
114
+ status.canResumeFrom = 'scan';
115
+ }
116
+
117
+ return status;
118
+ }
119
+
120
+ function getPhaseOutput(phase: Phase): string {
121
+ const outputs: Record<Phase, string> = {
122
+ scan: 'manifest.json',
123
+ analyze: 'summaries/',
124
+ merge: 'spec_summary.json',
125
+ plan: 'task_plan.json',
126
+ dispatch: 'dispatch_plan.json',
127
+ };
128
+ return outputs[phase];
129
+ }