spec-agent 2.0.5 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import * as path from 'path';
2
+ import * as fs from 'fs-extra';
2
3
  import { Command } from 'commander';
3
4
  import { Logger } from '../utils/logger';
4
5
  import {
@@ -8,8 +9,7 @@ import {
8
9
  formatSize,
9
10
  parseSize,
10
11
  findFiles,
11
- writeJson,
12
- readFileContent
12
+ writeJson
13
13
  } from '../utils/file';
14
14
  import { parseDocument, analyzeBase64Images } from '../services/document-parser';
15
15
  import {
@@ -19,7 +19,7 @@ import {
19
19
  getLLMConfigForPurpose,
20
20
  validateLLMConfig
21
21
  } from '../services/llm';
22
- import { Manifest, Chunk } from '../types';
22
+ import { Manifest, Chunk, PrototypeAsset } from '../types';
23
23
 
24
24
  interface ScanOptions {
25
25
  input?: string;
@@ -53,6 +53,16 @@ const SCAN_EXIT_CODE = {
53
53
  RUNTIME_ERROR: 10,
54
54
  } as const;
55
55
 
56
/**
 * An embedded image collected while scanning an input file, held in memory
 * until the prototype files and indexes are written.
 */
interface PrototypeAssetPending {
  /** Image id after it has been scoped to its input file via `scopeImageId`. */
  id: string;
  /** Path of the input file the image was parsed from. */
  sourceFile: string;
  /** Alt text reported by the document parser for this image. */
  alt: string;
  /** MIME type reported by the document parser; used to pick the file extension. */
  mimeType: string;
  /** Size estimate reported by the document parser (unit not visible here — TODO confirm). */
  estimatedSize: number;
  /** The image payload as a data URI; decoded to a file by `writeDataUriToFile`. */
  dataUri: string;
  /** Semantic summary for the image; absent when no description was produced for its id. */
  summary?: string;
}
65
+
56
66
  export async function scanCommand(options: ScanOptions, command: Command): Promise<void> {
57
67
  const logger = new Logger();
58
68
 
@@ -118,6 +128,7 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
118
128
  let llmChunkingFallbackFiles = 0;
119
129
  let imageAssetsDetected = 0;
120
130
  let imageAssetsDescribed = 0;
131
+ const pendingPrototypeAssets: PrototypeAssetPending[] = [];
121
132
  if (useLLMChunking && llmConfig) {
122
133
  // LLM chunking is enabled by default; fail fast so we don't silently degrade
123
134
  // to regex-only behavior on complex documents.
@@ -129,23 +140,44 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
129
140
  }
130
141
  }
131
142
 
132
- for (const filePath of inputFiles) {
143
+ for (let fileIdx = 0; fileIdx < inputFiles.length; fileIdx++) {
144
+ const filePath = inputFiles[fileIdx];
133
145
  logger.info(`Parsing ${path.basename(filePath)}...`);
134
146
 
135
147
  try {
136
148
  const parsed = await parseDocument(filePath);
137
149
  let contentForChunking = parsed.content;
138
- imageAssetsDetected += parsed.images?.length || 0;
150
+ const parsedImages = (parsed.images || []).map(image => ({
151
+ ...image,
152
+ id: scopeImageId(fileIdx, image.id),
153
+ }));
154
+ const idMapping = new Map<string, string>();
155
+ for (let i = 0; i < (parsed.images || []).length; i++) {
156
+ idMapping.set(parsed.images![i].id, parsedImages[i].id);
157
+ }
158
+ contentForChunking = replaceImageIds(contentForChunking, idMapping);
159
+ imageAssetsDetected += parsedImages.length;
139
160
  let imageSummaries: Record<string, string> = {};
140
161
 
141
- if (parsed.images && parsed.images.length > 0) {
142
- logger.info(` 检测到 ${parsed.images.length} 张嵌入图片`);
162
+ if (parsedImages.length > 0) {
163
+ logger.info(` 检测到 ${parsedImages.length} 张嵌入图片`);
143
164
  if (llmConfig && llmConfig.apiKey) {
144
- imageSummaries = await describeEmbeddedImages(parsed.images, llmConfig, logger);
165
+ imageSummaries = await describeEmbeddedImages(parsedImages, llmConfig, logger);
145
166
  imageAssetsDescribed += Object.keys(imageSummaries).length;
146
167
  } else {
147
168
  logger.warn(' 未配置 LLM,图片仅保留占位信息,不含语义摘要');
148
169
  }
170
+ for (const image of parsedImages) {
171
+ pendingPrototypeAssets.push({
172
+ id: image.id,
173
+ sourceFile: filePath,
174
+ alt: image.alt,
175
+ mimeType: image.mimeType,
176
+ estimatedSize: image.estimatedSize,
177
+ dataUri: image.dataUri,
178
+ summary: imageSummaries[image.id],
179
+ });
180
+ }
149
181
  }
150
182
 
151
183
  // Log if base64 images were found and removed
@@ -235,7 +267,9 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
235
267
  // Prepare chunks directory
236
268
  const outputDir = path.dirname(path.resolve(options.output));
237
269
  const chunksDir = path.join(outputDir, 'chunks');
270
+ const prototypesDir = path.join(outputDir, 'prototypes');
238
271
  await ensureDir(chunksDir);
272
+ await ensureDir(prototypesDir);
239
273
 
240
274
  // Write chunks to files
241
275
  const chunks: Chunk[] = [];
@@ -244,7 +278,7 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
244
278
  const chunkFileName = `chunk_${i}.txt`;
245
279
  const chunkFilePath = path.join(chunksDir, chunkFileName);
246
280
 
247
- await require('fs-extra').writeFile(chunkFilePath, rawChunk.content, 'utf-8');
281
+ await fs.writeFile(chunkFilePath, rawChunk.content, 'utf-8');
248
282
 
249
283
  chunks.push({
250
284
  id: i,
@@ -254,6 +288,13 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
254
288
  });
255
289
  }
256
290
 
291
+ const prototypeAssets = await writePrototypeAssets({
292
+ outputDir,
293
+ prototypesDir,
294
+ rawChunks,
295
+ pendingAssets: pendingPrototypeAssets,
296
+ });
297
+
257
298
  // Create manifest
258
299
  const manifest: Manifest = {
259
300
  version: '1.0.0',
@@ -262,6 +303,7 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
262
303
  totalSize: totalSize,
263
304
  chunkSize: chunkSizeBytes,
264
305
  chunks: chunks,
306
+ prototypeAssets,
265
307
  };
266
308
 
267
309
  // Write manifest
@@ -270,6 +312,9 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
270
312
  await writeJson(outputPath, manifest);
271
313
 
272
314
  logger.success(`Manifest created: ${outputPath}`);
315
+ if (prototypeAssets.length > 0) {
316
+ logger.info(`Prototype index: ${path.join(outputDir, 'prototype_index.json')}`);
317
+ }
273
318
  logger.json({
274
319
  status: 'success',
275
320
  totalFiles: inputFiles.length,
@@ -281,6 +326,7 @@ export async function scanCommand(options: ScanOptions, command: Command): Promi
281
326
  llmChunkingFallbackFiles,
282
327
  imageAssetsDetected,
283
328
  imageAssetsDescribed,
329
+ prototypeAssets: prototypeAssets.length,
284
330
  manifestPath: outputPath,
285
331
  });
286
332
 
@@ -527,7 +573,7 @@ function appendImageSummariesToChunk(content: string, imageSummaries: Record<str
527
573
  return content;
528
574
  }
529
575
 
530
- const idMatches = content.match(/图片引用\s+((?:IMG|PDFIMG)\d{4})/g) || [];
576
+ const idMatches = content.match(/图片引用\s+([A-Z0-9_]+)/g) || [];
531
577
  const imageIds = Array.from(new Set(idMatches.map(item => item.replace(/.*\s+/, '').trim())));
532
578
  if (imageIds.length === 0) {
533
579
  return content;
@@ -548,6 +594,113 @@ function appendImageSummariesToChunk(content: string, imageSummaries: Record<str
548
594
  return `${content}\n\n### 图片语义补充\n${summaryLines.join('\n')}`;
549
595
  }
550
596
 
597
/**
 * Decodes every pending embedded image into `prototypesDir` and writes a JSON
 * index (`prototype_index.json`) and a Markdown index (`prototype_index.md`)
 * of the written images into `outputDir`.
 *
 * Assets whose data URI is rejected by `writeDataUriToFile` are skipped: they
 * appear neither in the returned list nor in the index files.
 *
 * @param input.outputDir - Directory that receives the two index files.
 * @param input.prototypesDir - Directory that receives the decoded image files.
 * @param input.rawChunks - Chunks searched for references to each asset id.
 * @param input.pendingAssets - Images collected while parsing the input files.
 * @returns The assets that were written. When nothing is pending the result is
 *   empty and no index file is written.
 */
async function writePrototypeAssets(input: {
  outputDir: string;
  prototypesDir: string;
  rawChunks: Array<{ content: string; sourceFiles: string[]; title?: string }>;
  pendingAssets: PrototypeAssetPending[];
}): Promise<PrototypeAsset[]> {
  const { outputDir, prototypesDir, rawChunks, pendingAssets } = input;
  if (pendingAssets.length === 0) {
    return [];
  }

  const prototypeAssets: PrototypeAsset[] = [];
  for (const asset of pendingAssets) {
    const ext = extFromMimeType(asset.mimeType);
    const fileName = `${asset.id}.${ext}`;
    const absolutePath = path.join(prototypesDir, fileName);
    const written = await writeDataUriToFile(asset.dataUri, absolutePath);
    if (!written) {
      // Unparseable data URI: leave the asset out of the manifest and indexes.
      continue;
    }
    prototypeAssets.push({
      id: asset.id,
      sourceFile: asset.sourceFile,
      alt: asset.alt,
      mimeType: asset.mimeType,
      estimatedSize: asset.estimatedSize,
      // Derive the recorded path from where the file was actually written
      // (relative to the index location) instead of a hard-coded folder name,
      // and normalise Windows separators so the index is portable.
      path: path.relative(outputDir, absolutePath).replace(/\\/g, '/'),
      summary: asset.summary,
      sourceChunks: collectSourceChunks(rawChunks, asset.id),
    });
  }

  const indexJsonPath = path.join(outputDir, 'prototype_index.json');
  await writeJson(indexJsonPath, {
    version: '1.0.0',
    createdAt: new Date().toISOString(),
    total: prototypeAssets.length,
    assets: prototypeAssets,
  });

  const indexMarkdownPath = path.join(outputDir, 'prototype_index.md');
  const lines: string[] = [
    '# Prototype Index',
    '',
    `- Total: ${prototypeAssets.length}`,
    '',
    '| ID | Path | Source File | Chunks | Summary |',
    '| --- | --- | --- | --- | --- |',
  ];
  for (const asset of prototypeAssets) {
    const chunkList = asset.sourceChunks.join(', ') || '-';
    const summaryCell = formatPrototypeIndexCell(asset.summary || '');
    lines.push(
      `| ${asset.id} | \`${asset.path}\` | \`${path.basename(asset.sourceFile)}\` | ${chunkList} | ${summaryCell} |`
    );
  }
  await fs.writeFile(indexMarkdownPath, `${lines.join('\n')}\n`, 'utf-8');

  return prototypeAssets;
}

/**
 * Makes free text safe for a single Markdown table cell: line breaks (which
 * would end the table row) are collapsed to one space and pipes are escaped.
 */
function formatPrototypeIndexCell(text: string): string {
  return text
    .replace(/\s*[\r\n]+\s*/g, ' ')
    .trim()
    .replace(/\|/g, '\\|');
}
656
+
657
/**
 * Finds the chunks that reference a given image id.
 *
 * An occurrence only counts when it is a whole identifier, i.e. it is not
 * directly preceded or followed by a letter, digit or underscore. A plain
 * substring test would report `F001_IMG0001` as present in a chunk that only
 * mentions `F001_IMG00012`.
 *
 * @param rawChunks - Chunks to search, in output order.
 * @param imageId - Image id to look for.
 * @returns Zero-based indices into `rawChunks` of the chunks whose content
 *   references the id, in ascending order. An empty id matches nothing.
 */
function collectSourceChunks(
  rawChunks: Array<{ content: string; sourceFiles: string[]; title?: string }>,
  imageId: string
): number[] {
  const results: number[] = [];
  if (imageId.length === 0) {
    // An empty needle is "contained" in every string; that is never a real reference.
    return results;
  }
  rawChunks.forEach((chunk, index) => {
    if (containsWholeImageId(chunk.content, imageId)) {
      results.push(index);
    }
  });
  return results;
}

/** Returns true when `content` contains `imageId` as a whole identifier. */
function containsWholeImageId(content: string, imageId: string): boolean {
  const isIdChar = (ch: string): boolean => /^[A-Za-z0-9_]$/.test(ch);
  let at = content.indexOf(imageId);
  while (at !== -1) {
    // charAt yields '' outside the string, which is not an identifier character.
    const before = content.charAt(at - 1);
    const after = content.charAt(at + imageId.length);
    if (!isIdChar(before) && !isIdChar(after)) {
      return true;
    }
    at = content.indexOf(imageId, at + 1);
  }
  return false;
}
669
+
670
/**
 * Prefixes an image id with the 1-based ordinal of its input file so ids from
 * different files cannot collide, e.g. `(0, 'IMG0001')` -> `F001_IMG0001`.
 *
 * @param fileIdx - Zero-based index of the input file.
 * @param imageId - Image id as produced for that file.
 * @returns `F<ordinal>_<imageId>`, with the ordinal left-padded with zeros to
 *   at least three digits.
 */
function scopeImageId(fileIdx: number, imageId: string): string {
  const ordinal = String(fileIdx + 1);
  const paddedOrdinal = ordinal.padStart(3, '0');
  return 'F' + paddedOrdinal + '_' + imageId;
}
673
+
674
/**
 * Rewrites image ids inside `content` according to `idMapping`.
 *
 * All ids are replaced in a single pass over the text, so:
 * - a replacement value is never re-matched by another mapping entry,
 * - replacement values are inserted literally (`$&`, `$1`, ... are not
 *   expanded as replacement patterns),
 * - the text is scanned once instead of once per id.
 *
 * Only whole-word occurrences (regex `\b` on both sides) are replaced.
 *
 * @param content - Text containing image id references.
 * @param idMapping - Map from original id to replacement id. Empty keys are
 *   ignored because they would match at every word boundary.
 * @returns The rewritten text; `content` unchanged when there is nothing to map.
 */
function replaceImageIds(content: string, idMapping: Map<string, string>): string {
  const fromIds = Array.from(idMapping.keys()).filter(id => id.length > 0);
  if (fromIds.length === 0) {
    return content;
  }
  // Longest first so an id is preferred over any other id that is its prefix.
  fromIds.sort((a, b) => b.length - a.length);
  const pattern = new RegExp(`\\b(?:${fromIds.map(escapeRegExp).join('|')})\\b`, 'g');
  return content.replace(pattern, matched => idMapping.get(matched) ?? matched);
}

/**
 * Escapes every regular-expression metacharacter in `input` so the result can
 * be embedded in a `RegExp` source and match the text literally.
 */
function escapeRegExp(input: string): string {
  return input.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
685
+
686
/**
 * Picks a file extension for an image MIME type.
 *
 * Matching is case-insensitive because MIME type names are case-insensitive
 * (`image/PNG` and `image/png` denote the same type).
 *
 * @param mimeType - MIME type string, e.g. `image/png`.
 * @returns `png`, `jpg`, `webp` or `gif` when the type mentions that format;
 *   otherwise the generic fallback `img`.
 */
function extFromMimeType(mimeType: string): string {
  const normalized = mimeType.toLowerCase();
  if (normalized.includes('png')) return 'png';
  if (normalized.includes('jpeg') || normalized.includes('jpg')) return 'jpg';
  if (normalized.includes('webp')) return 'webp';
  if (normalized.includes('gif')) return 'gif';
  return 'img';
}
693
+
694
/**
 * Decodes a base64 `data:image/...` URI and writes the bytes to `filePath`.
 *
 * Accepts the forms that commonly occur in exported documents: optional media
 * type parameters before `;base64` (e.g. `;charset=utf-8`) and whitespace or
 * line breaks inside the base64 payload.
 *
 * @param dataUri - The data URI to decode.
 * @param filePath - Destination file path.
 * @returns `true` when the file was written; `false` (nothing written) when
 *   the input is not a base64 image data URI or its payload is empty/invalid.
 */
async function writeDataUriToFile(dataUri: string, filePath: string): Promise<boolean> {
  const commaAt = dataUri.indexOf(',');
  if (commaAt === -1) {
    return false;
  }

  const header = dataUri.slice(0, commaAt).trim();
  if (!/^data:image\/[a-zA-Z0-9.+-]+(?:;[^;,]+)*;base64$/i.test(header)) {
    return false;
  }

  // Base64 is frequently wrapped across lines; whitespace carries no data.
  const payload = dataUri.slice(commaAt + 1).replace(/\s+/g, '');
  if (!/^[A-Za-z0-9+/=]+$/.test(payload)) {
    return false;
  }

  await fs.writeFile(filePath, Buffer.from(payload, 'base64'));
  return true;
}
703
+
551
704
  function readStdin(): Promise<string> {
552
705
  return new Promise((resolve) => {
553
706
  let data = '';
package/src/types.ts CHANGED
@@ -10,6 +10,17 @@ export interface Chunk {
10
10
  endLine?: number;
11
11
  }
12
12
 
13
/**
 * An embedded image that was extracted from an input document and written to
 * disk, as recorded in the manifest and in the prototype index files.
 */
export interface PrototypeAsset {
  /** Image id, scoped to its input file (e.g. `F001_IMG0001`). */
  id: string;
  /** Path of the input file the image was extracted from. */
  sourceFile: string;
  /** Alt text reported for the image by the document parser. */
  alt: string;
  /** MIME type reported for the image. */
  mimeType: string;
  /** Size estimate reported for the image (unit not visible here — TODO confirm). */
  estimatedSize: number;
  /** Location of the written image file, using forward slashes, e.g. `prototypes/F001_IMG0001.png`. */
  path: string;
  /** Semantic summary of the image, when one was produced. */
  summary?: string;
  /** Zero-based indices of the chunks whose content references this image id. */
  sourceChunks: number[];
}
23
+
13
24
  export interface Manifest {
14
25
  version: string;
15
26
  createdAt: string;
@@ -17,6 +28,7 @@ export interface Manifest {
17
28
  totalSize: number;
18
29
  chunkSize: number;
19
30
  chunks: Chunk[];
31
+ prototypeAssets?: PrototypeAsset[];
20
32
  }
21
33
 
22
34
  export interface Feature {