spec-agent 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +256 -0
- package/bin/spec-agent.js +14 -0
- package/dist/commands/analyze.d.ts +16 -0
- package/dist/commands/analyze.d.ts.map +1 -0
- package/dist/commands/analyze.js +283 -0
- package/dist/commands/analyze.js.map +1 -0
- package/dist/commands/clean.d.ts +9 -0
- package/dist/commands/clean.d.ts.map +1 -0
- package/dist/commands/clean.js +109 -0
- package/dist/commands/clean.js.map +1 -0
- package/dist/commands/dispatch.d.ts +12 -0
- package/dist/commands/dispatch.d.ts.map +1 -0
- package/dist/commands/dispatch.js +232 -0
- package/dist/commands/dispatch.js.map +1 -0
- package/dist/commands/doctor.d.ts +9 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +153 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/learn.d.ts +13 -0
- package/dist/commands/learn.d.ts.map +1 -0
- package/dist/commands/learn.js +234 -0
- package/dist/commands/learn.js.map +1 -0
- package/dist/commands/merge.d.ts +11 -0
- package/dist/commands/merge.d.ts.map +1 -0
- package/dist/commands/merge.js +335 -0
- package/dist/commands/merge.js.map +1 -0
- package/dist/commands/pipeline.d.ts +19 -0
- package/dist/commands/pipeline.d.ts.map +1 -0
- package/dist/commands/pipeline.js +266 -0
- package/dist/commands/pipeline.js.map +1 -0
- package/dist/commands/plan.d.ts +13 -0
- package/dist/commands/plan.d.ts.map +1 -0
- package/dist/commands/plan.js +314 -0
- package/dist/commands/plan.js.map +1 -0
- package/dist/commands/scan.d.ts +28 -0
- package/dist/commands/scan.d.ts.map +1 -0
- package/dist/commands/scan.js +488 -0
- package/dist/commands/scan.js.map +1 -0
- package/dist/commands/status.d.ts +8 -0
- package/dist/commands/status.d.ts.map +1 -0
- package/dist/commands/status.js +146 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +126 -0
- package/dist/index.js.map +1 -0
- package/dist/services/document-parser.d.ts +49 -0
- package/dist/services/document-parser.d.ts.map +1 -0
- package/dist/services/document-parser.js +499 -0
- package/dist/services/document-parser.js.map +1 -0
- package/dist/services/llm.d.ts +61 -0
- package/dist/services/llm.d.ts.map +1 -0
- package/dist/services/llm.js +716 -0
- package/dist/services/llm.js.map +1 -0
- package/dist/types.d.ts +159 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/file.d.ts +10 -0
- package/dist/utils/file.d.ts.map +1 -0
- package/dist/utils/file.js +96 -0
- package/dist/utils/file.js.map +1 -0
- package/dist/utils/logger.d.ts +13 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +55 -0
- package/dist/utils/logger.js.map +1 -0
- package/package.json +48 -0
- package/scripts/publish-npm.js +174 -0
- package/spec-agent-implementation.md +750 -0
- package/src/commands/analyze.ts +322 -0
- package/src/commands/clean.ts +88 -0
- package/src/commands/dispatch.ts +250 -0
- package/src/commands/doctor.ts +136 -0
- package/src/commands/learn.ts +261 -0
- package/src/commands/merge.ts +377 -0
- package/src/commands/pipeline.ts +306 -0
- package/src/commands/plan.ts +331 -0
- package/src/commands/scan.ts +568 -0
- package/src/commands/status.ts +129 -0
- package/src/index.ts +137 -0
- package/src/services/document-parser.ts +548 -0
- package/src/services/llm.ts +857 -0
- package/src/types.ts +161 -0
- package/src/utils/file.ts +60 -0
- package/src/utils/logger.ts +58 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.parseDocument = parseDocument;
|
|
37
|
+
exports.extractEmbeddedImages = extractEmbeddedImages;
|
|
38
|
+
exports.analyzeBase64Images = analyzeBase64Images;
|
|
39
|
+
exports.readChunkContent = readChunkContent;
|
|
40
|
+
exports.isSupportedFormat = isSupportedFormat;
|
|
41
|
+
const fs = __importStar(require("fs-extra"));
|
|
42
|
+
const path = __importStar(require("path"));
|
|
43
|
+
const child_process_1 = require("child_process");
|
|
44
|
+
const util_1 = require("util");
|
|
45
|
+
const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
|
|
46
|
+
/**
 * Read a document from disk and convert it to Markdown.
 *
 * The handler is selected from the lower-cased file extension:
 *   .md            -> parseMarkdownFile
 *   .html / .htm   -> parseHtmlToMarkdown
 *   .pdf           -> parsePdfToMarkdown (also receives the file path)
 *   .docx          -> parseDocxToMarkdown
 *   .txt and every other extension -> parseTextFile
 *
 * @param {string} filePath - Path of the document to load.
 * @returns {Promise<object>} Whatever the selected handler returns.
 */
async function parseDocument(filePath) {
    const extension = path.extname(filePath).toLowerCase();
    const data = await fs.readFile(filePath);
    if (extension === '.md') {
        return parseMarkdownFile(data);
    }
    if (extension === '.html' || extension === '.htm') {
        return parseHtmlToMarkdown(data);
    }
    if (extension === '.pdf') {
        return parsePdfToMarkdown(data, filePath);
    }
    if (extension === '.docx') {
        return parseDocxToMarkdown(data);
    }
    // '.txt' and anything unrecognized: try to read as text.
    return parseTextFile(data);
}
|
|
75
|
+
/**
 * Decode a Markdown file as UTF-8, normalize it, and swap embedded base64
 * images for reference placeholders.
 *
 * @param {Buffer} buffer - Raw file contents (decoded with toString('utf-8')).
 * @returns {{content: string, format: string, images: object[], metadata: {wordCount: number}}}
 *   wordCount is the number of pieces produced by splitting the raw,
 *   un-normalized text on whitespace runs.
 */
function parseMarkdownFile(buffer) {
    const rawText = buffer.toString('utf-8');
    const { content, images } = extractEmbeddedImages(normalizeMarkdown(rawText));
    const wordCount = rawText.split(/\s+/).length;
    return { content, format: 'markdown', images, metadata: { wordCount } };
}
|
|
88
|
+
/**
 * Treat a buffer as UTF-8 plain text and return it as Markdown.
 *
 * The text goes through normalizeMarkdown and extractEmbeddedImages; the
 * word count is taken from the text as decoded, before normalization.
 *
 * @param {Buffer} buffer - Raw file contents.
 * @returns {{content: string, format: string, images: object[], metadata: {wordCount: number}}}
 */
function parseTextFile(buffer) {
    const decoded = buffer.toString('utf-8');
    const result = extractEmbeddedImages(normalizeMarkdown(decoded));
    const metadata = { wordCount: decoded.split(/\s+/).length };
    return {
        content: result.content,
        format: 'markdown',
        images: result.images,
        metadata,
    };
}
|
|
101
|
+
/**
 * Normalize text to a consistent Markdown layout.
 *
 * Steps, in order:
 *  1. Convert CRLF / lone CR line endings to LF so the newline-based rules
 *     below also apply to files saved with Windows line endings.
 *  2. Insert the missing space after a heading marker ("#Title" -> "# Title").
 *  3. Turn "*" list markers into "-".
 *  4. Make sure a heading is preceded by a blank line.
 *  5. Collapse runs of four or more newlines to three (at most two blank lines).
 *  6. Trim leading and trailing whitespace.
 *
 * Embedded base64 images are NOT touched here; extractEmbeddedImages handles
 * them.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeMarkdown(content) {
    return content
        // Unify line endings first; every later pattern only looks for "\n".
        .replace(/\r\n?/g, '\n')
        // Ensure space after # for headers
        .replace(/^(#{1,6})([^\s#])/gm, '$1 $2')
        // Normalize list markers (convert * to -)
        .replace(/^(\s*)\*[ \t]/gm, '$1- ')
        // Ensure blank line before headers
        .replace(/([^\n])\n(#{1,6}\s)/g, '$1\n\n$2')
        // Remove excessive blank lines (max 2)
        .replace(/\n{4,}/g, '\n\n\n')
        .trim();
}
|
|
120
|
+
/**
 * Replace embedded base64 images with short reference placeholders and
 * return the extracted image records.
 *
 * Two forms are recognized:
 *  - Markdown images:  ![alt](data:image/<type>;base64,<data>)
 *  - HTML images:      <img ... src="data:image/<type>;base64,<data>" ...>
 *    with the src/alt attribute values in either double or single quotes.
 *
 * Markdown images are processed first, then HTML images; ids (IMG0001,
 * IMG0002, ...) are assigned in that processing order.
 *
 * @param {string} content - Markdown/HTML text that may contain data URIs.
 * @returns {{content: string, images: Array<{id: string, alt: string, mimeType: string, estimatedSize: number, dataUri: string}>}}
 *   content has every recognized image replaced by a placeholder line;
 *   estimatedSize is the base64 length times 0.75, rounded.
 */
function extractEmbeddedImages(content) {
    const images = [];
    // Records one image and returns the placeholder text that replaces it.
    const register = (alt, dataUri, mimeSubType, base64Data) => {
        const id = `IMG${String(images.length + 1).padStart(4, '0')}`;
        const mimeType = `image/${mimeSubType}`;
        const estimatedSize = Math.round(base64Data.length * 0.75);
        images.push({ id, alt, mimeType, estimatedSize, dataUri });
        return `\n[图片引用 ${id} | alt="${alt || '无'}" | ${mimeType} | ${estimatedSize} bytes]\n`;
    };
    const withMarkdownImages = content.replace(/!\[([^\]]*)\]\((data:image\/([^;]+);base64,([A-Za-z0-9+/=]+))\)/g, (_match, altText, dataUri, mimeSubType, base64Data) => register((altText || '').trim(), dataUri, mimeSubType, base64Data));
    // The back-reference \1 makes the closing quote match the opening one,
    // so both src="..." and src='...' are accepted.
    const withHtmlImages = withMarkdownImages.replace(/<img[^>]*src=(["'])(data:image\/([^;]+);base64,([A-Za-z0-9+/=]+))\1[^>]*>/gi, (tag, _quote, dataUri, mimeSubType, base64Data) => {
        const altMatch = tag.match(/\salt=(?:"([^"]*)"|'([^']*)')/i);
        const alt = (altMatch?.[1] ?? altMatch?.[2] ?? '').trim();
        return register(alt, dataUri, mimeSubType, base64Data);
    });
    return {
        content: withHtmlImages,
        images,
    };
}
|
|
145
|
+
/**
 * Count the base64 image data URIs in a string and estimate their combined
 * decoded size.
 *
 * @param {string} content - Text to scan.
 * @returns {{count: number, estimatedSize: number}} Number of data URIs found
 *   and the total base64 length multiplied by 0.75 (base64 is roughly 4/3 of
 *   the binary size), rounded.
 */
function analyzeBase64Images(content) {
    const hits = content.matchAll(/data:image\/[^;]+;base64,([A-Za-z0-9+/=]+)/g);
    const payloadLengths = Array.from(hits, (hit) => hit[1].length);
    const encodedTotal = payloadLengths.reduce((sum, length) => sum + length, 0);
    return {
        count: payloadLengths.length,
        estimatedSize: Math.round(encodedTotal * 0.75),
    };
}
|
|
161
|
+
/**
 * Decode an HTML file as UTF-8, convert it to Markdown, and replace embedded
 * base64 images with reference placeholders.
 *
 * @param {Buffer} buffer - Raw HTML file contents.
 * @returns {Promise<{content: string, format: string, images: object[], metadata: {wordCount: number}}>}
 *   wordCount is computed on the converted Markdown before image extraction.
 */
async function parseHtmlToMarkdown(buffer) {
    const markdown = convertHtmlToMarkdown(buffer.toString('utf-8'));
    const { content, images } = extractEmbeddedImages(markdown);
    return {
        content,
        format: 'markdown',
        images,
        metadata: { wordCount: markdown.split(/\s+/).length },
    };
}
|
|
175
|
+
/**
 * Convert HTML to Markdown while preserving document structure.
 *
 * This is a regex-based, best-effort converter (no DOM parsing). It handles
 * script/style removal, headings, paragraphs, line breaks, bold/italic,
 * preformatted and inline code, flat lists, tables, links and images, then
 * strips any remaining tags and decodes character references.
 *
 * Notes on behavior:
 *  - Tag names are matched with a word boundary, so e.g. the italic rule
 *    does not swallow <img>, the paragraph rule does not swallow <pre>, and
 *    the bold rule does not swallow <body>.
 *  - <img> tags become Markdown images (![alt](src)). Data-URI images are
 *    therefore still present in the output for a later extraction step.
 *  - Character references are decoded in a single pass after tag stripping,
 *    so "&amp;lt;" becomes the text "&lt;" rather than "<".
 *
 * @param {string} html - HTML source.
 * @returns {string} Markdown text, trimmed.
 */
function convertHtmlToMarkdown(html) {
    let md = html;
    // Remove script and style tags with content
    md = md.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, '');
    md = md.replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, '');
    // Convert headings, one level at a time (h1 first, h6 last)
    for (let level = 1; level <= 6; level++) {
        const headingTag = new RegExp(`<h${level}\\b[^>]*>([\\s\\S]*?)<\\/h${level}>`, 'gi');
        md = md.replace(headingTag, `\n\n${'#'.repeat(level)} $1\n\n`);
    }
    // Convert paragraphs
    md = md.replace(/<p\b[^>]*>([\s\S]*?)<\/p>/gi, '\n\n$1\n\n');
    // Convert line breaks
    md = md.replace(/<br\s*\/?>/gi, '\n');
    // Convert strong/b and em/i
    md = md.replace(/<(strong|b)\b[^>]*>([\s\S]*?)<\/(strong|b)>/gi, '**$2**');
    md = md.replace(/<(em|i)\b[^>]*>([\s\S]*?)<\/(em|i)>/gi, '*$2*');
    // Convert code. <pre> goes first and absorbs an immediately nested
    // <code> wrapper, so fenced blocks do not end up containing backticks.
    md = md.replace(/<pre\b[^>]*>(?:\s*<code\b[^>]*>)?([\s\S]*?)(?:<\/code>\s*)?<\/pre>/gi, '\n\n```\n$1\n```\n\n');
    md = md.replace(/<code\b[^>]*>([\s\S]*?)<\/code>/gi, '`$1`');
    // Convert unordered lists
    md = md.replace(/<ul\b[^>]*>([\s\S]*?)<\/ul>/gi, (_match, content) => {
        return '\n\n' + content.replace(/<li\b[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n') + '\n';
    });
    // Convert ordered lists
    md = md.replace(/<ol\b[^>]*>([\s\S]*?)<\/ol>/gi, (_match, content) => {
        let index = 1;
        return '\n\n' + content.replace(/<li\b[^>]*>([\s\S]*?)<\/li>/gi, (_liMatch, itemContent) => `${index++}. ${itemContent.trim()}\n`) + '\n';
    });
    // Convert tables; the first row is treated as the header row
    md = md.replace(/<table\b[^>]*>([\s\S]*?)<\/table>/gi, (_match, content) => {
        let tableMd = '\n\n';
        const rows = content.match(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi) || [];
        rows.forEach((row, rowIndex) => {
            const cells = row.match(/<t[dh]\b[^>]*>([\s\S]*?)<\/t[dh]>/gi) || [];
            const cellContents = cells.map((cell) => cell.replace(/<[^>]+>/g, '').trim());
            if (cellContents.length > 0) {
                tableMd += '| ' + cellContents.join(' | ') + ' |\n';
                // Add separator after header row
                if (rowIndex === 0) {
                    tableMd += '|' + cellContents.map(() => ' --- |').join('') + '\n';
                }
            }
        });
        return tableMd + '\n';
    });
    // Convert links
    md = md.replace(/<a\b[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)');
    // Convert images (src before alt, alt before src, then src only)
    md = md.replace(/<img\b[^>]+src="([^"]+)"[^>]*alt="([^"]*)"[^>]*\/?>/gi, '![$2]($1)');
    md = md.replace(/<img\b[^>]+alt="([^"]*)"[^>]*src="([^"]+)"[^>]*\/?>/gi, '![$1]($2)');
    md = md.replace(/<img\b[^>]+src="([^"]+)"[^>]*\/?>/gi, '![]($1)');
    // Remove remaining HTML tags but keep content
    md = md.replace(/<[^>]+>/g, '');
    // Decode HTML entities in one pass. Unknown names and invalid code
    // points are left exactly as written.
    const namedEntities = new Map([
        ['amp', '&'],
        ['lt', '<'],
        ['gt', '>'],
        ['quot', '"'],
        ['apos', "'"],
        ['nbsp', ' '],
        ['mdash', '—'],
        ['ndash', '–'],
        ['hellip', '...'],
    ]);
    md = md.replace(/&(#\d+|#x[0-9a-f]+|[a-z]+);/gi, (entity, body) => {
        if (body[0] !== '#') {
            return namedEntities.get(body) ?? entity;
        }
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
        const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
        return isValid ? String.fromCodePoint(codePoint) : entity;
    });
    // A literal ellipsis character is folded to three dots as well.
    md = md.replace(/…/g, '...');
    // Clean up excessive whitespace
    md = md.replace(/\n{4,}/g, '\n\n\n');
    return md.trim();
}
|
|
248
|
+
/**
 * Read an integer from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset,
 *   empty, or does not start with a base-10 integer.
 * @returns {number} The parsed integer or the fallback.
 */
function parseEnvInt(name, fallback) {
    const value = process.env[name];
    // Unset and empty values skip parsing entirely and yield the fallback.
    const parsed = value ? parseInt(value, 10) : NaN;
    return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
255
|
+
/**
 * Render the first pages of a PDF to PNG by running the `pdftoppm`
 * executable, and return them as base64 data-URI image records.
 *
 * Output is written to a per-call directory under
 * <cwd>/.spec-agent-tmp/pdf-pages/, which is removed again before returning.
 * Any failure while converting or reading the pages (for example pdftoppm
 * not being available) results in an empty array rather than an error.
 *
 * @param {string} pdfPath - Path of the PDF file passed to pdftoppm.
 * @param {number} maxPages - Last page to render (pages 1..maxPages);
 *   values <= 0 return [] without running anything.
 * @returns {Promise<Array<{id: string, alt: string, mimeType: string, estimatedSize: number, dataUri: string}>>}
 */
async function extractPdfPageImages(pdfPath, maxPages) {
    if (maxPages <= 0) {
        return [];
    }
    const runId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const workDir = path.join(process.cwd(), '.spec-agent-tmp', 'pdf-pages', runId);
    const outputPrefix = path.join(workDir, 'page');
    await fs.ensureDir(workDir);
    // First run of digits in a file name, e.g. "page-03.png" -> 3.
    const numberIn = (fileName, fallback) => parseInt(fileName.match(/\d+/)?.[0] || fallback, 10);
    try {
        const args = ['-png', '-f', '1', '-l', String(maxPages), pdfPath, outputPrefix];
        await execFileAsync('pdftoppm', args);
        const entries = await fs.readdir(workDir);
        const pageFiles = entries.filter((entry) => /^page-\d+\.png$/i.test(entry));
        pageFiles.sort((left, right) => numberIn(left, '0') - numberIn(right, '0'));
        const images = [];
        for (const [position, fileName] of pageFiles.entries()) {
            const ordinal = position + 1;
            const png = await fs.readFile(path.join(workDir, fileName));
            images.push({
                id: `PDFIMG${String(ordinal).padStart(4, '0')}`,
                alt: `PDF第${numberIn(fileName, String(ordinal))}页`,
                mimeType: 'image/png',
                estimatedSize: png.length,
                dataUri: `data:image/png;base64,${png.toString('base64')}`,
            });
        }
        return images;
    }
    catch {
        // pdftoppm unavailable or conversion failed.
        return [];
    }
    finally {
        // Best-effort cleanup; a failed removal is ignored.
        await fs.remove(workDir).catch(() => undefined);
    }
}
|
|
303
|
+
/**
 * Extract text from a PDF with `pdf-parse`, structure it as Markdown, and
 * prepend reference placeholders for rendered page images.
 *
 * Page images are only produced when filePath is given; the number of pages
 * rendered is capped by the PDF_IMAGE_PAGE_LIMIT environment variable
 * (default 8, negative values treated as 0).
 *
 * If anything in the main path throws, the buffer is decoded as UTF-8 and,
 * when longer than 100 characters, returned as normalized text with no
 * images or metadata; otherwise an Error is thrown.
 *
 * @param {Buffer} buffer - Raw PDF contents.
 * @param {string} [filePath] - Path of the PDF on disk, used for page images.
 * @returns {Promise<{content: string, format: string, images: object[], metadata: object}>}
 * @throws {Error} "Failed to parse PDF: ..." when parsing fails and the
 *   fallback text is 100 characters or shorter.
 */
async function parsePdfToMarkdown(buffer, filePath) {
    try {
        // Dynamic import to avoid loading if not needed
        const pdfParse = await Promise.resolve().then(() => __importStar(require('pdf-parse')));
        const parsed = await pdfParse.default(buffer);
        // PDF text often has page breaks and layout artifacts that need cleaning
        const body = structurePdfContent(parsed.text);
        const pageLimit = Math.max(0, parseEnvInt('PDF_IMAGE_PAGE_LIMIT', 8));
        let pageImages = [];
        if (filePath) {
            pageImages = await extractPdfPageImages(filePath, Math.min(parsed.numpages, pageLimit));
        }
        const placeholders = pageImages.map((image) => `[图片引用 ${image.id} | alt="${image.alt || '无'}" | ${image.mimeType} | ${image.estimatedSize} bytes]`);
        let combined = body;
        if (placeholders.length > 0) {
            combined = `${placeholders.join('\n')}\n\n${body}`;
        }
        const metadata = {
            pages: parsed.numpages,
            wordCount: parsed.text.split(/\s+/).length,
        };
        return {
            content: extractEmbeddedImages(combined).content,
            format: 'markdown',
            images: pageImages,
            metadata,
        };
    }
    catch (error) {
        // Fallback: try to extract text as-is
        const rawText = buffer.toString('utf-8');
        if (rawText.length <= 100) {
            throw new Error(`Failed to parse PDF: ${error instanceof Error ? error.message : String(error)}`);
        }
        return {
            content: extractEmbeddedImages(normalizeMarkdown(rawText)).content,
            format: 'markdown',
            images: [],
            metadata: {},
        };
    }
}
|
|
343
|
+
/**
 * Structure raw PDF text into Markdown using line-based heuristics.
 *
 * Lines consisting only of a number are dropped (page numbers). A line
 * shorter than 100 characters that follows an empty line (or starts the
 * text) is promoted to a heading when it matches one of these, checked in
 * order:
 *  - starts with "Chapter", "CHAPTER" or "第…章"          -> H1
 *  - starts with a section number ("2", "2.1", "2.1.3.")  -> H2, H3, H4, ...
 *    The level comes from the number of numeric segments in that prefix
 *    only (capped at H6); dots elsewhere in the line and a trailing dot
 *    after the number do not affect it.
 *  - all upper case, contains A-Z, under 50 characters     -> H2
 *  - Title Case words only, and the next line does not start with the same
 *    first 10 characters                                   -> H3
 * All other lines are kept as trimmed text. The result is passed through
 * normalizeMarkdown.
 *
 * @param {string} text - Text extracted from a PDF.
 * @returns {string} Markdown text.
 */
function structurePdfContent(text) {
    // Remove page number lines (standalone numbers)
    const lines = text.replace(/\n\s*\d+\s*\n/g, '\n\n').split('\n');
    const processedLines = [];
    let prevLineEmpty = true;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        // Keep empty lines, and remember them: only a line that follows one
        // can become a heading.
        if (!line) {
            processedLines.push('');
            prevLineEmpty = true;
            continue;
        }
        const isHeadingCandidate = prevLineEmpty && line.length < 100;
        prevLineEmpty = false;
        if (!isHeadingCandidate) {
            processedLines.push(line);
            continue;
        }
        const nextLine = lines[i + 1]?.trim() || '';
        const isAllCaps = line === line.toUpperCase() && line.length > 3 && /[A-Z]/.test(line);
        const isTitleCase = /^[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*$/.test(line) && line.length > 3;
        // Group 1 is the section number without its optional trailing dot.
        const numberedPrefix = line.match(/^(\d+(?:\.\d+)*)\.?\s+\S/);
        const looksLikeChapter = /^(Chapter|CHAPTER|第[一二三四五六七八九十\d]+章)/.test(line);
        if (looksLikeChapter) {
            // Chapter header - H1
            processedLines.push(`# ${line}`);
        }
        else if (numberedPrefix) {
            // "2" -> H2, "2.1" -> H3, "2.1.3" -> H4, ...
            const depth = numberedPrefix[1].split('.').length;
            processedLines.push(`${'#'.repeat(Math.min(depth + 1, 6))} ${line}`);
        }
        else if (isAllCaps && line.length < 50) {
            // ALL CAPS short line - likely a section header
            processedLines.push(`## ${line}`);
        }
        else if (isTitleCase && !nextLine.startsWith(line.substring(0, 10))) {
            // Title case that doesn't continue - likely a header
            processedLines.push(`### ${line}`);
        }
        else {
            processedLines.push(line);
        }
    }
    // Clean up excessive whitespace
    const md = processedLines.join('\n').replace(/\n{4,}/g, '\n\n\n');
    return normalizeMarkdown(md);
}
|
|
403
|
+
/**
 * Convert a DOCX buffer to Markdown.
 *
 * Primary path: `mammoth.convertToHtml` with a style map that turns the
 * "Heading 1".."Heading 6", "Title" and "Subtitle" paragraph styles into
 * heading elements, followed by convertHtmlToMarkdown and
 * extractEmbeddedImages. The first level-1 heading, if any, is reported as
 * metadata.title.
 *
 * Fallbacks when that throws, in order:
 *  1. `mammoth.extractRawText`, normalized;
 *  2. the buffer decoded as UTF-8, if longer than 100 characters;
 *  3. an Error carrying the message of the original failure.
 *
 * @param {Buffer} buffer - Raw DOCX contents.
 * @returns {Promise<{content: string, format: string, images: object[], metadata: object}>}
 * @throws {Error} "Failed to parse DOCX: ..." when every fallback fails.
 */
async function parseDocxToMarkdown(buffer) {
    try {
        // Dynamic import to avoid loading if not needed
        const mammoth = await Promise.resolve().then(() => __importStar(require('mammoth')));
        const headingRules = [1, 2, 3, 4, 5, 6].map((level) => `p[style-name='Heading ${level}'] => h${level}:fresh`);
        const styleMap = [
            ...headingRules,
            "p[style-name='Title'] => h1.title:fresh",
            "p[style-name='Subtitle'] => h2.subtitle:fresh",
        ];
        // Go through HTML to preserve structure, then convert to Markdown
        const htmlResult = await mammoth.convertToHtml({ buffer }, { styleMap });
        const markdown = convertHtmlToMarkdown(htmlResult.value);
        const { content, images } = extractEmbeddedImages(markdown);
        const metadata = { wordCount: markdown.split(/\s+/).length };
        // Use the first level-1 heading as the document title
        const firstHeading = markdown.match(/^#\s+(.+)$/m);
        if (firstHeading) {
            metadata.title = firstHeading[1].trim();
        }
        return { content, format: 'markdown', images, metadata };
    }
    catch (error) {
        // Fallback: try raw text extraction
        try {
            const mammoth = await Promise.resolve().then(() => __importStar(require('mammoth')));
            const rawResult = await mammoth.extractRawText({ buffer });
            const rawText = rawResult.value;
            return {
                content: extractEmbeddedImages(normalizeMarkdown(rawText)).content,
                format: 'markdown',
                images: [],
                metadata: { wordCount: rawText.split(/\s+/).length },
            };
        }
        catch {
            // Last resort: read as plain text
            const decoded = buffer.toString('utf-8');
            if (decoded.length > 100) {
                return {
                    content: extractEmbeddedImages(normalizeMarkdown(decoded)).content,
                    format: 'markdown',
                    images: [],
                    metadata: {},
                };
            }
        }
        throw new Error(`Failed to parse DOCX: ${error instanceof Error ? error.message : String(error)}`);
    }
}
|
|
468
|
+
/**
 * Read several documents and join them into one Markdown string.
 *
 * Each file contributes a section "=== <basename> ===" followed by its
 * parsed content; sections are separated by a "---" rule. Files are
 * processed one after another, in the given order. If parsing a file
 * throws, the file is read as UTF-8 text and normalized instead; if that
 * also fails, the section body is an "[Error reading file: ...]" note
 * carrying the message of the original parse error.
 *
 * @param {string[]} filePaths - Paths of the files to read.
 * @returns {Promise<string>} The concatenated sections.
 */
async function readChunkContent(filePaths) {
    const sections = [];
    for (const filePath of filePaths) {
        let body;
        try {
            body = (await parseDocument(filePath)).content;
        }
        catch (parseError) {
            // If parsing fails, try to read as plain text
            try {
                body = normalizeMarkdown(await fs.readFile(filePath, 'utf-8'));
            }
            catch {
                const reason = parseError instanceof Error ? parseError.message : String(parseError);
                body = `[Error reading file: ${reason}]`;
            }
        }
        sections.push(`=== ${path.basename(filePath)} ===\n${body}`);
    }
    return sections.join('\n\n---\n\n');
}
|
|
492
|
+
/**
 * Tell whether a file has one of the supported document extensions
 * (.md, .txt, .html, .htm, .pdf, .docx). The comparison ignores case.
 *
 * @param {string} filePath - File name or path.
 * @returns {boolean} true when the extension is in the supported list.
 */
function isSupportedFormat(filePath) {
    const supportedExtensions = new Set(['.md', '.txt', '.html', '.htm', '.pdf', '.docx']);
    const extension = path.extname(filePath).toLowerCase();
    return supportedExtensions.has(extension);
}
|
|
499
|
+
//# sourceMappingURL=document-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-parser.js","sourceRoot":"","sources":["../../src/services/document-parser.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoCA,sCAoBC;AAkDD,sDAiCC;AAKD,kDAeC;AAyWD,4CAmBC;AAKD,8CAGC;AAniBD,6CAA+B;AAC/B,2CAA6B;AAC7B,iDAAyC;AACzC,+BAAiC;AAEjC,MAAM,aAAa,GAAG,IAAA,gBAAS,EAAC,wBAAQ,CAAC,CAAC;AAsB1C;;;;;;;;GAQG;AACI,KAAK,UAAU,aAAa,CAAC,QAAgB;IAClD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACjD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,KAAK;YACR,OAAO,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACnC,KAAK,MAAM;YACT,OAAO,aAAa,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,OAAO,mBAAmB,CAAC,MAAM,CAAC,CAAC;QACrC,KAAK,MAAM;YACT,OAAO,kBAAkB,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC9C,KAAK,OAAO;YACV,OAAO,mBAAmB,CAAC,MAAM,CAAC,CAAC;QACrC;YACE,sBAAsB;YACtB,OAAO,aAAa,CAAC,MAAM,CAAC,CAAC;IACjC,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,MAAc;IACvC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACpD,OAAO;QACL,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,MAAM,EAAE,UAAU;QAClB,MAAM,EAAE,SAAS,CAAC,MAAM;QACxB,QAAQ,EAAE;YACR,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;SACvC;KACF,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,MAAc;IACnC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACpD,OAAO;QACL,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,MAAM,EAAE,UAAU;QAClB,MAAM,EAAE,SAAS,CAAC,MAAM;QACxB,QAAQ,EAAE;YACR,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;SACvC;KACF,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,SAAS,iBAAiB,CAAC,OAAe;IACxC,OAAO,OAAO;QACZ,mCAAmC;SAClC,OAAO,CAAC,qBAAqB,EAAE,OAAO,CAAC;QACxC,0CAA0C;SACzC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC;QACnC,mCAAmC;SAClC,OAAO,CAAC,sBAAsB,EAAE,UAAU,CAAC;QAC5C,uCAAuC;SACtC,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC;SAC5B,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAgB,qBAAqB,CAAC,OAAe;IACnD,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC
,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,MAAM,kBAAkB,GAAG,OAAO,CAAC,OAAO,CACxC,kEAAkE,EAClE,CAAC,MAAM,EAAE,OAAe,EAAE,OAAe,EAAE,WAAmB,EAAE,UAAkB,EAAE,EAAE;QACpF,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;QACzD,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,SAAS,WAAW,EAAE,CAAC;QACxC,MAAM,GAAG,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC,CAAC;QAC3D,OAAO,WAAW,EAAE,WAAW,GAAG,IAAI,GAAG,OAAO,QAAQ,MAAM,aAAa,WAAW,CAAC;IACzF,CAAC,CACF,CAAC;IAEF,MAAM,cAAc,GAAG,kBAAkB,CAAC,OAAO,CAC/C,uEAAuE,EACvE,CAAC,KAAK,EAAE,OAAe,EAAE,WAAmB,EAAE,UAAkB,EAAE,EAAE;QAClE,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,MAAM,GAAG,GAAG,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACxC,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;QACzD,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,SAAS,WAAW,EAAE,CAAC;QACxC,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC,CAAC;QAC3D,OAAO,WAAW,EAAE,WAAW,GAAG,IAAI,GAAG,OAAO,QAAQ,MAAM,aAAa,WAAW,CAAC;IACzF,CAAC,CACF,CAAC;IAEF,OAAO;QACL,OAAO,EAAE,cAAc;QACvB,MAAM;KACP,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,OAAe;IACjD,MAAM,aAAa,GAAG,6CAA6C,CAAC;IACpE,IAAI,KAAK,CAAC;IACV,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,KAAK,EAAE,CAAC;QACR,WAAW,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACjC,CAAC;IAED,oFAAoF;IACpF,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC;IAErD,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC;AAClC,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,MAAc;IAC/C,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAEtC,uDAAuD;IACvD,MAAM,QAAQ,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAE7C,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,OAAO;QACL,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,MAAM
,EAAE,UAAU;QAClB,MAAM,EAAE,SAAS,CAAC,MAAM;QACxB,QAAQ,EAAE;YACR,SAAS,EAAE,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;SACxC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,IAAY;IACzC,IAAI,EAAE,GAAG,IAAI,CAAC;IAEd,4CAA4C;IAC5C,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;IACzD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC,CAAC;IAEvD,mBAAmB;IACnB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,cAAc,CAAC,CAAC;IAC/D,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,eAAe,CAAC,CAAC;IAChE,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,gBAAgB,CAAC,CAAC;IACjE,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,iBAAiB,CAAC,CAAC;IAClE,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,kBAAkB,CAAC,CAAC;IACnE,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,mBAAmB,CAAC,CAAC;IAEpE,qBAAqB;IACrB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,2BAA2B,EAAE,YAAY,CAAC,CAAC;IAE3D,sBAAsB;IACtB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;IAEtC,4BAA4B;IAC5B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6CAA6C,EAAE,QAAQ,CAAC,CAAC;IACzE,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,qCAAqC,EAAE,MAAM,CAAC,CAAC;IAE/D,eAAe;IACf,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,iCAAiC,EAAE,MAAM,CAAC,CAAC;IAC3D,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,+BAA+B,EAAE,sBAAsB,CAAC,CAAC;IAEzE,0BAA0B;IAC1B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;QAChE,OAAO,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,6BAA6B,EAAE,QAAQ,CAAC,GAAG,IAAI,CAAC;IAClF,CAAC,CAAC,CAAC;IAEH,wBAAwB;IACxB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;QAChE,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,OAAO,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,6BAA6B,EAAE,CAAC,QAAgB,EAAE,WAAmB,EAAE,EAAE,CAAC,GAAG,KAAK,EAAE,KAAK,WAAW,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,IAAI,CAAC;IAC1J,CAAC,CAAC,CAAC;IAEH,iBAAiB;IACjB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,mCAAmC,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;QACtE,IAAI,OAAO,GAAG,MAAM,CAAC;QACrB,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,IAAI,EAAE,CAAC;QAEhE,IAAI,CAAC,OAAO,CAAC,CAAC,GAAW,EAAE,QAAgB,EAAE,EAAE;YAC7C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,mCAAmC,CAAC,IAAI,EAAE,CAAC;YACnE,MAAM,YAAY,GAAG,KAAK,CA
AC,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE,CAC9C,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CACpC,CAAC;YAEF,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC;gBACpD,iCAAiC;gBACjC,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;oBACnB,OAAO,IAAI,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;gBACpE,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,OAAO,GAAG,IAAI,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,8CAA8C,EAAE,UAAU,CAAC,CAAC;IAE5E,iBAAiB;IACjB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,EAAE,WAAW,CAAC,CAAC;IACpF,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,EAAE,WAAW,CAAC,CAAC;IACpF,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,mCAAmC,EAAE,SAAS,CAAC,CAAC;IAEhE,8CAA8C;IAC9C,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAEhC,uBAAuB;IACvB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC/B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC9B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC9B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAChC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC/B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAChC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACjC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACjC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;IAEpC,gCAAgC;IAChC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAErC,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AACnB,CAAC;AAED,SAAS,WAAW,CAAC,IAAY,EAAE,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,GAAG;QAAE,OAAO,QAAQ,CAAC;IAC1B,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;IACjC,OAAO,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;AACrD,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,OAAe,EAAE,QAAgB;IACnE,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;QAClB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CACxB,OAAO,CAAC,GAAG,EAAE,EACb,iBAAiB,EACjB,WAAW,EACX,GAA
G,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAC1D,CAAC;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC9C,MAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAE7B,IAAI,CAAC;QACH,MAAM,aAAa,CAAC,UAAU,EAAE;YAC9B,MAAM;YACN,IAAI,EAAE,GAAG;YACT,IAAI,EAAE,MAAM,CAAC,QAAQ,CAAC;YACtB,OAAO;YACP,SAAS;SACV,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,KAAK;aACnB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;aACvC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACb,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YACpD,MAAM,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YACpD,OAAO,EAAE,GAAG,EAAE,CAAC;QACjB,CAAC,CAAC,CAAC;QAEL,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YAC5C,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YAC5C,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;YACxE,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,SAAS,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;gBAClD,GAAG,EAAE,OAAO,MAAM,GAAG;gBACrB,QAAQ,EAAE,WAAW;gBACrB,aAAa,EAAE,MAAM,CAAC,MAAM;gBAC5B,OAAO,EAAE,yBAAyB,MAAM,EAAE;aAC3C,CAAC,CAAC;QACL,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACP,6CAA6C;QAC7C,OAAO,EAAE,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;IACnD,CAAC;AACH,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,MAAc,EAAE,QAAiB;IACjE,IAAI,CAAC;QACH,gDAAgD;QAChD,MAAM,QAAQ,GAAG,wDAAa,WAAW,GAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAE9C,0CAA0C;QAC1C,yEAAyE;QACzE,MAAM,iBAAiB,GAAG,mBAAmB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAE3D
,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,sBAAsB,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,QAAQ;YACzB,CAAC,CAAC,MAAM,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;YACnF,CAAC,CAAC,EAAE,CAAC;QAEP,MAAM,YAAY,GAAG,UAAU,CAAC,GAAG,CACjC,KAAK,CAAC,EAAE,CAAC,SAAS,KAAK,CAAC,EAAE,WAAW,KAAK,CAAC,GAAG,IAAI,GAAG,OAAO,KAAK,CAAC,QAAQ,MAAM,KAAK,CAAC,aAAa,SAAS,CAC7G,CAAC;QACF,MAAM,qBAAqB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC;YACnD,CAAC,CAAC,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,iBAAiB,EAAE;YACtD,CAAC,CAAC,iBAAiB,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,qBAAqB,CAAC,qBAAqB,CAAC,CAAC,OAAO;YAC7D,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,UAAU;YAClB,QAAQ,EAAE;gBACR,KAAK,EAAE,MAAM,CAAC,QAAQ;gBACtB,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;aAC3C;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sCAAsC;QACtC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QACtC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtB,OAAO;gBACL,OAAO,EAAE,qBAAqB,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO;gBAC/D,MAAM,EAAE,UAAU;gBAClB,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE,EAAE;aACb,CAAC;QACJ,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,wBAAwB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACpG,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,IAAI,EAAE,GAAG,IAAI,CAAC;IAEd,gDAAgD;IAChD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,gBAAgB,EAAE,MAAM,CAAC,CAAC;IAE1C,uCAAuC;IACvC,yEAAyE;IACzE,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,IAAI,aAAa,GAAG,IAAI,CAAC;IAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAE5C,kCAAkC;QAClC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACxB,aAAa,GAAG,IAAI,CAAC;YACrB,SAAS;QACX,CAAC;QAED,6CAA6C;QAC7C,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC;QACtC,MAAM,SAAS,GAAG,IAAI,KAAK,IAAI,CAAC,WAAW,E
AAE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvF,MAAM,WAAW,GAAG,kCAAkC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QACrF,MAAM,uBAAuB,GAAG,wBAAwB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpE,MAAM,gBAAgB,GAAG,sCAAsC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE3E,IAAI,aAAa,IAAI,WAAW,EAAE,CAAC;YACjC,IAAI,gBAAgB,EAAE,CAAC;gBACrB,sBAAsB;gBACtB,cAAc,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;YACnC,CAAC;iBAAM,IAAI,uBAAuB,EAAE,CAAC;gBACnC,uCAAuC;gBACvC,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBACnD,cAAc,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;YACnE,CAAC;iBAAM,IAAI,SAAS,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBACzC,gDAAgD;gBAChD,cAAc,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC;YACpC,CAAC;iBAAM,IAAI,WAAW,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC;gBACtE,qDAAqD;gBACrD,cAAc,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;YACrC,CAAC;iBAAM,CAAC;gBACN,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;QAED,aAAa,GAAG,KAAK,CAAC;IACxB,CAAC;IAED,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE/B,gCAAgC;IAChC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAErC,OAAO,iBAAiB,CAAC,EAAE,CAAC,CAAC;AAC/B,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,MAAc;IAC/C,IAAI,CAAC;QACH,gDAAgD;QAChD,MAAM,OAAO,GAAG,wDAAa,SAAS,GAAC,CAAC;QAExC,gFAAgF;QAChF,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,EAAE;YACzD,QAAQ,EAAE;gBACR,uCAAuC;gBACvC,uCAAuC;gBACvC,uCAAuC;gBACvC,uCAAuC;gBACvC,uCAAuC;gBACvC,uCAAuC;gBACvC,yCAAyC;gBACzC,+CAA+C;aAChD;SACF,CAAC,CAAC;QAEH,2BAA2B;QAC3B,MAAM,QAAQ,GAAG,qBAAqB,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACzD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;QAElD,iCAAiC;QACjC,MAAM,QAAQ,GAA+B;YAC3C,SAAS,EAAE,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;SACxC,CAAC;QAEF,8CAA8C;QAC9C,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QACjD,IAAI,UAAU,EAAE,CAAC;YACf,QAA
Q,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACxC,CAAC;QAED,OAAO;YACL,OAAO,EAAE,SAAS,CAAC,OAAO;YAC1B,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,SAAS,CAAC,MAAM;YACxB,QAAQ;SACT,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,oCAAoC;QACpC,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,wDAAa,SAAS,GAAC,CAAC;YACxC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;YACxD,OAAO;gBACL,OAAO,EAAE,qBAAqB,CAAC,iBAAiB,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO;gBACvE,MAAM,EAAE,UAAU;gBAClB,MAAM,EAAE,EAAE;gBACV,QAAQ,EAAE;oBACR,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;iBAC5C;aACF,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;YAClC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBACtB,OAAO;oBACL,OAAO,EAAE,qBAAqB,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO;oBAC/D,MAAM,EAAE,UAAU;oBAClB,MAAM,EAAE,EAAE;oBACV,QAAQ,EAAE,EAAE;iBACb,CAAC;YACJ,CAAC;QACH,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,yBAAyB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACrG,CAAC;AACH,CAAC;AAED;;;GAGG;AACI,KAAK,UAAU,gBAAgB,CAAC,SAAmB;IACxD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,CAAC,CAAC;YAC7C,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,SAAS,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QACzE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,8CAA8C;YAC9C,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAClD,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,SAAS,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAClF,CAAC;YAAC,MAAM,CAAC;gBACP,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,8BAA8B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACvI,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,SAAgB,iBAAiB,CAAC,QAAgB;IAChD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACjD,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM
,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AACzE,CAAC"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { Logger } from '../utils/logger';
|
|
2
|
+
import { ChunkSummary } from '../types';
|
|
3
|
+
export interface LLMConfig {
|
|
4
|
+
apiKey: string;
|
|
5
|
+
baseUrl: string;
|
|
6
|
+
model: string;
|
|
7
|
+
maxTokens?: number;
|
|
8
|
+
temperature?: number;
|
|
9
|
+
}
|
|
10
|
+
export interface LLMResponse {
|
|
11
|
+
content: string;
|
|
12
|
+
usage?: {
|
|
13
|
+
promptTokens: number;
|
|
14
|
+
completionTokens: number;
|
|
15
|
+
totalTokens: number;
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
export interface ImageUnderstandingInput {
|
|
19
|
+
id: string;
|
|
20
|
+
alt?: string;
|
|
21
|
+
mimeType?: string;
|
|
22
|
+
estimatedSize?: number;
|
|
23
|
+
dataUri: string;
|
|
24
|
+
}
|
|
25
|
+
export declare function getLLMConfig(): LLMConfig;
|
|
26
|
+
export declare function getLLMConfigForPurpose(purpose: 'scan' | 'analyze' | 'vision' | 'default'): LLMConfig;
|
|
27
|
+
export declare function validateLLMConfig(config: LLMConfig): void;
|
|
28
|
+
export declare function callLLM(prompt: string, config: LLMConfig, logger?: Logger): Promise<LLMResponse>;
|
|
29
|
+
export declare function describeEmbeddedImages(images: ImageUnderstandingInput[], config: LLMConfig, logger?: Logger): Promise<Record<string, string>>;
|
|
30
|
+
export declare function analyzeChunkWithLLM(chunkContent: string, chunkId: number, focus: string, config: LLMConfig, logger?: Logger): Promise<ChunkSummary>;
|
|
31
|
+
export interface DocumentSection {
|
|
32
|
+
id: string;
|
|
33
|
+
title: string;
|
|
34
|
+
level: number;
|
|
35
|
+
startLine: number;
|
|
36
|
+
endLine: number;
|
|
37
|
+
content: string;
|
|
38
|
+
suggestedGroup?: string;
|
|
39
|
+
}
|
|
40
|
+
export interface DocumentStructure {
|
|
41
|
+
title: string;
|
|
42
|
+
sections: DocumentSection[];
|
|
43
|
+
suggestedGroups: {
|
|
44
|
+
name: string;
|
|
45
|
+
sections: string[];
|
|
46
|
+
reason: string;
|
|
47
|
+
}[];
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Use LLM to analyze document structure and suggest optimal chunking strategy
|
|
51
|
+
*/
|
|
52
|
+
export declare function analyzeDocumentStructure(content: string, config: LLMConfig, logger?: Logger): Promise<DocumentStructure>;
|
|
53
|
+
/**
|
|
54
|
+
* Split content based on LLM-analyzed structure
|
|
55
|
+
*/
|
|
56
|
+
export declare function splitByLLMStructure(content: string, structure: DocumentStructure, maxChunkSize: number): Array<{
|
|
57
|
+
title: string;
|
|
58
|
+
content: string;
|
|
59
|
+
sections: string[];
|
|
60
|
+
}>;
|
|
61
|
+
//# sourceMappingURL=llm.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../src/services/llm.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACzC,OAAO,EAAE,YAAY,EAAiC,MAAM,UAAU,CAAC;AAEvE,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE;QACN,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;CACH;AAED,MAAM,WAAW,uBAAuB;IACtC,EAAE,EAAE,MAAM,CAAC;IACX,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAgB,YAAY,IAAI,SAAS,CAQxC;AAED,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,CAapG;AAED,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAMzD;AAoDD,wBAAsB,OAAO,CAC3B,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,SAAS,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,OAAO,CAAC,WAAW,CAAC,CAkFtB;AA8ID,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,uBAAuB,EAAE,EACjC,MAAM,EAAE,SAAS,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CA8DjC;AAkFD,wBAAsB,mBAAmB,CACvC,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,SAAS,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,OAAO,CAAC,YAAY,CAAC,CAyEvB;AAMD,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,eAAe,EAAE;QACf,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,EAAE,CAAC;QACnB,MAAM,EAAE,MAAM,CAAC;KAChB,EAAE,CAAC;CACL;AA+FD;;GAEG;AACH,wBAAsB,wBAAwB,CAC5C,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,SAAS,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,OAAO,CAAC,iBAAiB,CAAC,CAiG5B;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,iBAAiB,EAC5B,YAAY,EAAE,MAAM,GACnB,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,QAA
Q,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CA0D/D"}
|