oricore 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -1
- package/README.zh-CN.md +13 -1
- package/package.json +1 -1
- package/src/core/config.ts +55 -0
- package/src/core/configValidation.ts +293 -0
- package/src/core/history.ts +103 -4
- package/src/session/session.ts +50 -26
- package/src/tools/tools/read.ts +68 -5
- package/src/types/pdf-parse.d.ts +22 -0
- package/src/utils/fileLock.ts +191 -0
- package/src/utils/pdf-parser.ts +186 -0
package/README.md
CHANGED
|
@@ -51,6 +51,16 @@ pnpm add oricore
|
|
|
51
51
|
bun add oricore
|
|
52
52
|
```
|
|
53
53
|
|
|
54
|
+
### Optional Dependencies
|
|
55
|
+
|
|
56
|
+
OriCore has support for additional features through optional dependencies:
|
|
57
|
+
|
|
58
|
+
**PDF Support**
|
|
59
|
+
```bash
|
|
60
|
+
npm install pdf-parse
|
|
61
|
+
```
|
|
62
|
+
The `read` tool can parse PDF files when `pdf-parse` is installed. Without it, PDF reading will be disabled.
|
|
63
|
+
|
|
54
64
|
## Quick Start
|
|
55
65
|
|
|
56
66
|
```typescript
|
|
@@ -113,7 +123,7 @@ OriCore includes a comprehensive set of tools:
|
|
|
113
123
|
|
|
114
124
|
| Tool | Description |
|
|
115
125
|
|------|-------------|
|
|
116
|
-
| `read` | Read file contents |
|
|
126
|
+
| `read` | Read file contents (supports text, images, and PDF*) |
|
|
117
127
|
| `write` | Write new files |
|
|
118
128
|
| `edit` | Edit existing files with search/replace |
|
|
119
129
|
| `glob` | Find files by pattern |
|
|
@@ -124,6 +134,8 @@ OriCore includes a comprehensive set of tools:
|
|
|
124
134
|
| `task` | Spawn specialized agents |
|
|
125
135
|
| `todo` | Track task progress |
|
|
126
136
|
|
|
137
|
+
*PDF support requires the optional `pdf-parse` package (see "Optional Dependencies" above)
|
|
138
|
+
|
|
127
139
|
## Configuration
|
|
128
140
|
|
|
129
141
|
### Full Configuration Example
|
package/README.zh-CN.md
CHANGED
|
@@ -51,6 +51,16 @@ pnpm add oricore
|
|
|
51
51
|
bun add oricore
|
|
52
52
|
```
|
|
53
53
|
|
|
54
|
+
### 可选依赖
|
|
55
|
+
|
|
56
|
+
OriCore 支持通过可选依赖来扩展功能:
|
|
57
|
+
|
|
58
|
+
**PDF 支持**
|
|
59
|
+
```bash
|
|
60
|
+
npm install pdf-parse
|
|
61
|
+
```
|
|
62
|
+
安装 `pdf-parse` 后,`read` 工具可以解析 PDF 文件。未安装时,PDF 读取功能将被禁用。
|
|
63
|
+
|
|
54
64
|
## 快速开始
|
|
55
65
|
|
|
56
66
|
```typescript
|
|
@@ -113,7 +123,7 @@ OriCore 包含一套完整的工具:
|
|
|
113
123
|
|
|
114
124
|
| 工具 | 描述 |
|
|
115
125
|
|------|------|
|
|
116
|
-
| `read` |
|
|
126
|
+
| `read` | 读取文件内容(支持文本、图片和 PDF*) |
|
|
117
127
|
| `write` | 写入新文件 |
|
|
118
128
|
| `edit` | 编辑现有文件(搜索/替换) |
|
|
119
129
|
| `glob` | 按模式查找文件 |
|
|
@@ -124,6 +134,8 @@ OriCore 包含一套完整的工具:
|
|
|
124
134
|
| `task` | 启动专用 Agent |
|
|
125
135
|
| `todo` | 跟踪任务进度 |
|
|
126
136
|
|
|
137
|
+
*PDF 支持需要安装可选的 `pdf-parse` 包(见上文)
|
|
138
|
+
|
|
127
139
|
## 配置
|
|
128
140
|
|
|
129
141
|
### 完整配置示例
|
package/package.json
CHANGED
package/src/core/config.ts
CHANGED
|
@@ -3,6 +3,12 @@ import fs from 'fs';
|
|
|
3
3
|
import { homedir } from 'os';
|
|
4
4
|
import path from 'pathe';
|
|
5
5
|
import type { Provider } from '../core/model';
|
|
6
|
+
import {
|
|
7
|
+
assertValidConfig,
|
|
8
|
+
formatValidationErrors,
|
|
9
|
+
type ValidationResult,
|
|
10
|
+
validateConfig,
|
|
11
|
+
} from './configValidation';
|
|
6
12
|
|
|
7
13
|
export type McpStdioServerConfig = {
|
|
8
14
|
type: 'stdio';
|
|
@@ -167,6 +173,7 @@ export class ConfigManager {
|
|
|
167
173
|
argvConfig: Partial<Config>;
|
|
168
174
|
globalConfigPath: string;
|
|
169
175
|
projectConfigPath: string;
|
|
176
|
+
private validationEnabled: boolean = false;
|
|
170
177
|
|
|
171
178
|
constructor(cwd: string, productName: string, argvConfig: Partial<Config>) {
|
|
172
179
|
const lowerProductName = productName.toLowerCase();
|
|
@@ -203,9 +210,57 @@ export class ConfigManager {
|
|
|
203
210
|
config.planModel = config.planModel || config.model;
|
|
204
211
|
config.smallModel = config.smallModel || config.model;
|
|
205
212
|
config.visionModel = config.visionModel || config.model;
|
|
213
|
+
|
|
214
|
+
// Validate config if enabled
|
|
215
|
+
if (this.validationEnabled) {
|
|
216
|
+
assertValidConfig(config);
|
|
217
|
+
}
|
|
218
|
+
|
|
206
219
|
return config;
|
|
207
220
|
}
|
|
208
221
|
|
|
222
|
+
/**
 * Switch configuration validation on.
 *
 * Only flips the `validationEnabled` flag. While the flag is set, the
 * config-building code in this class runs `assertValidConfig`, which throws
 * on an invalid configuration.
 */
enableValidation(): void {
  this.validationEnabled = true;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Switch configuration validation off.
 *
 * Clears the `validationEnabled` flag so the config-building code in this
 * class no longer calls `assertValidConfig`.
 */
disableValidation(): void {
  this.validationEnabled = false;
}
|
|
236
|
+
|
|
237
|
+
/**
 * Validate the effective configuration and return the report.
 *
 * Rebuilds the merged config (argv values take precedence over project
 * values, then global values, then DEFAULT_CONFIG), applies the same
 * plan/small/vision model fallbacks, and hands the result to
 * `validateConfig`. Never throws for an invalid config; problems are
 * returned in the result.
 *
 * @returns The validation result with errors and warnings.
 */
validate(): ValidationResult {
  const fileLayers = defu(
    this.projectConfig,
    defu(this.globalConfig, DEFAULT_CONFIG),
  );
  const merged = defu(this.argvConfig, fileLayers) as Config;

  // The secondary models fall back to the main model when unset.
  merged.planModel = merged.planModel || merged.model;
  merged.smallModel = merged.smallModel || merged.model;
  merged.visionModel = merged.visionModel || merged.model;

  return validateConfig(merged);
}
|
|
252
|
+
|
|
253
|
+
/**
 * Describe the validation state of the current configuration as text.
 *
 * @returns `'Configuration is valid'` when there are no errors (warnings,
 *   if any, are not included in that case); otherwise the formatted list
 *   of errors and warnings.
 */
getValidationErrors(): string {
  const report = this.validate();
  return report.valid
    ? 'Configuration is valid'
    : formatValidationErrors(report);
}
|
|
263
|
+
|
|
209
264
|
removeConfig(global: boolean, key: string, values?: string[]) {
|
|
210
265
|
assertGlobalAllowed(global, key);
|
|
211
266
|
const config = global ? this.globalConfig : this.projectConfig;
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Configuration Validation
|
|
3
|
+
* Runtime validation for engine configuration
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { Config, McpServerConfig } from './config';
|
|
7
|
+
import { z } from 'zod';
|
|
8
|
+
|
|
9
|
+
/**
 * A problem that makes the configuration invalid.
 */
export interface ValidationError {
  /** Dot-separated location of the offending value, e.g. `mcpServers.foo.timeout`. */
  path: string;
  /** Human-readable description of the problem. */
  message: string;
  /** The offending value, when one could be looked up. */
  value?: any;
}

/**
 * A non-fatal observation about the configuration.
 */
export interface ValidationWarning {
  /** Dot-separated location the warning refers to. */
  path: string;
  /** Human-readable description of the concern. */
  message: string;
  /** Extra detail attached to the warning, if any. */
  value?: any;
}

/**
 * Outcome of validating a configuration object.
 */
export interface ValidationResult {
  /** True when `errors` is empty; warnings do not affect validity. */
  valid: boolean;
  errors: ValidationError[];
  warnings: ValidationWarning[];
}
|
|
29
|
+
|
|
30
|
+
// Reusable optional field schemas shared by the object schemas below.
const optString = z.string().optional();
const optStringList = z.array(z.string()).optional();
const optFlag = z.boolean().optional();
const optStringMap = z.record(z.string(), z.string()).optional();
// Record whose values are checked by a dedicated schema in validateConfig.
const optLooseRecord = z.record(z.string(), z.any()).optional();

/**
 * Shape of a single `mcpServers` entry. Every field is optional here; the
 * "command or url" requirement is checked separately in validateConfig.
 */
const McpServerConfigSchema = z.object({
  type: z.enum(['stdio', 'sse', 'http']).optional(),
  command: optString,
  args: optStringList,
  env: optStringMap,
  url: optString,
  disable: optFlag,
  timeout: z.number().positive().optional(),
  headers: optStringMap,
});

/**
 * Shape of a single `provider` entry.
 */
const ProviderConfigSchema = z.object({
  apiKey: optString,
  apiKeys: optStringList,
  baseURL: optString,
  proxy: optString,
});

/**
 * Shape of a single `agent` entry.
 */
const AgentConfigSchema = z.object({
  model: optString,
  tools: optStringList,
  disallowedTools: optStringList,
  forkContext: optFlag,
  color: optString,
});

/**
 * Top-level configuration schema. Only `model` is required.
 */
const ConfigSchema = z.object({
  // Models
  model: z.string().min(1, 'Model cannot be empty'),
  planModel: optString,
  smallModel: optString,
  visionModel: optString,

  // Behavior
  language: optString,
  quiet: optFlag,
  approvalMode: z.enum(['default', 'autoEdit', 'yolo']).optional(),
  autoCompact: optFlag,
  temperature: z
    .number()
    .min(0, 'Temperature must be at least 0')
    .max(2, 'Temperature must be at most 2')
    .optional(),

  // Feature flags
  plugins: optStringList,
  tools: z.record(z.string(), z.boolean()).optional(),
  todo: optFlag,

  // Nested sections: entries are validated one by one in validateConfig
  mcpServers: optLooseRecord,
  provider: optLooseRecord,
  extensions: optLooseRecord,
  agent: optLooseRecord,
});
|
|
103
|
+
|
|
104
|
+
/**
 * Validate a configuration object.
 *
 * Runs the top-level schema, then checks every `mcpServers`, `provider` and
 * `agent` entry against its own schema, and finally applies a few
 * cross-field rules (an enabled MCP server needs `command` or `url`, etc.).
 *
 * The function itself does not throw on malformed input: entries that are
 * `null` or not objects are reported through the schema errors and skipped
 * by the cross-field rules.
 *
 * @param config - The (possibly partial) configuration to check.
 * @returns `valid` is true when no errors were found; warnings never affect
 *   validity.
 */
export function validateConfig(config: Partial<Config>): ValidationResult {
  const errors: ValidationError[] = [];
  const warnings: ValidationWarning[] = [];

  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  // Turns schema issues into errors. The prefix and the issue path are
  // joined without a stray dot when either side is empty.
  const collectIssues = (
    prefix: string,
    target: unknown,
    issues: readonly { path: readonly PropertyKey[]; message: string }[],
  ): void => {
    for (const issue of issues) {
      const suffix = issue.path.map(String).join('.');
      errors.push({
        path: [prefix, suffix].filter((part) => part !== '').join('.'),
        message: issue.message,
        value: getNestedValue(target, issue.path as (string | number)[]),
      });
    }
  };

  // Top-level structure. This also enforces the temperature range, so no
  // separate range check is needed (it used to produce a duplicate error).
  const mainResult = ConfigSchema.safeParse(config);
  if (!mainResult.success) {
    collectIssues('', config, mainResult.error.issues);
  }

  // MCP servers
  const mcpServers: unknown = config.mcpServers;
  if (isRecord(mcpServers)) {
    for (const [name, mcpConfig] of Object.entries(mcpServers)) {
      const mcpResult = McpServerConfigSchema.safeParse(mcpConfig);
      if (!mcpResult.success) {
        collectIssues(`mcpServers.${name}`, mcpConfig, mcpResult.error.issues);
      }

      // A non-object entry was already reported above; the rules below
      // only make sense for objects.
      if (!isRecord(mcpConfig)) {
        continue;
      }

      // An enabled server must say how to reach it.
      if (!mcpConfig.disable && !mcpConfig.command && !mcpConfig.url) {
        errors.push({
          path: `mcpServers.${name}`,
          message: 'MCP server must have either "command" or "url" configured',
          value: mcpConfig,
        });
      }

      const serverType = mcpConfig.type;

      if (serverType === 'stdio' && !mcpConfig.command) {
        warnings.push({
          path: `mcpServers.${name}`,
          message: 'stdio MCP server type requires "command" to be set',
          value: mcpConfig,
        });
      }

      if ((serverType === 'http' || serverType === 'sse') && !mcpConfig.url) {
        warnings.push({
          path: `mcpServers.${name}`,
          message: `${serverType} MCP server type requires "url" to be set`,
          value: mcpConfig,
        });
      }
    }
  }

  // Providers
  const providers: unknown = config.provider;
  if (isRecord(providers)) {
    for (const [name, providerConfig] of Object.entries(providers)) {
      const providerResult = ProviderConfigSchema.safeParse(providerConfig);
      if (!providerResult.success) {
        collectIssues(
          `provider.${name}`,
          providerConfig,
          providerResult.error.issues,
        );
      }

      if (!isRecord(providerConfig)) {
        continue;
      }

      // Several keys without a single `apiKey`: tell the user rotation applies.
      const { apiKeys } = providerConfig;
      if (Array.isArray(apiKeys) && apiKeys.length > 1 && !providerConfig.apiKey) {
        warnings.push({
          path: `provider.${name}`,
          message: 'Multiple API keys detected - rotation will be used',
          value: { apiKeyCount: apiKeys.length },
        });
      }
    }
  }

  // Agents
  const agents: unknown = config.agent;
  if (isRecord(agents)) {
    for (const [name, agentConfig] of Object.entries(agents)) {
      const agentResult = AgentConfigSchema.safeParse(agentConfig);
      if (!agentResult.success) {
        collectIssues(`agent.${name}`, agentConfig, agentResult.error.issues);
      }
    }
  }

  // Language code format (warning only). A non-string language is already
  // reported as an error by the schema.
  const language: unknown = config.language;
  if (typeof language === 'string' && language) {
    const languageCodeRegex = /^[a-z]{2}(-[A-Z]{2})?$/;
    if (!languageCodeRegex.test(language)) {
      warnings.push({
        path: 'language',
        message: 'Language code should be in ISO 639-1 format (e.g., "en", "zh-CN")',
        value: language,
      });
    }
  }

  return {
    valid: errors.length === 0,
    errors,
    warnings,
  };
}
|
|
238
|
+
|
|
239
|
+
/**
 * Look up the value found by following `path` through `obj`.
 *
 * @param obj - Root value to start from.
 * @param path - Keys / indexes to follow, outermost first.
 * @returns The value at the end of the path. `undefined` when a `null` or
 *   `undefined` value is reached before the path is exhausted. An empty
 *   path returns `obj` itself.
 */
function getNestedValue(obj: any, path: (string | number)[]): any {
  return path.reduce(
    (node: any, key) => (node == null ? undefined : node[key]),
    obj,
  );
}
|
|
252
|
+
|
|
253
|
+
/**
 * Render a validation result as display text.
 *
 * Produces a "Configuration errors:" section and a
 * "Configuration warnings:" section (separated by a blank line when both
 * are present), one bullet per entry. An entry's value, when defined, is
 * appended as JSON.
 *
 * @param result - The validation result to render.
 * @returns The formatted text; an empty string when there is nothing to report.
 */
export function formatValidationErrors(result: ValidationResult): string {
  const renderEntry = (
    entry: { path: string; message: string; value?: unknown },
    valueLabel: string,
  ): string => {
    const valuePart =
      entry.value === undefined
        ? ''
        : ` (${valueLabel}: ${JSON.stringify(entry.value)})`;
    return ` - ${entry.path}: ${entry.message}${valuePart}`;
  };

  const output: string[] = [];

  if (result.errors.length > 0) {
    output.push(
      'Configuration errors:',
      ...result.errors.map((error) => renderEntry(error, 'got')),
    );
  }

  if (result.warnings.length > 0) {
    // Blank separator only when an error section precedes the warnings.
    if (output.length > 0) {
      output.push('');
    }
    output.push(
      'Configuration warnings:',
      ...result.warnings.map((warning) => renderEntry(warning, 'value')),
    );
  }

  return output.join('\n');
}
|
|
282
|
+
|
|
283
|
+
/**
 * Validate `config` and throw when it has errors.
 *
 * @param config - The configuration to check.
 * @throws Error whose message starts with "Invalid configuration:" followed
 *   by the formatted errors and warnings.
 */
export function assertValidConfig(config: Partial<Config>): void {
  const report = validateConfig(config);
  if (report.valid) {
    return;
  }
  throw new Error(`Invalid configuration:\n${formatValidationErrors(report)}`);
}
|
package/src/core/history.ts
CHANGED
|
@@ -295,12 +295,52 @@ export class History {
|
|
|
295
295
|
});
|
|
296
296
|
} catch (error) {
|
|
297
297
|
debug('Compact failed:', error);
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
);
|
|
298
|
+
|
|
299
|
+
// Graceful degradation: instead of throwing, use a simple summary
|
|
300
|
+
const fallbackSummary = this.#generateFallbackSummary();
|
|
301
|
+
debug('Using fallback summary due to compaction failure');
|
|
302
|
+
|
|
303
|
+
const summaryMessage: NormalizedMessage = {
|
|
304
|
+
parentUuid: null,
|
|
305
|
+
uuid: randomUUID(),
|
|
306
|
+
role: 'user',
|
|
307
|
+
content: [{ type: 'text', text: fallbackSummary }],
|
|
308
|
+
uiContent: COMPACT_MESSAGE,
|
|
309
|
+
type: 'message',
|
|
310
|
+
timestamp: new Date().toISOString(),
|
|
311
|
+
};
|
|
312
|
+
this.messages = [summaryMessage];
|
|
313
|
+
await this.onMessage?.(summaryMessage);
|
|
314
|
+
|
|
315
|
+
return {
|
|
316
|
+
compressed: true,
|
|
317
|
+
summary: fallbackSummary,
|
|
318
|
+
fallback: true,
|
|
319
|
+
error: error instanceof Error ? error.message : String(error),
|
|
320
|
+
};
|
|
301
321
|
}
|
|
302
322
|
if (!summary || summary.trim().length === 0) {
|
|
303
|
-
|
|
323
|
+
// If summary is empty, use fallback
|
|
324
|
+
const fallbackSummary = this.#generateFallbackSummary();
|
|
325
|
+
debug('Generated summary is empty, using fallback');
|
|
326
|
+
|
|
327
|
+
const summaryMessage: NormalizedMessage = {
|
|
328
|
+
parentUuid: null,
|
|
329
|
+
uuid: randomUUID(),
|
|
330
|
+
role: 'user',
|
|
331
|
+
content: [{ type: 'text', text: fallbackSummary }],
|
|
332
|
+
uiContent: COMPACT_MESSAGE,
|
|
333
|
+
type: 'message',
|
|
334
|
+
timestamp: new Date().toISOString(),
|
|
335
|
+
};
|
|
336
|
+
this.messages = [summaryMessage];
|
|
337
|
+
await this.onMessage?.(summaryMessage);
|
|
338
|
+
|
|
339
|
+
return {
|
|
340
|
+
compressed: true,
|
|
341
|
+
summary: fallbackSummary,
|
|
342
|
+
fallback: true,
|
|
343
|
+
};
|
|
304
344
|
}
|
|
305
345
|
|
|
306
346
|
const summaryMessage: NormalizedMessage = {
|
|
@@ -318,6 +358,65 @@ export class History {
|
|
|
318
358
|
return {
|
|
319
359
|
compressed: true,
|
|
320
360
|
summary,
|
|
361
|
+
fallback: false,
|
|
321
362
|
};
|
|
322
363
|
}
|
|
364
|
+
|
|
365
|
+
/**
 * Build a simple, locally generated summary for use when model-based
 * compaction fails or returns nothing, so the conversation can continue.
 *
 * The summary contains the number of messages, a short preview of the most
 * recent user message, and the names of up to five recently used tools.
 *
 * @returns The summary text, organised in `<conversation_overview>`,
 *   optional `<recent_actions>` and `<note>` sections.
 */
#generateFallbackSummary(): string {
  const MAX_RECENT_TOOLS = 5;
  const MAX_PREVIEW_LENGTH = 100;

  const messageCount = this.messages.length;
  const lastUserMessage = [...this.messages]
    .reverse()
    .find((m) => m.role === 'user');

  // Walk backwards so the newest tool results are seen first. The cap is
  // checked after every addition, so a single message carrying many tool
  // results cannot push the list past the limit.
  const recentTools = new Set<string>();
  scan: for (let i = this.messages.length - 1; i >= 0; i--) {
    const msg = this.messages[i];
    if (msg.role !== 'tool' || !Array.isArray(msg.content)) {
      continue;
    }
    for (const part of msg.content as any[]) {
      // Skip parts without a usable name so "undefined" never gets listed.
      if (part?.type !== 'tool-result' || typeof part.toolName !== 'string') {
        continue;
      }
      recentTools.add(part.toolName);
      if (recentTools.size >= MAX_RECENT_TOOLS) {
        break scan;
      }
    }
  }

  const sections: string[] = [];

  sections.push(`<conversation_overview>`);
  sections.push(
    `Previous conversation contained ${messageCount} messages. `,
  );
  if (lastUserMessage) {
    const raw: unknown = lastUserMessage.content;
    let content: string;
    if (typeof raw === 'string') {
      content = raw;
    } else {
      // Prefer the readable text of `{ type: 'text', text }` parts over a
      // raw JSON dump; fall back to JSON when there is no text part.
      const textParts = Array.isArray(raw)
        ? (raw as any[])
            .filter((p) => p?.type === 'text' && typeof p.text === 'string')
            .map((p) => p.text as string)
        : [];
      content =
        textParts.length > 0
          ? textParts.join('\n')
          : (JSON.stringify(raw) ?? ''); // stringify yields undefined for undefined input
    }
    const preview =
      content.length > MAX_PREVIEW_LENGTH
        ? content.substring(0, MAX_PREVIEW_LENGTH) + '...'
        : content;
    sections.push(`Last user message was about: "${preview}"`);
  }
  sections.push(`</conversation_overview>`);

  if (recentTools.size > 0) {
    sections.push(`\n<recent_actions>`);
    sections.push(`Recent tools used: ${Array.from(recentTools).join(', ')}`);
    sections.push(`</recent_actions>`);
  }

  sections.push(`\n<note>`);
  sections.push(
    `This is an auto-generated summary due to compression unavailability. `,
  );
  sections.push(
    `The conversation history has been compressed to continue the session. `,
  );
  sections.push(`</note>`);

  return sections.join('\n');
}
|
|
323
422
|
}
|
package/src/session/session.ts
CHANGED
|
@@ -5,6 +5,8 @@ import { History } from '../core/history';
|
|
|
5
5
|
import type { NormalizedMessage } from '../core/message';
|
|
6
6
|
import { Usage } from '../core/usage';
|
|
7
7
|
import { randomUUID } from '../utils/randomUUID';
|
|
8
|
+
import { lockRegistry } from '../utils/fileLock';
|
|
9
|
+
import type { ModeType } from '../modes/types';
|
|
8
10
|
|
|
9
11
|
export type SessionId = string;
|
|
10
12
|
|
|
@@ -65,8 +67,10 @@ const DEFAULT_SESSION_CONFIG: SessionConfig = {
|
|
|
65
67
|
export class SessionConfigManager {
|
|
66
68
|
logPath: string;
|
|
67
69
|
config: SessionConfig;
|
|
70
|
+
mode: ModeType | null;
|
|
68
71
|
constructor(opts: { logPath: string }) {
  const { logPath } = opts;
  this.logPath = logPath;
  // Reset the mode before loading: the config-loading code in this class
  // assigns `this.mode` when the stored config line carries one.
  this.mode = null;
  this.config = this.load(logPath);
}
|
|
72
76
|
|
|
@@ -81,6 +85,10 @@ export class SessionConfigManager {
|
|
|
81
85
|
try {
|
|
82
86
|
const parsed = JSON.parse(line);
|
|
83
87
|
if (parsed.type === 'config') {
|
|
88
|
+
// Load mode if present
|
|
89
|
+
if (parsed.mode) {
|
|
90
|
+
this.mode = parsed.mode;
|
|
91
|
+
}
|
|
84
92
|
return parsed.config;
|
|
85
93
|
}
|
|
86
94
|
} catch {}
|
|
@@ -90,33 +98,49 @@ export class SessionConfigManager {
|
|
|
90
98
|
return DEFAULT_SESSION_CONFIG;
|
|
91
99
|
}
|
|
92
100
|
}
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return true;
|
|
111
|
-
}
|
|
101
|
+
|
|
102
|
+
/**
 * Remember the session mode in memory. It is written to the log file
 * together with the config on the next `write()`.
 */
setMode(mode: ModeType): void {
  this.mode = mode;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Current session mode, or `null` when none has been set or loaded.
 */
getMode(): ModeType | null {
  return this.mode;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Persist the session config (and mode) as the first line of the log file.
 *
 * Any existing `config` lines are dropped and all other lines are kept in
 * order. The read-modify-write runs under the file lock for `logPath`.
 *
 * @throws Error when the lock cannot be acquired or the log file cannot be
 *   read or rewritten.
 */
async write() {
  // Create the directory BEFORE taking the lock. The lock is a
  // `<logPath>.lock` file in the same directory (assuming
  // `lockRegistry.getLock` hands back a `FileLock` for this path), so
  // acquiring it in a not-yet-existing directory fails with ENOENT and the
  // write would never happen.
  fs.mkdirSync(path.dirname(this.logPath), { recursive: true });

  const lock = lockRegistry.getLock(this.logPath);

  await lock.withLock(async () => {
    const configLine = JSON.stringify({
      type: 'config',
      config: this.config,
      mode: this.mode,
    });

    // New log file: the config line is the whole content.
    if (!fs.existsSync(this.logPath)) {
      fs.writeFileSync(this.logPath, configLine + '\n', 'utf-8');
      return;
    }

    try {
      const content = fs.readFileSync(this.logPath, 'utf-8');
      const filteredLines = content.split('\n').filter((line) => {
        if (!line) return false;
        try {
          const parsed = JSON.parse(line);
          return parsed.type !== 'config';
        } catch {
          // Keep lines that are not JSON rather than losing data.
          return true;
        }
      });
      const newContent = [configLine, ...filteredLines].join('\n');
      fs.writeFileSync(this.logPath, newContent + '\n', 'utf-8');
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e);
      throw new Error(
        `Failed to write config to log file: ${this.logPath}: ${reason}`,
      );
    }
  });
}
|
|
121
145
|
}
|
|
122
146
|
|
package/src/tools/tools/read.ts
CHANGED
|
@@ -9,6 +9,11 @@ import {
|
|
|
9
9
|
MaxFileReadTokenExceededError,
|
|
10
10
|
} from '../../utils/error';
|
|
11
11
|
import { safeStringify } from '../../utils/safeStringify';
|
|
12
|
+
import {
|
|
13
|
+
formatPDFResult,
|
|
14
|
+
isPDFParsingAvailable,
|
|
15
|
+
parsePDF,
|
|
16
|
+
} from '../../utils/pdf-parser';
|
|
12
17
|
|
|
13
18
|
type ImageMediaType =
|
|
14
19
|
| 'image/jpeg'
|
|
@@ -136,11 +141,6 @@ Usage:
|
|
|
136
141
|
|
|
137
142
|
const ext = path.extname(file_path).toLowerCase();
|
|
138
143
|
|
|
139
|
-
// Handle PDF files
|
|
140
|
-
if ('.pdf' === ext) {
|
|
141
|
-
throw new Error('PDF files are not supported yet');
|
|
142
|
-
}
|
|
143
|
-
|
|
144
144
|
const fullFilePath = (() => {
|
|
145
145
|
if (path.isAbsolute(file_path)) {
|
|
146
146
|
return file_path;
|
|
@@ -158,6 +158,69 @@ Usage:
|
|
|
158
158
|
throw new Error(`File ${file_path} does not exist.`);
|
|
159
159
|
})();
|
|
160
160
|
|
|
161
|
+
// Handle PDF files
|
|
162
|
+
if ('.pdf' === ext) {
|
|
163
|
+
const pdfAvailable = await isPDFParsingAvailable();
|
|
164
|
+
if (!pdfAvailable) {
|
|
165
|
+
throw new Error(
|
|
166
|
+
'PDF parsing requires the "pdf-parse" package. Install it with: npm install pdf-parse',
|
|
167
|
+
);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
const stats = fs.statSync(fullFilePath);
|
|
171
|
+
|
|
172
|
+
// Security: Validate file path to prevent traversal attacks
|
|
173
|
+
const resolvedPath = path.resolve(fullFilePath);
|
|
174
|
+
const normalizedCwd = path.resolve(opts.cwd);
|
|
175
|
+
if (!resolvedPath.startsWith(normalizedCwd)) {
|
|
176
|
+
throw new Error('Invalid file path: path traversal detected');
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// Check file size (warn if > 5MB)
|
|
180
|
+
const fileSizeMB = stats.size / (1024 * 1024);
|
|
181
|
+
if (fileSizeMB > 5) {
|
|
182
|
+
return {
|
|
183
|
+
isError: true,
|
|
184
|
+
llmContent: `PDF file is too large (${Math.round(fileSizeMB * 100) / 100}MB). Maximum supported size is 5MB for PDF files.`,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
try {
|
|
189
|
+
const result = await parsePDF(fullFilePath, {
|
|
190
|
+
maxPages: 100, // Limit to 100 pages
|
|
191
|
+
maxCharsPerPage: 5000, // Limit per page
|
|
192
|
+
includeMetadata: true,
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
// Check token count
|
|
196
|
+
const tokenCount = countTokens(result.text);
|
|
197
|
+
const MAX_PDF_TOKENS = 15000;
|
|
198
|
+
if (tokenCount > MAX_PDF_TOKENS) {
|
|
199
|
+
return {
|
|
200
|
+
isError: true,
|
|
201
|
+
llmContent: `PDF content is too large (${tokenCount} tokens). Maximum supported is ${MAX_PDF_TOKENS} tokens. Try extracting specific pages.`,
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
return {
|
|
206
|
+
returnDisplay: `Read PDF file (${result.pageCount} pages)`,
|
|
207
|
+
llmContent: safeStringify({
|
|
208
|
+
type: 'pdf',
|
|
209
|
+
filePath: file_path,
|
|
210
|
+
pageCount: result.pageCount,
|
|
211
|
+
metadata: result.metadata,
|
|
212
|
+
content: result.text,
|
|
213
|
+
tokenCount,
|
|
214
|
+
}),
|
|
215
|
+
};
|
|
216
|
+
} catch (error) {
|
|
217
|
+
return {
|
|
218
|
+
isError: true,
|
|
219
|
+
llmContent: `Failed to parse PDF file: ${error instanceof Error ? error.message : String(error)}`,
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
161
224
|
// Handle image files
|
|
162
225
|
if (IMAGE_EXTENSIONS.has(ext)) {
|
|
163
226
|
const result = await processImage(fullFilePath, opts.cwd);
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Minimal ambient typings for the optional `pdf-parse` package.
 * Field descriptions follow the pdf-parse README; verify against the
 * installed version.
 */
declare module 'pdf-parse' {
  interface PDFParseOptions {
    /** Maximum number of pages to parse (per the pdf-parse README). */
    max?: number;
  }

  interface PDFParseData {
    /** Number of pages in the document. */
    numpages: number;
    /** Number of pages that were rendered. */
    numrender: number;
    /** PDF info dictionary (untyped). */
    info: any;
    /** PDF metadata (untyped). */
    metadata: any;
    /** Extracted text. */
    text: string;
    /** Version string reported by the parser. */
    version: string;
  }

  type PDFParseFn = (
    buffer: Buffer,
    options?: PDFParseOptions,
  ) => Promise<PDFParseData>;

  /** The module is callable directly and also exposes the same function as `default`. */
  interface PDFStatic {
    (buffer: Buffer, options?: PDFParseOptions): Promise<PDFParseData>;
    default: PDFParseFn;
  }

  const pdfParse: PDFStatic;
  export = pdfParse;
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'pathe';
|
|
3
|
+
import { randomUUID } from './randomUUID';
|
|
4
|
+
|
|
5
|
+
const LOCK_TIMEOUT = 30_000; // 30 seconds
|
|
6
|
+
const LOCK_POLL_INTERVAL = 100; // 100ms
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* File-based lock for concurrent write safety
|
|
10
|
+
* Uses lock files with unique identifiers to prevent race conditions
|
|
11
|
+
*/
|
|
12
|
+
export class FileLock {
|
|
13
|
+
private lockFilePath: string;
|
|
14
|
+
private lockId: string | null = null;
|
|
15
|
+
private acquired = false;
|
|
16
|
+
|
|
17
|
+
constructor(filePath: string) {
|
|
18
|
+
// Lock file is the original file path with .lock extension
|
|
19
|
+
this.lockFilePath = `${filePath}.lock`;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Attempt to acquire the lock
|
|
24
|
+
* @returns true if lock was acquired, false otherwise
|
|
25
|
+
*/
|
|
26
|
+
async acquire(): Promise<boolean> {
|
|
27
|
+
const startTime = Date.now();
|
|
28
|
+
|
|
29
|
+
while (Date.now() - startTime < LOCK_TIMEOUT) {
|
|
30
|
+
try {
|
|
31
|
+
// Try to create lock file exclusively
|
|
32
|
+
const lockId = randomUUID();
|
|
33
|
+
const lockContent = JSON.stringify({
|
|
34
|
+
lockId,
|
|
35
|
+
pid: process.pid,
|
|
36
|
+
timestamp: Date.now(),
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
// Check if lock file exists and is valid
|
|
40
|
+
if (fs.existsSync(this.lockFilePath)) {
|
|
41
|
+
const existingLock = this.readLockFile();
|
|
42
|
+
if (existingLock && !this.isLockExpired(existingLock)) {
|
|
43
|
+
// Lock is held by another process and not expired
|
|
44
|
+
await new Promise((resolve) =>
|
|
45
|
+
setTimeout(resolve, LOCK_POLL_INTERVAL),
|
|
46
|
+
);
|
|
47
|
+
continue;
|
|
48
|
+
}
|
|
49
|
+
// Lock is expired or invalid, try to remove it
|
|
50
|
+
try {
|
|
51
|
+
fs.unlinkSync(this.lockFilePath);
|
|
52
|
+
} catch {
|
|
53
|
+
// Ignore errors, another process might have removed it
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Try to create lock file atomically
|
|
58
|
+
fs.writeFileSync(this.lockFilePath, lockContent, {
|
|
59
|
+
flag: 'wx', // Exclusive create (fail if exists)
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
this.lockId = lockId;
|
|
63
|
+
this.acquired = true;
|
|
64
|
+
return true;
|
|
65
|
+
} catch (error: any) {
|
|
66
|
+
if (error.code === 'EEXIST') {
|
|
67
|
+
// Lock file exists, wait and retry
|
|
68
|
+
await new Promise((resolve) =>
|
|
69
|
+
setTimeout(resolve, LOCK_POLL_INTERVAL),
|
|
70
|
+
);
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
// Other error, fail fast
|
|
74
|
+
return false;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Timeout exceeded
|
|
79
|
+
return false;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
 * Give up the lock held by this instance.
 *
 * Does nothing unless this instance is marked as acquired and has a lockId.
 * The lock file is deleted only when the lockId stored in it matches ours.
 * Any error raised while reading or deleting the file is ignored, and the
 * instance state is reset afterwards regardless of the outcome.
 */
release(): void {
  if (this.acquired && this.lockId) {
    try {
      const onDisk = this.readLockFile();
      // this.lockId is a non-empty string here, so a missing or foreign
      // lock file can never compare equal.
      if (onDisk?.lockId === this.lockId) {
        fs.unlinkSync(this.lockFilePath);
      }
    } catch {
      // Ignore errors during release
    } finally {
      this.lockId = null;
      this.acquired = false;
    }
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Run `callback` while this lock is held.
 *
 * The lock is acquired first and released in a `finally` block, so it is
 * released whether the callback returns, throws, or rejects.
 *
 * @param callback - Work to perform under the lock; may be sync or async
 * @returns The callback's (awaited) result
 * @throws Error when `acquire()` reports failure
 */
async withLock<T>(callback: () => T | Promise<T>): Promise<T> {
  if (!(await this.acquire())) {
    throw new Error(
      `Failed to acquire lock for ${this.lockFilePath} after ${LOCK_TIMEOUT}ms`,
    );
  }

  try {
    const outcome = await callback();
    return outcome;
  } finally {
    this.release();
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Read the lock file and return its contents, or `null` when the file is
 * missing, unreadable, not valid JSON, or not shaped like a lock record.
 *
 * The shape check matters: a file holding parseable JSON without a numeric
 * `timestamp` (e.g. `{}`) would otherwise be handed to `isLockExpired`,
 * where `Date.now() - undefined` is NaN and the lock would never be treated
 * as expired. Returning `null` lets callers handle it as an invalid lock.
 *
 * @returns The validated lock record, or `null` if none could be read
 */
private readLockFile():
  | { lockId: string; pid: number; timestamp: number }
  | null {
  try {
    const parsed: unknown = JSON.parse(
      fs.readFileSync(this.lockFilePath, 'utf-8'),
    );
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }

    const { lockId, pid, timestamp } = parsed as Record<string, unknown>;
    if (
      typeof lockId !== 'string' ||
      typeof pid !== 'number' ||
      typeof timestamp !== 'number' ||
      !Number.isFinite(timestamp)
    ) {
      return null;
    }

    return { lockId, pid, timestamp };
  } catch {
    // Missing file, read failure, or malformed JSON.
    return null;
  }
}
|
|
136
|
+
|
|
137
|
+
/**
 * Report whether a lock record is stale.
 *
 * @param lock - Lock record as read from the lock file
 * @returns `true` when more than LOCK_TIMEOUT milliseconds have passed
 *          since the record's `timestamp`
 */
private isLockExpired(lock: {
  lockId: string;
  pid: number;
  timestamp: number;
}): boolean {
  const ageMs = Date.now() - lock.timestamp;
  return ageMs > LOCK_TIMEOUT;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Delete the lock file unconditionally, without checking who owns it.
 * Intended for recovering from stale locks; use with caution.
 * Errors are swallowed, and this instance's acquired/lockId state is left
 * untouched.
 */
forceRelease(): void {
  try {
    const lockPresent = fs.existsSync(this.lockFilePath);
    if (!lockPresent) {
      return;
    }
    fs.unlinkSync(this.lockFilePath);
  } catch {
    // Ignore errors
  }
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
/**
 * Registry that hands out one FileLock per file path.
 *
 * Locks are cached by the exact path string passed in (no normalization is
 * applied here), so repeated requests for the same string share one FileLock.
 */
class LockRegistry {
  private locks = new Map<string, FileLock>();

  /**
   * Return the cached lock for `filePath`, creating and caching it on first use.
   */
  getLock(filePath: string): FileLock {
    let lock = this.locks.get(filePath);
    if (lock === undefined) {
      lock = new FileLock(filePath);
      this.locks.set(filePath, lock);
    }
    return lock;
  }

  /**
   * Call `release()` on every cached lock, then empty the registry
   * (useful for cleanup).
   */
  releaseAll(): void {
    this.locks.forEach((lock) => lock.release());
    this.locks.clear();
  }
}

/** Shared registry instance exported for use by other modules. */
export const lockRegistry = new LockRegistry();
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF Parser Utility
|
|
3
|
+
* Extracts text content from PDF files
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import fs from 'fs';
|
|
7
|
+
import path from 'pathe';
|
|
8
|
+
|
|
9
|
+
/**
 * Outcome of parsing a PDF document.
 */
export interface ParseResult {
  /** Extracted text content. */
  text: string;
  /** Number of pages reported for the document. */
  pageCount: number;
  /**
   * Document information fields, each an optional string.
   * Omitted entirely when no field is available.
   */
  metadata?: Partial<
    Record<
      | 'title'
      | 'author'
      | 'subject'
      | 'keywords'
      | 'creator'
      | 'producer'
      | 'creationDate'
      | 'modificationDate',
      string
    >
  >;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Tuning knobs accepted by the PDF parser. Every field is optional.
 */
export interface ParseOptions {
  /**
   * Whether document metadata should be attached to the result.
   * @default true
   */
  includeMetadata?: boolean;

  /**
   * Upper bound on the number of pages to parse; 0 means all pages.
   * @default 0
   */
  maxPages?: number;

  /**
   * Upper bound on the text length kept per page; 0 means no limit.
   * @default 10000
   */
  maxCharsPerPage?: number;
}
|
|
43
|
+
|
|
44
|
+
/** Fallback values for any ParseOptions field the caller leaves out. */
const DEFAULT_OPTIONS: ParseOptions = {
  includeMetadata: true,
  maxCharsPerPage: 10000,
  // 0 = parse every page
  maxPages: 0,
};
|
|
49
|
+
|
|
50
|
+
/**
 * Cap each form-feed-delimited page at `maxChars` characters and join the
 * pages with a visible page-break marker.
 */
function truncatePdfPages(text: string, maxChars: number): string {
  // Form feed is used as the page separator heuristic. When the text holds
  // no form feeds it is treated as a single page, so the limit then applies
  // to the document as a whole.
  return text
    .split(/\f/)
    .map((pageText) =>
      pageText.length > maxChars
        ? pageText.substring(0, maxChars) + '... [truncated]'
        : pageText,
    )
    .join('\n\n--- Page Break ---\n\n');
}

/**
 * Copy the known, non-empty document-info fields into a metadata object;
 * returns undefined when nothing usable is present.
 */
function collectPdfMetadata(info: unknown): ParseResult['metadata'] {
  if (typeof info !== 'object' || info === null) {
    return undefined;
  }

  const source = info as Record<string, unknown>;
  const fieldMap = [
    ['Title', 'title'],
    ['Author', 'author'],
    ['Subject', 'subject'],
    ['Keywords', 'keywords'],
    ['Creator', 'creator'],
    ['Producer', 'producer'],
    ['CreationDate', 'creationDate'],
    ['ModDate', 'modificationDate'],
  ] as const;

  const metadata: NonNullable<ParseResult['metadata']> = {};
  for (const [infoKey, metaKey] of fieldMap) {
    const value = source[infoKey];
    if (value) {
      metadata[metaKey] = String(value);
    }
  }

  return Object.keys(metadata).length > 0 ? metadata : undefined;
}

/** Read a string `code` property off an arbitrary thrown value, if any. */
function pdfErrorCode(error: unknown): string | undefined {
  if (typeof error === 'object' && error !== null && 'code' in error) {
    const code = (error as { code?: unknown }).code;
    return typeof code === 'string' ? code : undefined;
  }
  return undefined;
}

/** Best-effort human-readable description of an arbitrary thrown value. */
function pdfErrorMessage(error: unknown): string {
  if (typeof error === 'object' && error !== null && 'message' in error) {
    const message = (error as { message?: unknown }).message;
    if (typeof message === 'string' && message) {
      return message;
    }
  }
  return String(error);
}

/**
 * Parse a PDF file and extract text content.
 *
 * The optional "pdf-parse" package is loaded on demand. File access is
 * asynchronous so large documents do not block the event loop while being
 * read.
 *
 * @param filePath - Absolute path to the PDF file
 * @param options - Parsing options; omitted fields fall back to DEFAULT_OPTIONS
 * @returns Extracted text, the page count reported by pdf-parse, and metadata
 *          when requested and available
 * @throws Error `PDF file not found: …` when the file cannot be stat'ed
 * @throws Error with an install hint when "pdf-parse" cannot be loaded
 * @throws Error `Failed to parse PDF file: …` for any other failure
 */
export async function parsePDF(
  filePath: string,
  options: ParseOptions = {},
): Promise<ParseResult> {
  const opts = { ...DEFAULT_OPTIONS, ...options };

  // One stat call serves as both existence check and size lookup, which
  // avoids a window between "exists" and "stat" in which the file could
  // disappear and surface a raw filesystem error.
  let fileSizeBytes: number;
  try {
    fileSizeBytes = (await fs.promises.stat(filePath)).size;
  } catch {
    throw new Error(`PDF file not found: ${filePath}`);
  }

  // Check file size (warn if > 10MB)
  const fileSizeMB = fileSizeBytes / (1024 * 1024);
  if (fileSizeMB > 10) {
    console.warn(
      `PDF file is large (${fileSizeMB.toFixed(2)}MB), parsing may take a while`,
    );
  }

  try {
    // Try to use pdf-parse if available
    const pdfParse = await import('pdf-parse');
    const dataBuffer = await fs.promises.readFile(filePath);

    const data = await pdfParse.default(dataBuffer, {
      // 0 (or unset) means "no page limit", passed on as undefined
      max: opts.maxPages || undefined,
    });

    const maxChars = opts.maxCharsPerPage ?? 0;
    const text =
      maxChars > 0 ? truncatePdfPages(data.text, maxChars) : data.text;

    const result: ParseResult = {
      text: text.trim(),
      pageCount: data.numpages,
    };

    // Add metadata if requested and available
    if (opts.includeMetadata) {
      const metadata = collectPdfMetadata(data.info);
      if (metadata) {
        result.metadata = metadata;
      }
    }

    return result;
  } catch (error: unknown) {
    const code = pdfErrorCode(error);
    if (code === 'MODULE_NOT_FOUND' || code === 'ERR_MODULE_NOT_FOUND') {
      // pdf-parse not available, provide helpful error
      throw new Error(
        'PDF parsing requires the "pdf-parse" package. Install it with: npm install pdf-parse',
      );
    }

    throw new Error(`Failed to parse PDF file: ${pdfErrorMessage(error)}`);
  }
}
|
|
146
|
+
|
|
147
|
+
/**
 * Probe whether the optional "pdf-parse" package can be loaded.
 *
 * @returns `true` when the dynamic import succeeds, `false` when it fails
 *          for any reason
 */
export async function isPDFParsingAvailable(): Promise<boolean> {
  return import('pdf-parse').then(
    () => true,
    () => false,
  );
}
|
|
158
|
+
|
|
159
|
+
/**
 * Render a parse result as a small markdown-style document for LLM
 * consumption: a heading, the page count, an optional "Metadata" section
 * (title, author, subject, creation date — only those that are set), and
 * the text under a "Content" heading.
 *
 * @param result - Parsed PDF content
 * @returns The formatted, newline-joined string
 */
export function formatPDFResult(result: ParseResult): string {
  const lines: string[] = ['# PDF Document', `Pages: ${result.pageCount}`];

  const info = result.metadata;
  if (info) {
    const labelled: Array<[string, string | undefined]> = [
      ['Title', info.title],
      ['Author', info.author],
      ['Subject', info.subject],
      ['Created', info.creationDate],
    ];
    const metaLines = labelled
      .filter(([, value]) => Boolean(value))
      .map(([label, value]) => `${label}: ${value}`);

    // The section heading is emitted only when at least one field is set.
    if (metaLines.length > 0) {
      lines.push('\n## Metadata', metaLines.join('\n'));
    }
  }

  lines.push('\n## Content', result.text);
  return lines.join('\n');
}
|