agentlang 0.9.9 → 0.9.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/runtime/interpreter.d.ts.map +1 -1
- package/out/runtime/interpreter.js +19 -1
- package/out/runtime/interpreter.js.map +1 -1
- package/out/runtime/modules/ai.d.ts +1 -0
- package/out/runtime/modules/ai.d.ts.map +1 -1
- package/out/runtime/modules/ai.js +62 -18
- package/out/runtime/modules/ai.js.map +1 -1
- package/out/runtime/resolvers/sqldb/impl.d.ts.map +1 -1
- package/out/runtime/resolvers/sqldb/impl.js +37 -6
- package/out/runtime/resolvers/sqldb/impl.js.map +1 -1
- package/out/runtime/services/documentFetcher.d.ts +62 -0
- package/out/runtime/services/documentFetcher.d.ts.map +1 -0
- package/out/runtime/services/documentFetcher.js +387 -0
- package/out/runtime/services/documentFetcher.js.map +1 -0
- package/package.json +2 -1
- package/src/runtime/interpreter.ts +20 -1
- package/src/runtime/modules/ai.ts +76 -24
- package/src/runtime/resolvers/sqldb/impl.ts +36 -6
- package/src/runtime/services/documentFetcher.ts +468 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
|
|
2
|
+
import { readFile } from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { logger } from '../logger.js';
|
|
5
|
+
import { parseAndEvaluateStatement } from '../interpreter.js';
|
|
6
|
+
import { CoreAIModuleName } from '../modules/ai.js';
|
|
7
|
+
import { TtlCache } from '../state.js';
|
|
8
|
+
import { preprocessRawConfig } from '../util.js';
|
|
9
|
+
import { marked } from 'marked';
|
|
10
|
+
import { isNodeEnv } from '../../utils/runtime.js';
|
|
11
|
+
|
|
12
|
+
// Provider-specific configurations

/**
 * Connection settings for AWS S3 or an S3-compatible store.
 * Any field left undefined falls back to the standard AWS environment
 * variables / default credential chain (see parseS3Url in this file).
 */
export interface S3Config {
  region?: string;
  endpoint?: string; // custom endpoint for S3-compatible stores (e.g. MinIO)
  accessKeyId?: string;
  secretAccessKey?: string;
  forcePathStyle?: boolean; // path-style addressing, required by some S3 clones
}
|
|
20
|
+
|
|
21
|
+
// Generic retrieval configuration for any storage provider
export interface RetrievalConfig {
  // Known providers; the trailing `| string` keeps the union open for
  // custom providers (only 's3' is handled in this file).
  provider: 's3' | 'box' | 'gdrive' | 'azure' | 'onedrive' | string;
  // Provider-specific settings; treated as S3Config when provider === 's3'.
  config: S3Config | Record<string, any>;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Optional embedding parameters carried along with a document so downstream
 * indexing knows how to chunk and embed its content. This module only
 * transports these values; it does not interpret them.
 */
export interface EmbeddingConfig {
  provider?: string;
  model?: string;
  chunkSize?: number; // NOTE(review): unit (chars vs tokens) decided by the consumer
  chunkOverlap?: number; // overlap between consecutive chunks
}
|
|
33
|
+
|
|
34
|
+
/** A document declared in configuration: where to fetch it and how to embed it. */
export interface DocumentConfig {
  title: string; // used as the lookup key by fetchDocumentByTitle
  url: string; // s3://bucket/key, http(s):// URL, or a local file path
  retrievalConfig?: RetrievalConfig;
  embeddingConfig?: EmbeddingConfig;
}
|
|
40
|
+
|
|
41
|
+
/** Result of a successful fetch: normalized plain-text content plus metadata. */
export interface FetchedDocument {
  title: string;
  content: string; // extracted text (PDF/Markdown already converted)
  url: string;
  format: string; // inferred from the URL's file extension, e.g. 'pdf', 'md'
  fetchedAt: Date;
  embeddingConfig?: EmbeddingConfig;
}
|
|
49
|
+
|
|
50
|
+
class DocumentFetcherService {
|
|
51
|
+
private static readonly CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
|
|
52
|
+
private documentCache = new TtlCache<FetchedDocument>(DocumentFetcherService.CACHE_TTL_MS);
|
|
53
|
+
private s3Clients = new Map<string, any>();
|
|
54
|
+
private pdfParser: any = null;
|
|
55
|
+
|
|
56
|
+
async fetchDocument(config: DocumentConfig): Promise<FetchedDocument | null> {
|
|
57
|
+
this.ensureNodeEnv();
|
|
58
|
+
const cacheKey = `${config.title}:${config.url}`;
|
|
59
|
+
const cached = this.documentCache.get(cacheKey);
|
|
60
|
+
|
|
61
|
+
if (cached) {
|
|
62
|
+
logger.debug('Returning cached document', { title: config.title });
|
|
63
|
+
return cached;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
let content: string;
|
|
68
|
+
|
|
69
|
+
if (config.url.startsWith('s3://')) {
|
|
70
|
+
content = await this.fetchFromS3(config);
|
|
71
|
+
} else if (config.url.startsWith('http://') || config.url.startsWith('https://')) {
|
|
72
|
+
content = await this.fetchFromUrl(config.url);
|
|
73
|
+
} else {
|
|
74
|
+
// Local file path
|
|
75
|
+
content = await this.fetchFromLocal(config.url);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const document: FetchedDocument = {
|
|
79
|
+
title: config.title,
|
|
80
|
+
content,
|
|
81
|
+
url: config.url,
|
|
82
|
+
format: this.inferFormat(config.url),
|
|
83
|
+
fetchedAt: new Date(),
|
|
84
|
+
embeddingConfig: config.embeddingConfig,
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
this.documentCache.set(cacheKey, document);
|
|
88
|
+
|
|
89
|
+
// Auto-create Document entity from fetched content
|
|
90
|
+
await this.createDocumentEntity(document);
|
|
91
|
+
|
|
92
|
+
return document;
|
|
93
|
+
} catch (error) {
|
|
94
|
+
logger.error('Failed to fetch document', {
|
|
95
|
+
title: config.title,
|
|
96
|
+
url: config.url,
|
|
97
|
+
error: error instanceof Error ? error.message : String(error),
|
|
98
|
+
stack: error instanceof Error ? error.stack : undefined,
|
|
99
|
+
});
|
|
100
|
+
// Re-throw the error so the caller knows what happened
|
|
101
|
+
throw error;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
async fetchDocumentByTitle(title: string): Promise<FetchedDocument | null> {
|
|
106
|
+
this.ensureNodeEnv();
|
|
107
|
+
// First check if we have it in cache
|
|
108
|
+
// Note: TtlCache doesn't have a way to search by prefix, so we'll fetch directly
|
|
109
|
+
|
|
110
|
+
try {
|
|
111
|
+
// Try to find in loaded config
|
|
112
|
+
const doc = this.findDocumentInConfig(title);
|
|
113
|
+
if (doc) {
|
|
114
|
+
return this.fetchDocument(doc);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
logger.warn('Document not found in config', { title });
|
|
118
|
+
return null;
|
|
119
|
+
} catch (error) {
|
|
120
|
+
logger.error('Failed to fetch document by title', { title, error });
|
|
121
|
+
return null;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
private findDocumentInConfig(title: string): DocumentConfig | null {
|
|
126
|
+
// This method should be called during config loading
|
|
127
|
+
// The documents are stored when the config is parsed
|
|
128
|
+
const docs = getConfiguredDocuments();
|
|
129
|
+
return docs.find(d => d.title === title) || null;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
private async fetchFromS3(config: DocumentConfig): Promise<string> {
|
|
133
|
+
const s3Config = this.parseS3Url(config.url, config.retrievalConfig);
|
|
134
|
+
const client = await this.getOrCreateS3Client(s3Config);
|
|
135
|
+
|
|
136
|
+
try {
|
|
137
|
+
const response = await client.send(
|
|
138
|
+
new GetObjectCommand({
|
|
139
|
+
Bucket: s3Config.bucket,
|
|
140
|
+
Key: s3Config.key,
|
|
141
|
+
})
|
|
142
|
+
);
|
|
143
|
+
|
|
144
|
+
if (!response.Body) {
|
|
145
|
+
throw new Error('S3 object has no body');
|
|
146
|
+
}
|
|
147
|
+
const bodyBuffer = await this.readS3BodyToBuffer(response.Body as any);
|
|
148
|
+
const contentType = (response.ContentType || '').toLowerCase();
|
|
149
|
+
const lowerKey = s3Config.key.toLowerCase();
|
|
150
|
+
const isPdf = contentType.includes('application/pdf') || lowerKey.endsWith('.pdf');
|
|
151
|
+
const isMarkdown =
|
|
152
|
+
contentType.includes('text/markdown') ||
|
|
153
|
+
lowerKey.endsWith('.md') ||
|
|
154
|
+
lowerKey.endsWith('.markdown') ||
|
|
155
|
+
lowerKey.endsWith('.mdown');
|
|
156
|
+
if (isPdf) {
|
|
157
|
+
return await this.parsePdfBuffer(bodyBuffer);
|
|
158
|
+
}
|
|
159
|
+
if (isMarkdown) {
|
|
160
|
+
return this.parseMarkdownText(bodyBuffer.toString('utf-8'));
|
|
161
|
+
}
|
|
162
|
+
return bodyBuffer.toString('utf-8');
|
|
163
|
+
} catch (error) {
|
|
164
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
165
|
+
const errorStack = error instanceof Error ? error.stack : undefined;
|
|
166
|
+
logger.error('S3 fetch failed', {
|
|
167
|
+
url: config.url,
|
|
168
|
+
bucket: s3Config.bucket,
|
|
169
|
+
key: s3Config.key,
|
|
170
|
+
region: s3Config.region,
|
|
171
|
+
hasAccessKey: !!s3Config.accessKeyId,
|
|
172
|
+
error: errorMessage,
|
|
173
|
+
stack: errorStack,
|
|
174
|
+
});
|
|
175
|
+
throw new Error(
|
|
176
|
+
`Failed to fetch from S3 (bucket: ${s3Config.bucket}, key: ${s3Config.key}, region: ${s3Config.region}): ${errorMessage}`
|
|
177
|
+
);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
private async fetchFromUrl(url: string): Promise<string> {
|
|
182
|
+
try {
|
|
183
|
+
const response = await fetch(url, {
|
|
184
|
+
signal: AbortSignal.timeout(30000),
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
if (!response.ok) {
|
|
188
|
+
throw new Error(`HTTP ${response.status} ${response.statusText}`);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
const body = await response.arrayBuffer();
|
|
192
|
+
const maxSize = 50 * 1024 * 1024;
|
|
193
|
+
if (body.byteLength > maxSize) {
|
|
194
|
+
throw new Error(`Response too large: ${body.byteLength} bytes`);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
const contentType = (response.headers.get('content-type') || '').toLowerCase();
|
|
198
|
+
const lowerUrl = url.toLowerCase();
|
|
199
|
+
const isMarkdown =
|
|
200
|
+
contentType.includes('text/markdown') ||
|
|
201
|
+
lowerUrl.endsWith('.md') ||
|
|
202
|
+
lowerUrl.endsWith('.markdown') ||
|
|
203
|
+
lowerUrl.endsWith('.mdown');
|
|
204
|
+
const text = Buffer.from(body).toString('utf-8');
|
|
205
|
+
return isMarkdown ? this.parseMarkdownText(text) : text;
|
|
206
|
+
} catch (error) {
|
|
207
|
+
logger.error('URL fetch failed', { url, error });
|
|
208
|
+
throw new Error(`Failed to fetch from URL: ${error}`);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
private async fetchFromLocal(filePath: string): Promise<string> {
|
|
213
|
+
try {
|
|
214
|
+
const resolvedPath = path.resolve(filePath);
|
|
215
|
+
const content = await readFile(resolvedPath, 'utf-8');
|
|
216
|
+
const lowerPath = resolvedPath.toLowerCase();
|
|
217
|
+
const isMarkdown =
|
|
218
|
+
lowerPath.endsWith('.md') ||
|
|
219
|
+
lowerPath.endsWith('.markdown') ||
|
|
220
|
+
lowerPath.endsWith('.mdown');
|
|
221
|
+
return isMarkdown ? this.parseMarkdownText(content) : content;
|
|
222
|
+
} catch (error) {
|
|
223
|
+
logger.error('Local file read failed', { path: filePath, error });
|
|
224
|
+
throw new Error(`Failed to read local file: ${error}`);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
private parseS3Url(
|
|
229
|
+
url: string,
|
|
230
|
+
retrievalConfig?: RetrievalConfig
|
|
231
|
+
): {
|
|
232
|
+
bucket: string;
|
|
233
|
+
key: string;
|
|
234
|
+
region: string;
|
|
235
|
+
endpoint?: string;
|
|
236
|
+
accessKeyId?: string;
|
|
237
|
+
secretAccessKey?: string;
|
|
238
|
+
forcePathStyle?: boolean;
|
|
239
|
+
} {
|
|
240
|
+
// Parse s3://bucket/key format
|
|
241
|
+
if (!url.startsWith('s3://')) {
|
|
242
|
+
throw new Error('Invalid S3 URL format. Expected: s3://bucket/key');
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
const withoutProtocol = url.slice(5);
|
|
246
|
+
const firstSlash = withoutProtocol.indexOf('/');
|
|
247
|
+
|
|
248
|
+
if (firstSlash === -1) {
|
|
249
|
+
throw new Error('Invalid S3 URL format. Expected: s3://bucket/key');
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
const bucket = withoutProtocol.slice(0, firstSlash);
|
|
253
|
+
const key = withoutProtocol.slice(firstSlash + 1);
|
|
254
|
+
|
|
255
|
+
const normalizedRetrievalConfig = this.normalizeRetrievalConfig(retrievalConfig);
|
|
256
|
+
|
|
257
|
+
// Get S3-specific config from retrievalConfig if provider is s3
|
|
258
|
+
let s3SpecificConfig: S3Config = {};
|
|
259
|
+
if (normalizedRetrievalConfig?.provider === 's3' && normalizedRetrievalConfig.config) {
|
|
260
|
+
s3SpecificConfig = normalizedRetrievalConfig.config as S3Config;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
return {
|
|
264
|
+
bucket,
|
|
265
|
+
key,
|
|
266
|
+
region: s3SpecificConfig.region || process.env.AWS_REGION || 'us-east-1',
|
|
267
|
+
endpoint: s3SpecificConfig.endpoint,
|
|
268
|
+
accessKeyId: s3SpecificConfig.accessKeyId || process.env.AWS_ACCESS_KEY_ID,
|
|
269
|
+
secretAccessKey: s3SpecificConfig.secretAccessKey || process.env.AWS_SECRET_ACCESS_KEY,
|
|
270
|
+
forcePathStyle: s3SpecificConfig.forcePathStyle,
|
|
271
|
+
};
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
private async getOrCreateS3Client(config: {
|
|
275
|
+
region: string;
|
|
276
|
+
endpoint?: string;
|
|
277
|
+
accessKeyId?: string;
|
|
278
|
+
secretAccessKey?: string;
|
|
279
|
+
forcePathStyle?: boolean;
|
|
280
|
+
}): Promise<any> {
|
|
281
|
+
const clientKey = `${config.region}:${config.endpoint || 'default'}:${config.accessKeyId || 'default'}`;
|
|
282
|
+
|
|
283
|
+
if (!this.s3Clients.has(clientKey)) {
|
|
284
|
+
const client = new S3Client({
|
|
285
|
+
region: config.region,
|
|
286
|
+
endpoint: config.endpoint,
|
|
287
|
+
forcePathStyle: config.forcePathStyle,
|
|
288
|
+
credentials:
|
|
289
|
+
config.accessKeyId && config.secretAccessKey
|
|
290
|
+
? {
|
|
291
|
+
accessKeyId: config.accessKeyId,
|
|
292
|
+
secretAccessKey: config.secretAccessKey,
|
|
293
|
+
}
|
|
294
|
+
: undefined,
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
this.s3Clients.set(clientKey, client);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
return this.s3Clients.get(clientKey)!;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
private async createDocumentEntity(document: FetchedDocument): Promise<void> {
|
|
304
|
+
try {
|
|
305
|
+
// Build the Document entity attributes
|
|
306
|
+
let docAttrs = `{title "${document.title}", content "${this.escapeContent(document.content)}"`;
|
|
307
|
+
|
|
308
|
+
// Add embeddingConfig if present
|
|
309
|
+
if (document.embeddingConfig) {
|
|
310
|
+
const configStr = JSON.stringify(document.embeddingConfig).replace(/"/g, '\\"');
|
|
311
|
+
docAttrs += `, embeddingConfig "${configStr}"`;
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
docAttrs += '}';
|
|
315
|
+
|
|
316
|
+
// Upsert to database
|
|
317
|
+
await parseAndEvaluateStatement(`{${CoreAIModuleName}/Document ${docAttrs}, @upsert}`);
|
|
318
|
+
|
|
319
|
+
logger.debug('Created Document entity', {
|
|
320
|
+
title: document.title,
|
|
321
|
+
url: document.url,
|
|
322
|
+
hasEmbeddingConfig: !!document.embeddingConfig,
|
|
323
|
+
});
|
|
324
|
+
} catch (error) {
|
|
325
|
+
logger.error('Failed to create Document entity', {
|
|
326
|
+
title: document.title,
|
|
327
|
+
error,
|
|
328
|
+
});
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
private escapeContent(content: string): string {
|
|
333
|
+
return content
|
|
334
|
+
.replace(/\\/g, '\\\\')
|
|
335
|
+
.replace(/"/g, '\\"')
|
|
336
|
+
.replace(/\n/g, '\\n')
|
|
337
|
+
.replace(/\r/g, '\\r')
|
|
338
|
+
.replace(/\t/g, '\\t');
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
private inferFormat(url: string): string {
|
|
342
|
+
const parts = url.split('.');
|
|
343
|
+
if (parts.length > 1) {
|
|
344
|
+
return parts[parts.length - 1].toLowerCase();
|
|
345
|
+
}
|
|
346
|
+
return 'txt';
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
clearCache(title?: string): void {
|
|
350
|
+
if (title) {
|
|
351
|
+
// Note: TtlCache doesn't expose keys, clear all for now
|
|
352
|
+
this.documentCache.clear();
|
|
353
|
+
} else {
|
|
354
|
+
this.documentCache.clear();
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
private normalizeConfigValue(value: any): any {
|
|
359
|
+
if (value instanceof Map) {
|
|
360
|
+
const obj: Record<string, any> = {};
|
|
361
|
+
value.forEach((v, k) => {
|
|
362
|
+
obj[k] = this.normalizeConfigValue(v);
|
|
363
|
+
});
|
|
364
|
+
return obj;
|
|
365
|
+
}
|
|
366
|
+
if (Array.isArray(value)) {
|
|
367
|
+
return value.map(v => this.normalizeConfigValue(v));
|
|
368
|
+
}
|
|
369
|
+
if (value && typeof value === 'object') {
|
|
370
|
+
const obj: Record<string, any> = {};
|
|
371
|
+
Object.entries(value).forEach(([k, v]) => {
|
|
372
|
+
obj[k] = this.normalizeConfigValue(v);
|
|
373
|
+
});
|
|
374
|
+
return obj;
|
|
375
|
+
}
|
|
376
|
+
return value;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
private normalizeRetrievalConfig(retrievalConfig?: RetrievalConfig): RetrievalConfig | undefined {
|
|
380
|
+
if (!retrievalConfig) return undefined;
|
|
381
|
+
const normalized = this.normalizeConfigValue(retrievalConfig);
|
|
382
|
+
if (normalized && typeof normalized === 'object') {
|
|
383
|
+
preprocessRawConfig(normalized);
|
|
384
|
+
}
|
|
385
|
+
return normalized as RetrievalConfig;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
private ensureNodeEnv(): void {
|
|
389
|
+
if (!isNodeEnv) {
|
|
390
|
+
throw new Error('Document fetching is only available in Node.js environment');
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
private async readS3BodyToBuffer(body: any): Promise<Buffer> {
|
|
395
|
+
if (body.transformToByteArray) {
|
|
396
|
+
const bytes = await body.transformToByteArray();
|
|
397
|
+
return Buffer.from(bytes);
|
|
398
|
+
}
|
|
399
|
+
if (body.transformToString) {
|
|
400
|
+
const text = await body.transformToString('utf-8');
|
|
401
|
+
return Buffer.from(text, 'utf-8');
|
|
402
|
+
}
|
|
403
|
+
const chunks: Buffer[] = [];
|
|
404
|
+
for await (const chunk of body) {
|
|
405
|
+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
406
|
+
}
|
|
407
|
+
return Buffer.concat(chunks);
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
private async getPdfParser(): Promise<any> {
|
|
411
|
+
if (!this.pdfParser) {
|
|
412
|
+
const pdfModule: any = await import('pdf-parse');
|
|
413
|
+
this.pdfParser = pdfModule.PDFParse || pdfModule.default;
|
|
414
|
+
}
|
|
415
|
+
return this.pdfParser;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
private async parsePdfBuffer(buffer: Buffer): Promise<string> {
|
|
419
|
+
try {
|
|
420
|
+
const PDFParseClass = await this.getPdfParser();
|
|
421
|
+
const parser = new PDFParseClass({
|
|
422
|
+
data: buffer,
|
|
423
|
+
verbosity: 0,
|
|
424
|
+
});
|
|
425
|
+
const data = await parser.getText();
|
|
426
|
+
return data.text;
|
|
427
|
+
} catch (error: any) {
|
|
428
|
+
logger.error(`Failed to parse PDF: ${error.message}`);
|
|
429
|
+
throw new Error(`PDF parsing failed: ${error.message}`);
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
private parseMarkdownText(markdown: string): string {
|
|
434
|
+
const html = marked.parse(markdown);
|
|
435
|
+
if (typeof html !== 'string') {
|
|
436
|
+
return markdown;
|
|
437
|
+
}
|
|
438
|
+
return html
|
|
439
|
+
.replace(/<\s*br\s*\/?>/gi, '\n')
|
|
440
|
+
.replace(/<\/(p|li|h[1-6]|blockquote|pre|tr|table)>/gi, '\n')
|
|
441
|
+
.replace(/<[^>]+>/g, '')
|
|
442
|
+
.replace(/\n{3,}/g, '\n\n')
|
|
443
|
+
.trim();
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// Store configured documents from config.al
|
|
448
|
+
let configuredDocuments: DocumentConfig[] = [];
|
|
449
|
+
|
|
450
|
+
export function registerConfiguredDocument(doc: DocumentConfig): void {
|
|
451
|
+
// Check if already registered
|
|
452
|
+
const existing = configuredDocuments.find(d => d.title === doc.title);
|
|
453
|
+
if (!existing) {
|
|
454
|
+
configuredDocuments.push(doc);
|
|
455
|
+
logger.debug('Registered configured document', { title: doc.title, url: doc.url });
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
export function getConfiguredDocuments(): DocumentConfig[] {
|
|
460
|
+
return [...configuredDocuments];
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
export function clearConfiguredDocuments(): void {
|
|
464
|
+
configuredDocuments = [];
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
// Shared singleton — all consumers go through this one instance so they
// share its document cache and pooled S3 clients.
export const documentFetcher = new DocumentFetcherService();
export default documentFetcher;
|