agentlang 0.9.10 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/out/extension/main.cjs +38 -38
  2. package/out/extension/main.cjs.map +2 -2
  3. package/out/language/generated/ast.d.ts +1 -1
  4. package/out/language/generated/ast.js +1 -1
  5. package/out/language/generated/grammar.d.ts +1 -1
  6. package/out/language/generated/grammar.js +1 -1
  7. package/out/language/generated/module.d.ts +1 -1
  8. package/out/language/generated/module.js +1 -1
  9. package/out/language/main.cjs +850 -2388
  10. package/out/language/main.cjs.map +4 -4
  11. package/out/runtime/agents/common.d.ts +3 -1
  12. package/out/runtime/agents/common.d.ts.map +1 -1
  13. package/out/runtime/agents/common.js +35 -31
  14. package/out/runtime/agents/common.js.map +1 -1
  15. package/out/runtime/docs.d.ts +1 -0
  16. package/out/runtime/docs.d.ts.map +1 -1
  17. package/out/runtime/docs.js +16 -1
  18. package/out/runtime/docs.js.map +1 -1
  19. package/out/runtime/interpreter.d.ts +1 -0
  20. package/out/runtime/interpreter.d.ts.map +1 -1
  21. package/out/runtime/interpreter.js +41 -8
  22. package/out/runtime/interpreter.js.map +1 -1
  23. package/out/runtime/jsmodules.d.ts +2 -1
  24. package/out/runtime/jsmodules.d.ts.map +1 -1
  25. package/out/runtime/jsmodules.js +2 -1
  26. package/out/runtime/jsmodules.js.map +1 -1
  27. package/out/runtime/loader.d.ts.map +1 -1
  28. package/out/runtime/loader.js +3 -2
  29. package/out/runtime/loader.js.map +1 -1
  30. package/out/runtime/module.d.ts +1 -0
  31. package/out/runtime/module.d.ts.map +1 -1
  32. package/out/runtime/module.js +3 -0
  33. package/out/runtime/module.js.map +1 -1
  34. package/out/runtime/modules/ai.d.ts +11 -0
  35. package/out/runtime/modules/ai.d.ts.map +1 -1
  36. package/out/runtime/modules/ai.js +163 -10
  37. package/out/runtime/modules/ai.js.map +1 -1
  38. package/out/runtime/modules/core.d.ts.map +1 -1
  39. package/out/runtime/modules/core.js +7 -1
  40. package/out/runtime/modules/core.js.map +1 -1
  41. package/out/runtime/services/documentFetcher.d.ts +22 -14
  42. package/out/runtime/services/documentFetcher.d.ts.map +1 -1
  43. package/out/runtime/services/documentFetcher.js +348 -153
  44. package/out/runtime/services/documentFetcher.js.map +1 -1
  45. package/package.json +1 -1
  46. package/src/language/generated/ast.ts +1 -1
  47. package/src/language/generated/grammar.ts +1 -1
  48. package/src/language/generated/module.ts +1 -1
  49. package/src/runtime/agents/common.ts +37 -31
  50. package/src/runtime/docs.ts +17 -1
  51. package/src/runtime/interpreter.ts +44 -6
  52. package/src/runtime/jsmodules.ts +3 -1
  53. package/src/runtime/loader.ts +3 -2
  54. package/src/runtime/module.ts +4 -0
  55. package/src/runtime/modules/ai.ts +194 -9
  56. package/src/runtime/modules/core.ts +7 -1
  57. package/src/runtime/services/documentFetcher.ts +372 -149
@@ -5,9 +5,8 @@ var __asyncValues = (this && this.__asyncValues) || function (o) {
5
5
  function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
6
6
  function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
7
7
  };
8
- import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
8
+ import { GetObjectCommand, S3Client } from '@aws-sdk/client-s3';
9
9
  import { readFile } from 'node:fs/promises';
10
- import path from 'node:path';
11
10
  import { logger } from '../logger.js';
12
11
  import { parseAndEvaluateStatement } from '../interpreter.js';
13
12
  import { CoreAIModuleName } from '../modules/ai.js';
@@ -21,9 +20,14 @@ class DocumentFetcherService {
21
20
  this.s3Clients = new Map();
22
21
  this.pdfParser = null;
23
22
  }
23
+ configureDocumentService(config) {
24
+ this.documentServiceConfig = config;
25
+ logger.info('Document service configured', { baseUrl: config.baseUrl });
26
+ }
24
27
  async fetchDocument(config) {
28
+ var _a, _b, _c, _d, _e;
25
29
  this.ensureNodeEnv();
26
- const cacheKey = `${config.title}:${config.url}`;
30
+ const cacheKey = `${config.title}:${config.url || config.documentServiceId}`;
27
31
  const cached = this.documentCache.get(cacheKey);
28
32
  if (cached) {
29
33
  logger.debug('Returning cached document', { title: config.title });
@@ -31,26 +35,92 @@ class DocumentFetcherService {
31
35
  }
32
36
  try {
33
37
  let content;
34
- if (config.url.startsWith('s3://')) {
38
+ let sourceUrl;
39
+ if ((_a = config.url) === null || _a === void 0 ? void 0 : _a.startsWith('document-service://')) {
40
+ if (!config.retrievalConfig || config.retrievalConfig.provider !== 'document-service') {
41
+ throw new Error('Document service URL requires retrievalConfig with provider: "document-service"');
42
+ }
43
+ const dsConfig = config.retrievalConfig.config;
44
+ if (!(dsConfig === null || dsConfig === void 0 ? void 0 : dsConfig.baseUrl)) {
45
+ throw new Error('Document service config requires baseUrl');
46
+ }
47
+ const urlPath = config.url.replace('document-service://', '');
48
+ const parts = urlPath.split('/');
49
+ if (parts.length !== 3) {
50
+ throw new Error(`Invalid document service URL format: ${config.url}. Expected: document-service://<user-uuid>/<app-uuid>/<doc-uuid>.ext`);
51
+ }
52
+ const appUuid = parts[1];
53
+ const docIdWithExt = parts[2];
54
+ const docId = docIdWithExt.split('.')[0]; // Remove extension
55
+ this.documentServiceConfig = {
56
+ baseUrl: dsConfig.baseUrl,
57
+ appName: appUuid,
58
+ authToken: dsConfig.authToken,
59
+ getAuthToken: dsConfig.getAuthToken,
60
+ };
61
+ content = await this.fetchFromDocumentService(docId);
62
+ sourceUrl = config.url;
63
+ }
64
+ else if (((_b = config.retrievalConfig) === null || _b === void 0 ? void 0 : _b.provider) === 'document-service') {
65
+ const dsConfig = config.retrievalConfig.config;
66
+ if (!(dsConfig === null || dsConfig === void 0 ? void 0 : dsConfig.baseUrl) || !(dsConfig === null || dsConfig === void 0 ? void 0 : dsConfig.appName)) {
67
+ throw new Error('Document service config requires baseUrl and appName');
68
+ }
69
+ this.documentServiceConfig = {
70
+ baseUrl: dsConfig.baseUrl,
71
+ appName: dsConfig.appName,
72
+ authToken: dsConfig.authToken,
73
+ getAuthToken: dsConfig.getAuthToken,
74
+ };
75
+ const docId = await this.lookupDocumentByTitle(config.title);
76
+ if (docId) {
77
+ content = await this.fetchFromDocumentService(docId);
78
+ sourceUrl = `document-service://${docId}`;
79
+ }
80
+ else {
81
+ throw new Error(`Document not found by title in document service: ${config.title}`);
82
+ }
83
+ }
84
+ else if (config.documentServiceId && this.documentServiceConfig) {
85
+ content = await this.fetchFromDocumentService(config.documentServiceId);
86
+ sourceUrl = `document-service://${config.documentServiceId}`;
87
+ }
88
+ else if ((_c = config.url) === null || _c === void 0 ? void 0 : _c.startsWith('s3://')) {
35
89
  content = await this.fetchFromS3(config);
90
+ sourceUrl = config.url;
36
91
  }
37
- else if (config.url.startsWith('http://') || config.url.startsWith('https://')) {
92
+ else if (((_d = config.url) === null || _d === void 0 ? void 0 : _d.startsWith('http://')) || ((_e = config.url) === null || _e === void 0 ? void 0 : _e.startsWith('https://'))) {
38
93
  content = await this.fetchFromUrl(config.url);
94
+ sourceUrl = config.url;
39
95
  }
40
- else {
41
- // Local file path
96
+ else if (config.url) {
42
97
  content = await this.fetchFromLocal(config.url);
98
+ sourceUrl = config.url;
99
+ }
100
+ else {
101
+ if (this.documentServiceConfig) {
102
+ const docId = await this.lookupDocumentByTitle(config.title);
103
+ if (docId) {
104
+ content = await this.fetchFromDocumentService(docId);
105
+ sourceUrl = `document-service://${docId}`;
106
+ }
107
+ else {
108
+ throw new Error(`Document not found by title: ${config.title}`);
109
+ }
110
+ }
111
+ else {
112
+ throw new Error(`No URL or document service ID provided for: ${config.title}`);
113
+ }
43
114
  }
44
115
  const document = {
45
116
  title: config.title,
46
117
  content,
47
- url: config.url,
48
- format: this.inferFormat(config.url),
118
+ url: sourceUrl,
119
+ format: this.inferFormat(sourceUrl),
49
120
  fetchedAt: new Date(),
50
121
  embeddingConfig: config.embeddingConfig,
51
122
  };
52
123
  this.documentCache.set(cacheKey, document);
53
- // Auto-create Document entity from fetched content
54
124
  await this.createDocumentEntity(document);
55
125
  return document;
56
126
  }
@@ -58,24 +128,39 @@ class DocumentFetcherService {
58
128
  logger.error('Failed to fetch document', {
59
129
  title: config.title,
60
130
  url: config.url,
131
+ documentServiceId: config.documentServiceId,
61
132
  error: error instanceof Error ? error.message : String(error),
62
133
  stack: error instanceof Error ? error.stack : undefined,
63
134
  });
64
- // Re-throw the error so the caller knows what happened
65
135
  throw error;
66
136
  }
67
137
  }
68
138
  async fetchDocumentByTitle(title) {
69
139
  this.ensureNodeEnv();
70
- // First check if we have it in cache
71
- // Note: TtlCache doesn't have a way to search by prefix, so we'll fetch directly
72
140
  try {
73
- // Try to find in loaded config
141
+ // First check if we have it in cache
142
+ const cacheKey = `${title}:lookup`;
143
+ const cached = this.documentCache.get(cacheKey);
144
+ if (cached) {
145
+ logger.debug('Returning cached document by title', { title });
146
+ return cached;
147
+ }
148
+ // Try document service lookup first (if configured)
149
+ if (this.documentServiceConfig) {
150
+ const docId = await this.lookupDocumentByTitle(title);
151
+ if (docId) {
152
+ return this.fetchDocument({
153
+ title,
154
+ documentServiceId: docId,
155
+ });
156
+ }
157
+ }
158
+ // Fall back to config-based lookup
74
159
  const doc = this.findDocumentInConfig(title);
75
160
  if (doc) {
76
161
  return this.fetchDocument(doc);
77
162
  }
78
- logger.warn('Document not found in config', { title });
163
+ logger.warn('Document not found', { title });
79
164
  return null;
80
165
  }
81
166
  catch (error) {
@@ -83,11 +168,130 @@ class DocumentFetcherService {
83
168
  return null;
84
169
  }
85
170
  }
86
- findDocumentInConfig(title) {
87
- // This method should be called during config loading
88
- // The documents are stored when the config is parsed
89
- const docs = getConfiguredDocuments();
90
- return docs.find(d => d.title === title) || null;
171
+ // Fetch from secure document-service API
172
+ async fetchFromDocumentService(documentId) {
173
+ var _a, _b, _c, _d;
174
+ if (!this.documentServiceConfig) {
175
+ throw new Error('Document service not configured');
176
+ }
177
+ try {
178
+ // Get token - either static from config or dynamic from function
179
+ let token;
180
+ if (this.documentServiceConfig.authToken) {
181
+ token = this.documentServiceConfig.authToken;
182
+ }
183
+ else if (this.documentServiceConfig.getAuthToken) {
184
+ token = await this.documentServiceConfig.getAuthToken();
185
+ }
186
+ else {
187
+ throw new Error('Document service requires authToken or getAuthToken');
188
+ }
189
+ const url = `${this.documentServiceConfig.baseUrl}/api/documents/${documentId}/content`;
190
+ logger.debug('Fetching from document service', { documentId, url });
191
+ const response = await fetch(url, {
192
+ headers: {
193
+ Authorization: `Bearer ${token}`,
194
+ 'x-app-name': this.documentServiceConfig.appName,
195
+ Accept: 'application/json',
196
+ },
197
+ });
198
+ if (!response.ok) {
199
+ if (response.status === 404) {
200
+ throw new Error(`Document not found: ${documentId}`);
201
+ }
202
+ else if (response.status === 403) {
203
+ throw new Error(`Access denied to document: ${documentId}`);
204
+ }
205
+ else {
206
+ throw new Error(`Document service error: ${response.status} ${response.statusText}`);
207
+ }
208
+ }
209
+ const data = await response.json();
210
+ if (data.isBase64) {
211
+ if (((_a = data.mimeType) === null || _a === void 0 ? void 0 : _a.includes('pdf')) || ((_b = data.format) === null || _b === void 0 ? void 0 : _b.toLowerCase()) === 'pdf') {
212
+ try {
213
+ const { parsePdfBuffer } = await import('../docs.js');
214
+ const buffer = Buffer.from(data.content, 'base64');
215
+ const text = await parsePdfBuffer(new Uint8Array(buffer));
216
+ logger.debug('Extracted text from PDF', { documentId, textLength: text.length });
217
+ return text;
218
+ }
219
+ catch (pdfError) {
220
+ logger.error('Failed to parse PDF from document service', {
221
+ documentId,
222
+ error: pdfError.message,
223
+ });
224
+ throw new Error(`Failed to extract text from PDF: ${pdfError.message}`);
225
+ }
226
+ }
227
+ return Buffer.from(data.content, 'base64').toString('utf-8');
228
+ }
229
+ if (((_c = data.format) === null || _c === void 0 ? void 0 : _c.toLowerCase()) === 'md' || ((_d = data.format) === null || _d === void 0 ? void 0 : _d.toLowerCase()) === 'markdown') {
230
+ try {
231
+ const parsedText = this.parseMarkdownText(data.content);
232
+ logger.debug('Parsed markdown content', { documentId, textLength: parsedText.length });
233
+ return parsedText;
234
+ }
235
+ catch (mdError) {
236
+ logger.warn('Markdown parsing failed, returning raw content', {
237
+ documentId,
238
+ error: mdError.message,
239
+ });
240
+ return data.content;
241
+ }
242
+ }
243
+ return data.content;
244
+ }
245
+ catch (error) {
246
+ logger.error('Document service fetch failed', {
247
+ documentId,
248
+ error: error instanceof Error ? error.message : String(error),
249
+ });
250
+ throw error;
251
+ }
252
+ }
253
+ async lookupDocumentByTitle(title) {
254
+ if (!this.documentServiceConfig) {
255
+ return null;
256
+ }
257
+ try {
258
+ let token;
259
+ if (this.documentServiceConfig.authToken) {
260
+ token = this.documentServiceConfig.authToken;
261
+ }
262
+ else if (this.documentServiceConfig.getAuthToken) {
263
+ token = await this.documentServiceConfig.getAuthToken();
264
+ }
265
+ else {
266
+ throw new Error('Document service requires authToken or getAuthToken');
267
+ }
268
+ const url = `${this.documentServiceConfig.baseUrl}/api/documents/lookup/by-title?title=${encodeURIComponent(title)}`;
269
+ logger.debug('Looking up document by title', { title, url });
270
+ const response = await fetch(url, {
271
+ headers: {
272
+ Authorization: `Bearer ${token}`,
273
+ 'x-app-name': this.documentServiceConfig.appName,
274
+ Accept: 'application/json',
275
+ },
276
+ });
277
+ if (response.status === 404) {
278
+ logger.debug('Document not found by title', { title });
279
+ return null;
280
+ }
281
+ if (!response.ok) {
282
+ throw new Error(`Document service lookup error: ${response.status}`);
283
+ }
284
+ const data = await response.json();
285
+ logger.debug('Found document by title', { title, documentId: data.documentId });
286
+ return data.documentId;
287
+ }
288
+ catch (error) {
289
+ logger.error('Document lookup failed', {
290
+ title,
291
+ error: error instanceof Error ? error.message : String(error),
292
+ });
293
+ return null;
294
+ }
91
295
  }
92
296
  async fetchFromS3(config) {
93
297
  const s3Config = this.parseS3Url(config.url, config.retrievalConfig);
@@ -148,40 +352,44 @@ class DocumentFetcherService {
148
352
  const lowerUrl = url.toLowerCase();
149
353
  const isMarkdown = contentType.includes('text/markdown') ||
150
354
  lowerUrl.endsWith('.md') ||
151
- lowerUrl.endsWith('.markdown') ||
152
- lowerUrl.endsWith('.mdown');
153
- const text = Buffer.from(body).toString('utf-8');
154
- return isMarkdown ? this.parseMarkdownText(text) : text;
355
+ lowerUrl.endsWith('.markdown');
356
+ if (isMarkdown) {
357
+ return this.parseMarkdownText(Buffer.from(body).toString('utf-8'));
358
+ }
359
+ return Buffer.from(body).toString('utf-8');
155
360
  }
156
361
  catch (error) {
157
- logger.error('URL fetch failed', { url, error });
158
- throw new Error(`Failed to fetch from URL: ${error}`);
362
+ logger.error('URL fetch failed', {
363
+ url,
364
+ error: error instanceof Error ? error.message : String(error),
365
+ });
366
+ throw error;
159
367
  }
160
368
  }
161
369
  async fetchFromLocal(filePath) {
162
370
  try {
163
- const resolvedPath = path.resolve(filePath);
164
- const content = await readFile(resolvedPath, 'utf-8');
165
- const lowerPath = resolvedPath.toLowerCase();
166
- const isMarkdown = lowerPath.endsWith('.md') ||
167
- lowerPath.endsWith('.markdown') ||
168
- lowerPath.endsWith('.mdown');
169
- return isMarkdown ? this.parseMarkdownText(content) : content;
371
+ const content = await readFile(filePath, 'utf-8');
372
+ const lowerPath = filePath.toLowerCase();
373
+ const isMarkdown = lowerPath.endsWith('.md') || lowerPath.endsWith('.markdown');
374
+ if (isMarkdown) {
375
+ return this.parseMarkdownText(content);
376
+ }
377
+ return content;
170
378
  }
171
379
  catch (error) {
172
- logger.error('Local file read failed', { path: filePath, error });
173
- throw new Error(`Failed to read local file: ${error}`);
380
+ logger.error('Local file read failed', {
381
+ path: filePath,
382
+ error: error instanceof Error ? error.message : String(error),
383
+ });
384
+ throw error;
174
385
  }
175
386
  }
176
387
  parseS3Url(url, retrievalConfig) {
177
388
  // Parse s3://bucket/key format
178
- if (!url.startsWith('s3://')) {
179
- throw new Error('Invalid S3 URL format. Expected: s3://bucket/key');
180
- }
181
- const withoutProtocol = url.slice(5);
389
+ const withoutProtocol = url.replace('s3://', '');
182
390
  const firstSlash = withoutProtocol.indexOf('/');
183
391
  if (firstSlash === -1) {
184
- throw new Error('Invalid S3 URL format. Expected: s3://bucket/key');
392
+ throw new Error(`Invalid S3 URL format: ${url}`);
185
393
  }
186
394
  const bucket = withoutProtocol.slice(0, firstSlash);
187
395
  const key = withoutProtocol.slice(firstSlash + 1);
@@ -201,6 +409,14 @@ class DocumentFetcherService {
201
409
  forcePathStyle: s3SpecificConfig.forcePathStyle,
202
410
  };
203
411
  }
412
+ normalizeRetrievalConfig(config) {
413
+ if (!config) {
414
+ return undefined;
415
+ }
416
+ // Handle nested config structure from Agentlang
417
+ const normalizedConfig = preprocessRawConfig(config);
418
+ return normalizedConfig;
419
+ }
204
420
  async getOrCreateS3Client(config) {
205
421
  const clientKey = `${config.region}:${config.endpoint || 'default'}:${config.accessKeyId || 'default'}`;
206
422
  if (!this.s3Clients.has(clientKey)) {
@@ -219,6 +435,74 @@ class DocumentFetcherService {
219
435
  }
220
436
  return this.s3Clients.get(clientKey);
221
437
  }
438
+ async parsePdfBuffer(buffer) {
439
+ // Lazy load PDF parser
440
+ if (!this.pdfParser) {
441
+ try {
442
+ const pdfParse = await import('pdf-parse');
443
+ // Handle both ESM and CSM module formats
444
+ const parser = pdfParse.default || pdfParse;
445
+ this.pdfParser = parser;
446
+ }
447
+ catch (error) {
448
+ logger.error('Failed to load PDF parser', { error });
449
+ throw new Error('PDF parsing not available. Please install pdf-parse: npm install pdf-parse');
450
+ }
451
+ }
452
+ try {
453
+ const result = await this.pdfParser(buffer);
454
+ return result.text || '';
455
+ }
456
+ catch (error) {
457
+ logger.error('PDF parsing failed', { error });
458
+ throw new Error(`Failed to parse PDF: ${error}`);
459
+ }
460
+ }
461
+ parseMarkdownText(text) {
462
+ // Convert markdown to plain text for embedding
463
+ // This removes formatting but preserves content structure
464
+ try {
465
+ const html = marked.parse(text);
466
+ // Simple HTML to text conversion
467
+ return html
468
+ .replace(/<[^>]+>/g, ' ') // Remove HTML tags
469
+ .replace(/\s+/g, ' ') // Normalize whitespace
470
+ .replace(/&lt;/g, '<')
471
+ .replace(/&gt;/g, '>')
472
+ .replace(/&amp;/g, '&')
473
+ .replace(/&quot;/g, '"')
474
+ .trim();
475
+ }
476
+ catch (error) {
477
+ logger.warn('Markdown parsing failed, returning raw text', { error });
478
+ return text;
479
+ }
480
+ }
481
+ async readS3BodyToBuffer(body) {
482
+ var _a, e_1, _b, _c;
483
+ if (body.transformToByteArray) {
484
+ const data = await body.transformToByteArray();
485
+ return Buffer.from(data);
486
+ }
487
+ // Fallback for Readable streams
488
+ const chunks = [];
489
+ try {
490
+ for (var _d = true, body_1 = __asyncValues(body), body_1_1; body_1_1 = await body_1.next(), _a = body_1_1.done, !_a; _d = true) {
491
+ _c = body_1_1.value;
492
+ _d = false;
493
+ const chunk = _c;
494
+ chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
495
+ }
496
+ }
497
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
498
+ finally {
499
+ try {
500
+ if (!_d && !_a && (_b = body_1.return)) await _b.call(body_1);
501
+ }
502
+ finally { if (e_1) throw e_1.error; }
503
+ }
504
+ return Buffer.concat(chunks);
505
+ }
222
506
  async createDocumentEntity(document) {
223
507
  try {
224
508
  // Build the Document entity attributes
@@ -253,135 +537,46 @@ class DocumentFetcherService {
253
537
  .replace(/\t/g, '\\t');
254
538
  }
255
539
  inferFormat(url) {
540
+ // Handle document-service URLs
541
+ if (url.startsWith('document-service://')) {
542
+ return 'txt';
543
+ }
256
544
  const parts = url.split('.');
257
545
  if (parts.length > 1) {
258
546
  return parts[parts.length - 1].toLowerCase();
259
547
  }
260
548
  return 'txt';
261
549
  }
262
- clearCache(title) {
263
- if (title) {
264
- // Note: TtlCache doesn't expose keys, clear all for now
265
- this.documentCache.clear();
266
- }
267
- else {
268
- this.documentCache.clear();
269
- }
270
- }
271
- normalizeConfigValue(value) {
272
- if (value instanceof Map) {
273
- const obj = {};
274
- value.forEach((v, k) => {
275
- obj[k] = this.normalizeConfigValue(v);
276
- });
277
- return obj;
278
- }
279
- if (Array.isArray(value)) {
280
- return value.map(v => this.normalizeConfigValue(v));
281
- }
282
- if (value && typeof value === 'object') {
283
- const obj = {};
284
- Object.entries(value).forEach(([k, v]) => {
285
- obj[k] = this.normalizeConfigValue(v);
286
- });
287
- return obj;
288
- }
289
- return value;
290
- }
291
- normalizeRetrievalConfig(retrievalConfig) {
292
- if (!retrievalConfig)
293
- return undefined;
294
- const normalized = this.normalizeConfigValue(retrievalConfig);
295
- if (normalized && typeof normalized === 'object') {
296
- preprocessRawConfig(normalized);
297
- }
298
- return normalized;
550
+ findDocumentInConfig(title) {
551
+ // This method should be called during config loading
552
+ // The documents are stored when the config is parsed
553
+ const docs = getConfiguredDocuments();
554
+ return docs.find(d => d.title === title) || null;
299
555
  }
300
556
  ensureNodeEnv() {
301
557
  if (!isNodeEnv) {
302
558
  throw new Error('Document fetching is only available in Node.js environment');
303
559
  }
304
560
  }
305
- async readS3BodyToBuffer(body) {
306
- var _a, e_1, _b, _c;
307
- if (body.transformToByteArray) {
308
- const bytes = await body.transformToByteArray();
309
- return Buffer.from(bytes);
310
- }
311
- if (body.transformToString) {
312
- const text = await body.transformToString('utf-8');
313
- return Buffer.from(text, 'utf-8');
314
- }
315
- const chunks = [];
316
- try {
317
- for (var _d = true, body_1 = __asyncValues(body), body_1_1; body_1_1 = await body_1.next(), _a = body_1_1.done, !_a; _d = true) {
318
- _c = body_1_1.value;
319
- _d = false;
320
- const chunk = _c;
321
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
322
- }
323
- }
324
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
325
- finally {
326
- try {
327
- if (!_d && !_a && (_b = body_1.return)) await _b.call(body_1);
328
- }
329
- finally { if (e_1) throw e_1.error; }
330
- }
331
- return Buffer.concat(chunks);
332
- }
333
- async getPdfParser() {
334
- if (!this.pdfParser) {
335
- const pdfModule = await import('pdf-parse');
336
- this.pdfParser = pdfModule.PDFParse || pdfModule.default;
337
- }
338
- return this.pdfParser;
339
- }
340
- async parsePdfBuffer(buffer) {
341
- try {
342
- const PDFParseClass = await this.getPdfParser();
343
- const parser = new PDFParseClass({
344
- data: buffer,
345
- verbosity: 0,
346
- });
347
- const data = await parser.getText();
348
- return data.text;
349
- }
350
- catch (error) {
351
- logger.error(`Failed to parse PDF: ${error.message}`);
352
- throw new Error(`PDF parsing failed: ${error.message}`);
353
- }
354
- }
355
- parseMarkdownText(markdown) {
356
- const html = marked.parse(markdown);
357
- if (typeof html !== 'string') {
358
- return markdown;
359
- }
360
- return html
361
- .replace(/<\s*br\s*\/?>/gi, '\n')
362
- .replace(/<\/(p|li|h[1-6]|blockquote|pre|tr|table)>/gi, '\n')
363
- .replace(/<[^>]+>/g, '')
364
- .replace(/\n{3,}/g, '\n\n')
365
- .trim();
561
+ clearCache() {
562
+ // Clear all cache
563
+ this.documentCache.clear();
366
564
  }
367
565
  }
368
566
  DocumentFetcherService.CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
369
- // Store configured documents from config.al
370
- let configuredDocuments = [];
371
- export function registerConfiguredDocument(doc) {
372
- // Check if already registered
373
- const existing = configuredDocuments.find(d => d.title === doc.title);
374
- if (!existing) {
375
- configuredDocuments.push(doc);
376
- logger.debug('Registered configured document', { title: doc.title, url: doc.url });
377
- }
378
- }
379
- export function getConfiguredDocuments() {
380
- return [...configuredDocuments];
567
+ // Singleton instance
568
+ const documentFetcher = new DocumentFetcherService();
569
+ // Helper function to get configured documents from module config
570
+ function getConfiguredDocuments() {
571
+ // This should be populated during config parsing
572
+ // For now, return empty array - actual implementation depends on how
573
+ // the config system stores document definitions
574
+ return global.__configuredDocuments || [];
381
575
  }
382
- export function clearConfiguredDocuments() {
383
- configuredDocuments = [];
576
+ // Export for use in config loading
577
+ export function setConfiguredDocuments(docs) {
578
+ global.__configuredDocuments = docs;
384
579
  }
385
- export const documentFetcher = new DocumentFetcherService();
580
+ export { documentFetcher };
386
581
  export default documentFetcher;
387
582
  //# sourceMappingURL=documentFetcher.js.map