agent-orcha 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +777 -100
- package/dist/lib/agents/agent-executor.d.ts +6 -1
- package/dist/lib/agents/agent-executor.d.ts.map +1 -1
- package/dist/lib/agents/agent-executor.js +241 -45
- package/dist/lib/agents/agent-executor.js.map +1 -1
- package/dist/lib/agents/agent-loader.d.ts.map +1 -1
- package/dist/lib/agents/agent-loader.js +3 -1
- package/dist/lib/agents/agent-loader.js.map +1 -1
- package/dist/lib/agents/index.d.ts +2 -1
- package/dist/lib/agents/index.d.ts.map +1 -1
- package/dist/lib/agents/index.js +1 -0
- package/dist/lib/agents/index.js.map +1 -1
- package/dist/lib/agents/structured-output-wrapper.d.ts +19 -0
- package/dist/lib/agents/structured-output-wrapper.d.ts.map +1 -0
- package/dist/lib/agents/structured-output-wrapper.js +104 -0
- package/dist/lib/agents/structured-output-wrapper.js.map +1 -0
- package/dist/lib/agents/types.d.ts +17 -10
- package/dist/lib/agents/types.d.ts.map +1 -1
- package/dist/lib/agents/types.js +1 -1
- package/dist/lib/agents/types.js.map +1 -1
- package/dist/lib/index.d.ts +9 -7
- package/dist/lib/index.d.ts.map +1 -1
- package/dist/lib/index.js +7 -5
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/knowledge/graph-rag/community-detector.d.ts +16 -0
- package/dist/lib/knowledge/graph-rag/community-detector.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/community-detector.js +81 -0
- package/dist/lib/knowledge/graph-rag/community-detector.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/community-summarizer.d.ts +17 -0
- package/dist/lib/knowledge/graph-rag/community-summarizer.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/community-summarizer.js +87 -0
- package/dist/lib/knowledge/graph-rag/community-summarizer.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/entity-extractor.d.ts +36 -0
- package/dist/lib/knowledge/graph-rag/entity-extractor.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/entity-extractor.js +192 -0
- package/dist/lib/knowledge/graph-rag/entity-extractor.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/extraction-cache.d.ts +30 -0
- package/dist/lib/knowledge/graph-rag/extraction-cache.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/extraction-cache.js +88 -0
- package/dist/lib/knowledge/graph-rag/extraction-cache.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/global-search.d.ts +19 -0
- package/dist/lib/knowledge/graph-rag/global-search.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/global-search.js +96 -0
- package/dist/lib/knowledge/graph-rag/global-search.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/graph-rag-factory.d.ts +24 -0
- package/dist/lib/knowledge/graph-rag/graph-rag-factory.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/graph-rag-factory.js +239 -0
- package/dist/lib/knowledge/graph-rag/graph-rag-factory.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/index.d.ts +14 -0
- package/dist/lib/knowledge/graph-rag/index.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/index.js +12 -0
- package/dist/lib/knowledge/graph-rag/index.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/local-search.d.ts +20 -0
- package/dist/lib/knowledge/graph-rag/local-search.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/local-search.js +110 -0
- package/dist/lib/knowledge/graph-rag/local-search.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/memory-graph-store.d.ts +31 -0
- package/dist/lib/knowledge/graph-rag/memory-graph-store.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/memory-graph-store.js +165 -0
- package/dist/lib/knowledge/graph-rag/memory-graph-store.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/neo4j-graph-store.d.ts +38 -0
- package/dist/lib/knowledge/graph-rag/neo4j-graph-store.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/neo4j-graph-store.js +190 -0
- package/dist/lib/knowledge/graph-rag/neo4j-graph-store.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/search-mode-detector.d.ts +11 -0
- package/dist/lib/knowledge/graph-rag/search-mode-detector.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/search-mode-detector.js +50 -0
- package/dist/lib/knowledge/graph-rag/search-mode-detector.js.map +1 -0
- package/dist/lib/knowledge/graph-rag/types.d.ts +368 -0
- package/dist/lib/knowledge/graph-rag/types.d.ts.map +1 -0
- package/dist/lib/knowledge/graph-rag/types.js +48 -0
- package/dist/lib/knowledge/graph-rag/types.js.map +1 -0
- package/dist/lib/knowledge/index.d.ts +9 -0
- package/dist/lib/knowledge/index.d.ts.map +1 -0
- package/dist/lib/knowledge/index.js +8 -0
- package/dist/lib/knowledge/index.js.map +1 -0
- package/dist/lib/knowledge/knowledge-store-factory.d.ts +16 -0
- package/dist/lib/knowledge/knowledge-store-factory.d.ts.map +1 -0
- package/dist/lib/{vectors/vector-store-factory.js → knowledge/knowledge-store-factory.js} +36 -10
- package/dist/lib/knowledge/knowledge-store-factory.js.map +1 -0
- package/dist/lib/knowledge/knowledge-store-manager.d.ts +18 -0
- package/dist/lib/knowledge/knowledge-store-manager.d.ts.map +1 -0
- package/dist/lib/knowledge/knowledge-store-manager.js +98 -0
- package/dist/lib/knowledge/knowledge-store-manager.js.map +1 -0
- package/dist/lib/knowledge/loaders/database-loader.d.ts +18 -0
- package/dist/lib/knowledge/loaders/database-loader.d.ts.map +1 -0
- package/dist/lib/knowledge/loaders/database-loader.js +115 -0
- package/dist/lib/knowledge/loaders/database-loader.js.map +1 -0
- package/dist/lib/knowledge/loaders/index.d.ts +4 -0
- package/dist/lib/knowledge/loaders/index.d.ts.map +1 -0
- package/dist/lib/knowledge/loaders/index.js +4 -0
- package/dist/lib/knowledge/loaders/index.js.map +1 -0
- package/dist/lib/knowledge/loaders/s3-loader.d.ts +17 -0
- package/dist/lib/knowledge/loaders/s3-loader.d.ts.map +1 -0
- package/dist/lib/knowledge/loaders/s3-loader.js +185 -0
- package/dist/lib/knowledge/loaders/s3-loader.js.map +1 -0
- package/dist/lib/knowledge/loaders/web-loader.d.ts +12 -0
- package/dist/lib/knowledge/loaders/web-loader.d.ts.map +1 -0
- package/dist/lib/knowledge/loaders/web-loader.js +56 -0
- package/dist/lib/knowledge/loaders/web-loader.js.map +1 -0
- package/dist/lib/knowledge/types.d.ts +1839 -0
- package/dist/lib/knowledge/types.d.ts.map +1 -0
- package/dist/lib/knowledge/types.js +111 -0
- package/dist/lib/knowledge/types.js.map +1 -0
- package/dist/lib/knowledge/utils/connection-pool.d.ts +18 -0
- package/dist/lib/knowledge/utils/connection-pool.d.ts.map +1 -0
- package/dist/lib/knowledge/utils/connection-pool.js +77 -0
- package/dist/lib/knowledge/utils/connection-pool.js.map +1 -0
- package/dist/lib/knowledge/utils/file-type-detector.d.ts +10 -0
- package/dist/lib/knowledge/utils/file-type-detector.d.ts.map +1 -0
- package/dist/lib/knowledge/utils/file-type-detector.js +32 -0
- package/dist/lib/knowledge/utils/file-type-detector.js.map +1 -0
- package/dist/lib/knowledge/utils/index.d.ts +3 -0
- package/dist/lib/knowledge/utils/index.d.ts.map +1 -0
- package/dist/lib/knowledge/utils/index.js +3 -0
- package/dist/lib/knowledge/utils/index.js.map +1 -0
- package/dist/lib/mcp/mcp-client.d.ts +9 -1
- package/dist/lib/mcp/mcp-client.d.ts.map +1 -1
- package/dist/lib/mcp/mcp-client.js +33 -0
- package/dist/lib/mcp/mcp-client.js.map +1 -1
- package/dist/lib/memory/conversation-store.d.ts +43 -0
- package/dist/lib/memory/conversation-store.d.ts.map +1 -0
- package/dist/lib/memory/conversation-store.js +109 -0
- package/dist/lib/memory/conversation-store.js.map +1 -0
- package/dist/lib/memory/index.d.ts +3 -0
- package/dist/lib/memory/index.d.ts.map +1 -0
- package/dist/lib/memory/index.js +3 -0
- package/dist/lib/memory/index.js.map +1 -0
- package/dist/lib/memory/types.d.ts +19 -0
- package/dist/lib/memory/types.d.ts.map +1 -0
- package/dist/lib/memory/types.js +6 -0
- package/dist/lib/memory/types.js.map +1 -0
- package/dist/lib/orchestrator.d.ts +56 -14
- package/dist/lib/orchestrator.d.ts.map +1 -1
- package/dist/lib/orchestrator.js +182 -25
- package/dist/lib/orchestrator.js.map +1 -1
- package/dist/lib/tools/agent-tool-wrapper.d.ts +22 -0
- package/dist/lib/tools/agent-tool-wrapper.d.ts.map +1 -0
- package/dist/lib/tools/agent-tool-wrapper.js +56 -0
- package/dist/lib/tools/agent-tool-wrapper.js.map +1 -0
- package/dist/lib/tools/built-in/ask-user.tool.d.ts +7 -0
- package/dist/lib/tools/built-in/ask-user.tool.d.ts.map +1 -0
- package/dist/lib/tools/built-in/ask-user.tool.js +23 -0
- package/dist/lib/tools/built-in/ask-user.tool.js.map +1 -0
- package/dist/lib/tools/built-in/index.d.ts +2 -1
- package/dist/lib/tools/built-in/index.d.ts.map +1 -1
- package/dist/lib/tools/built-in/index.js +2 -1
- package/dist/lib/tools/built-in/index.js.map +1 -1
- package/dist/lib/tools/built-in/knowledge-search.tool.d.ts +4 -0
- package/dist/lib/tools/built-in/knowledge-search.tool.d.ts.map +1 -0
- package/dist/lib/tools/built-in/{vector-search.tool.js → knowledge-search.tool.js} +4 -4
- package/dist/lib/tools/built-in/knowledge-search.tool.js.map +1 -0
- package/dist/lib/tools/index.d.ts +3 -1
- package/dist/lib/tools/index.d.ts.map +1 -1
- package/dist/lib/tools/index.js +3 -1
- package/dist/lib/tools/index.js.map +1 -1
- package/dist/lib/tools/tool-discovery.d.ts +50 -0
- package/dist/lib/tools/tool-discovery.d.ts.map +1 -0
- package/dist/lib/tools/tool-discovery.js +178 -0
- package/dist/lib/tools/tool-discovery.js.map +1 -0
- package/dist/lib/tools/tool-registry.d.ts +19 -3
- package/dist/lib/tools/tool-registry.d.ts.map +1 -1
- package/dist/lib/tools/tool-registry.js +63 -10
- package/dist/lib/tools/tool-registry.js.map +1 -1
- package/dist/lib/workflows/index.d.ts +4 -2
- package/dist/lib/workflows/index.d.ts.map +1 -1
- package/dist/lib/workflows/index.js +3 -1
- package/dist/lib/workflows/index.js.map +1 -1
- package/dist/lib/workflows/interrupt-manager.d.ts +42 -0
- package/dist/lib/workflows/interrupt-manager.d.ts.map +1 -0
- package/dist/lib/workflows/interrupt-manager.js +102 -0
- package/dist/lib/workflows/interrupt-manager.js.map +1 -0
- package/dist/lib/workflows/langgraph-executor.d.ts +51 -0
- package/dist/lib/workflows/langgraph-executor.d.ts.map +1 -0
- package/dist/lib/workflows/langgraph-executor.js +297 -0
- package/dist/lib/workflows/langgraph-executor.js.map +1 -0
- package/dist/lib/workflows/types.d.ts +911 -34
- package/dist/lib/workflows/types.d.ts.map +1 -1
- package/dist/lib/workflows/types.js +51 -2
- package/dist/lib/workflows/types.js.map +1 -1
- package/dist/lib/workflows/workflow-executor.d.ts.map +1 -1
- package/dist/lib/workflows/workflow-executor.js +4 -0
- package/dist/lib/workflows/workflow-executor.js.map +1 -1
- package/dist/lib/workflows/workflow-loader.d.ts.map +1 -1
- package/dist/lib/workflows/workflow-loader.js +3 -1
- package/dist/lib/workflows/workflow-loader.js.map +1 -1
- package/dist/public/index.html +133 -700
- package/dist/public/src/components/AgentsView.js +763 -0
- package/dist/public/src/components/AppRoot.js +76 -0
- package/dist/public/src/components/IdeView.js +330 -0
- package/dist/public/src/components/KnowledgeView.js +133 -0
- package/dist/public/src/components/LlmView.js +127 -0
- package/dist/public/src/components/McpView.js +387 -0
- package/dist/public/src/components/NavBar.js +71 -0
- package/dist/public/src/components/WorkflowsView.js +243 -0
- package/dist/public/src/main.js +9 -0
- package/dist/public/src/services/ApiService.js +142 -0
- package/dist/public/src/store.js +41 -0
- package/dist/public/src/utils/Component.js +23 -0
- package/dist/public/src/utils/markdown.js +82 -0
- package/dist/src/cli/commands/start.d.ts.map +1 -1
- package/dist/src/cli/commands/start.js +3 -2
- package/dist/src/cli/commands/start.js.map +1 -1
- package/dist/src/index.js +46 -12
- package/dist/src/index.js.map +1 -1
- package/dist/src/routes/agents.route.d.ts.map +1 -1
- package/dist/src/routes/agents.route.js +38 -5
- package/dist/src/routes/agents.route.js.map +1 -1
- package/dist/src/routes/files.route.d.ts +3 -0
- package/dist/src/routes/files.route.d.ts.map +1 -0
- package/dist/src/routes/files.route.js +160 -0
- package/dist/src/routes/files.route.js.map +1 -0
- package/dist/src/routes/functions.route.d.ts +3 -0
- package/dist/src/routes/functions.route.d.ts.map +1 -0
- package/dist/src/routes/functions.route.js +83 -0
- package/dist/src/routes/functions.route.js.map +1 -0
- package/dist/src/routes/knowledge.route.d.ts +3 -0
- package/dist/src/routes/knowledge.route.d.ts.map +1 -0
- package/dist/src/routes/knowledge.route.js +153 -0
- package/dist/src/routes/knowledge.route.js.map +1 -0
- package/dist/src/routes/mcp.route.d.ts +3 -0
- package/dist/src/routes/mcp.route.d.ts.map +1 -0
- package/dist/src/routes/mcp.route.js +79 -0
- package/dist/src/routes/mcp.route.js.map +1 -0
- package/dist/src/routes/workflows.route.d.ts.map +1 -1
- package/dist/src/routes/workflows.route.js +2 -1
- package/dist/src/routes/workflows.route.js.map +1 -1
- package/dist/src/server.d.ts.map +1 -1
- package/dist/src/server.js +8 -2
- package/dist/src/server.js.map +1 -1
- package/dist/templates/.env.example +21 -0
- package/dist/templates/README.md +43 -152
- package/dist/templates/agents/call-center-analyst-simple.agent.yaml +36 -0
- package/dist/templates/agents/math.agent.yaml +4 -14
- package/dist/templates/agents/sentiment-structured.agent.yaml +42 -0
- package/dist/templates/functions/calculator.function.js +69 -0
- package/dist/templates/functions/text-formatter.function.js +66 -0
- package/dist/templates/{vectors/example.vector.yaml → knowledge/example.knowledge.yaml} +1 -1
- package/dist/templates/knowledge/transcripts/call-001.txt +40 -0
- package/dist/templates/knowledge/transcripts/call-002.txt +36 -0
- package/dist/templates/knowledge/transcripts/call-003.txt +42 -0
- package/dist/templates/llm.md +1195 -0
- package/dist/templates/workflows/example.workflow.yaml +8 -19
- package/dist/templates/workflows/langgraph-example.workflow.yaml +84 -0
- package/package.json +19 -10
- package/dist/lib/tools/built-in/vector-search.tool.d.ts +0 -4
- package/dist/lib/tools/built-in/vector-search.tool.d.ts.map +0 -1
- package/dist/lib/tools/built-in/vector-search.tool.js.map +0 -1
- package/dist/lib/vectors/index.d.ts +0 -5
- package/dist/lib/vectors/index.d.ts.map +0 -1
- package/dist/lib/vectors/index.js +0 -4
- package/dist/lib/vectors/index.js.map +0 -1
- package/dist/lib/vectors/types.d.ts +0 -212
- package/dist/lib/vectors/types.d.ts.map +0 -1
- package/dist/lib/vectors/types.js +0 -39
- package/dist/lib/vectors/types.js.map +0 -1
- package/dist/lib/vectors/vector-store-factory.d.ts +0 -14
- package/dist/lib/vectors/vector-store-factory.d.ts.map +0 -1
- package/dist/lib/vectors/vector-store-factory.js.map +0 -1
- package/dist/lib/vectors/vector-store-manager.d.ts +0 -18
- package/dist/lib/vectors/vector-store-manager.d.ts.map +0 -1
- package/dist/lib/vectors/vector-store-manager.js +0 -79
- package/dist/lib/vectors/vector-store-manager.js.map +0 -1
- package/dist/src/routes/vectors.route.d.ts +0 -3
- package/dist/src/routes/vectors.route.d.ts.map +0 -1
- package/dist/src/routes/vectors.route.js +0 -74
- package/dist/src/routes/vectors.route.js.map +0 -1
- package/dist/templates/agents/example.agent.yaml +0 -32
- package/dist/templates/agents/knowledge.agent.yaml +0 -36
- package/dist/templates/agents/time.agent.yaml +0 -42
- package/dist/templates/functions/README.md +0 -195
- package/dist/templates/functions/fibonacci.function.js +0 -55
- package/dist/templates/vectors/sample-data/example-document.txt +0 -15
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { glob } from 'glob';
|
|
4
|
+
import { parse as parseYaml } from 'yaml';
|
|
5
|
+
import { KnowledgeConfigSchema } from './types.js';
|
|
6
|
+
import { KnowledgeStoreFactory } from './knowledge-store-factory.js';
|
|
7
|
+
import { GraphRagFactory } from './graph-rag/graph-rag-factory.js';
|
|
8
|
+
import { createLogger } from '../logger.js';
|
|
9
|
+
const logger = createLogger('KnowledgeStore');
|
|
10
|
+
/**
 * Extracts the non-secret tail of a database connection string for logging.
 *
 * Everything up to and including the LAST "@" is dropped, so credentials are
 * never logged even when the password itself contains an unescaped "@"
 * (splitting on the first "@" would expose the remainder of such a password).
 *
 * @param {string} connectionString - Connection URL, e.g. "postgres://user:pw@host:5432/db".
 * @returns {string} The text after the last "@", or "unknown" when there is no
 *   "@" or nothing follows it.
 */
function describeDatabaseTarget(connectionString) {
    const separatorIndex = connectionString.lastIndexOf('@');
    if (separatorIndex === -1) {
        return 'unknown';
    }
    return connectionString.slice(separatorIndex + 1) || 'unknown';
}

/**
 * Loads `*.knowledge.yaml` configuration files from a directory and lazily
 * builds one knowledge store per configuration, caching both by config name.
 */
export class KnowledgeStoreManager {
    knowledgeDir;
    projectRoot;
    // Initialized stores, keyed by config name.
    stores = new Map();
    // Validated configurations, keyed by config name.
    configs = new Map();

    /**
     * @param {string} knowledgeDir - Directory searched for `*.knowledge.yaml` files.
     * @param {string} projectRoot - Passed through to the store factories.
     */
    constructor(knowledgeDir, projectRoot) {
        this.knowledgeDir = knowledgeDir;
        this.projectRoot = projectRoot;
    }

    /**
     * Finds every `*.knowledge.yaml` file under `knowledgeDir` (recursively)
     * and loads each one in turn. The first file that fails to read, parse or
     * validate aborts the whole load by propagating its error.
     *
     * @returns {Promise<void>}
     */
    async loadAll() {
        const files = await glob('**/*.knowledge.yaml', { cwd: this.knowledgeDir });
        for (const file of files) {
            const filePath = path.join(this.knowledgeDir, file);
            await this.loadOne(filePath);
        }
    }

    /**
     * Reads one YAML file, validates it against `KnowledgeConfigSchema` and
     * registers the result under `config.name`. A later file with the same
     * name replaces the earlier registration.
     *
     * @param {string} filePath - Path of the YAML file to load.
     * @returns {Promise<object>} The validated configuration.
     * @throws Whatever `fs.readFile`, the YAML parser or the schema throws.
     */
    async loadOne(filePath) {
        const content = await fs.readFile(filePath, 'utf-8');
        const parsed = parseYaml(content);
        const config = KnowledgeConfigSchema.parse(parsed);
        this.configs.set(config.name, config);
        return config;
    }

    /**
     * Returns the store for `name`, creating it on first use.
     *
     * Configs of kind `graph-rag` are built by `GraphRagFactory`; every other
     * kind is built by `KnowledgeStoreFactory`. A store is cached only after
     * its factory succeeds, so a failed initialization can be retried.
     *
     * @param {string} name - Name of a previously loaded configuration.
     * @returns {Promise<object>} The (possibly cached) store instance.
     * @throws {Error} If no configuration with that name was loaded, or if the
     *   factory fails (the factory error is logged and rethrown unchanged).
     */
    async initialize(name) {
        const existing = this.stores.get(name);
        if (existing) {
            logger.info(`"${name}" already initialized`);
            return existing;
        }
        const config = this.configs.get(name);
        if (!config) {
            throw new Error(`Knowledge config not found: ${name}`);
        }
        logger.info(`Initializing "${name}" (kind: ${config.kind})...`);
        // Log source-specific info
        if (config.source.type === 'directory' || config.source.type === 'file') {
            logger.info(`Source: ${config.source.path}, Pattern: ${'pattern' in config.source ? config.source.pattern || '*' : 'N/A'}`);
        }
        else if (config.source.type === 'database') {
            // Only the part after the credentials is logged; see describeDatabaseTarget.
            logger.info(`Source: database (${describeDatabaseTarget(config.source.connectionString)})`);
        }
        else if (config.source.type === 'web') {
            logger.info(`Source: web (${config.source.url})`);
        }
        else if (config.source.type === 's3') {
            logger.info(`Source: s3 (bucket: ${config.source.bucket})`);
        }
        try {
            let store;
            if (config.kind === 'graph-rag') {
                store = await GraphRagFactory.create(config, this.projectRoot);
            }
            else {
                store = await KnowledgeStoreFactory.create(config, this.projectRoot);
            }
            this.stores.set(name, store);
            logger.info(`"${name}" initialized successfully`);
            return store;
        }
        catch (error) {
            logger.error(`Failed to initialize "${name}":`, error);
            throw error;
        }
    }

    /**
     * Initializes every loaded configuration sequentially, in registration
     * order. Stops at the first failure.
     *
     * @returns {Promise<void>}
     */
    async initializeAll() {
        for (const name of this.configs.keys()) {
            await this.initialize(name);
        }
    }

    /**
     * @param {string} name - Config name.
     * @returns {object | undefined} The initialized store, or `undefined` if
     *   `initialize(name)` has not completed successfully.
     */
    get(name) {
        return this.stores.get(name);
    }

    /**
     * @param {string} name - Config name.
     * @returns {object | undefined} The loaded configuration, if any.
     */
    getConfig(name) {
        return this.configs.get(name);
    }

    /**
     * @returns {object[]} All initialized stores.
     */
    list() {
        return Array.from(this.stores.values());
    }

    /**
     * @returns {object[]} All loaded configurations, initialized or not.
     */
    listConfigs() {
        return Array.from(this.configs.values());
    }

    /**
     * Calls `refresh()` on the store registered under `name`. Does nothing
     * when that store has not been initialized.
     *
     * @param {string} name - Config name.
     * @returns {Promise<void>}
     */
    async refresh(name) {
        const store = this.stores.get(name);
        if (store) {
            await store.refresh();
        }
    }
}
|
|
98
|
+
//# sourceMappingURL=knowledge-store-manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"knowledge-store-manager.js","sourceRoot":"","sources":["../../../lib/knowledge/knowledge-store-manager.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,aAAa,CAAC;AAClC,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,qBAAqB,EAA+G,MAAM,YAAY,CAAC;AAChK,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,eAAe,EAAE,MAAM,kCAAkC,CAAC;AACnE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,MAAM,GAAG,YAAY,CAAC,gBAAgB,CAAC,CAAC;AAE9C,MAAM,OAAO,qBAAqB;IACxB,YAAY,CAAS;IACrB,WAAW,CAAS;IACpB,MAAM,GAAwC,IAAI,GAAG,EAAE,CAAC;IACxD,OAAO,GAAiC,IAAI,GAAG,EAAE,CAAC;IAE1D,YAAY,YAAoB,EAAE,WAAmB;QACnD,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,OAAO;QACX,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;QAE5E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,CAAC;YACpD,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,QAAgB;QAC5B,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACnD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACtC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,IAAY;QAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,uBAAuB,CAAC,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,+BAA+B,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,iBAAiB,IAAI,YAAY,MAAM,CAAC,IAAI,MAAM,CAAC,CAAC;QAEhE,2BAA2B;QAC3B,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,WAAW,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACxE,MAAM,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,MAAM,CAAC,IAAI,cAAc,SAAS,IAAI,MA
AM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QAC9H,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,SAAS,GAAG,CAAC,CAAC;QACjG,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,gBAAgB,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC;QACpD,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC,uBAAuB,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9D,CAAC;QAED,IAAI,CAAC;YACH,IAAI,KAA6B,CAAC;YAElC,IAAI,MAAM,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAChC,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,MAAiC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YAC5F,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC,MAA+B,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YAChG,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,4BAA4B,CAAC,CAAC;YAClD,OAAO,KAAK,CAAC;QACf,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,yBAAyB,IAAI,IAAI,EAAE,KAAK,CAAC,CAAC;YACvD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,aAAa;QACjB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACvC,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,GAAG,CAAC,IAAY;QACd,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/B,CAAC;IAED,SAAS,CAAC,IAAY;QACpB,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,IAAI;QACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,CAAC;IAED,WAAW;QACT,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3C,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,IAAY;QACxB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,CAAC,OAAO,EAAE,CAAC;QACxB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { BaseDocumentLoader } from '@langchain/core/document_loaders/base';
|
|
2
|
+
import { Document } from '@langchain/core/documents';
|
|
3
|
+
import type { DatabaseSourceConfig } from '../types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Database document loader for PostgreSQL and MySQL.
|
|
6
|
+
* Executes SQL queries and transforms rows into LangChain documents.
|
|
7
|
+
*/
|
|
8
|
+
export declare class DatabaseLoader extends BaseDocumentLoader {
|
|
9
|
+
/** Source configuration captured by the constructor and read by load(). */
private config;
|
|
10
|
+
/** Stores the given database source configuration; no connection is opened here. */
constructor(config: DatabaseSourceConfig);
|
|
11
|
+
/** Runs the configured query and resolves with one Document per returned row. */
load(): Promise<Document[]>;
|
|
12
|
+
/** PostgreSQL path: acquires a pooled client, runs the query, converts rows in batches. */
private loadFromPostgres;
|
|
13
|
+
/** MySQL path: acquires a pooled connection, runs the query, converts rows in batches. */
private loadFromMysql;
|
|
14
|
+
/** Converts one batch of PostgreSQL rows and appends the resulting documents. */
private processPostgresBatch;
|
|
15
|
+
/** Converts one batch of MySQL rows and appends the resulting documents. */
private processMysqlBatch;
|
|
16
|
+
/** Builds a Document from a row: content column as text, selected columns as metadata. */
private rowToDocument;
|
|
17
|
+
}
|
|
18
|
+
//# sourceMappingURL=database-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"database-loader.d.ts","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/database-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,uCAAuC,CAAC;AAC3E,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AAIrD,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAKxD;;;GAGG;AACH,qBAAa,cAAe,SAAQ,kBAAkB;IACpD,OAAO,CAAC,MAAM,CAAuB;gBAEzB,MAAM,EAAE,oBAAoB;IAKlC,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YA2BnB,gBAAgB;YA+BhB,aAAa;IAgC3B,OAAO,CAAC,oBAAoB;IAc5B,OAAO,CAAC,iBAAiB;IAazB,OAAO,CAAC,aAAa;CA+BtB"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { BaseDocumentLoader } from '@langchain/core/document_loaders/base';
|
|
2
|
+
import { Document } from '@langchain/core/documents';
|
|
3
|
+
import { getPool, getDatabaseType } from '../utils/connection-pool.js';
|
|
4
|
+
import { createLogger } from '../../logger.js';
|
|
5
|
+
const logger = createLogger('DatabaseLoader');
|
|
6
|
+
/**
 * Batch size used when the configured value is missing or unusable.
 * Batching here only controls how often progress is logged, because the whole
 * result set is already in memory by the time rows are sliced.
 */
const FALLBACK_BATCH_SIZE = 100;

/**
 * Normalizes a configured batch size into a usable loop step.
 *
 * An unvalidated value is dangerous as a loop increment: `undefined`/`NaN`
 * makes the first slice empty and ends the loop (zero documents, no error),
 * while `0` or a negative number never advances and loops forever.
 *
 * @param {unknown} batchSize - Value taken from the source configuration.
 * @returns {number} `batchSize` when it is a finite number greater than zero,
 *   otherwise `FALLBACK_BATCH_SIZE`.
 */
function resolveBatchSize(batchSize) {
    return Number.isFinite(batchSize) && batchSize > 0 ? batchSize : FALLBACK_BATCH_SIZE;
}

/**
 * Database document loader for PostgreSQL and MySQL.
 * Executes SQL queries and transforms rows into LangChain documents.
 */
export class DatabaseLoader extends BaseDocumentLoader {
    config;

    /**
     * @param {object} config - Database source configuration; `load()` reads
     *   `connectionString`, `query`, `contentColumn`, `metadataColumns` and
     *   `batchSize` from it.
     */
    constructor(config) {
        super();
        this.config = config;
    }

    /**
     * Runs the configured query and converts every returned row to a Document.
     *
     * The PostgreSQL path is taken when `getDatabaseType` reports
     * `'postgresql'`; any other value is handled by the MySQL path.
     *
     * @returns {Promise<Document[]>} One document per row, in result order.
     * @throws {Error} "Failed to load documents from database: ..." wrapping
     *   any query or row-conversion failure; the original error is attached
     *   as `cause` so its stack is not lost.
     */
    async load() {
        const { connectionString, query, contentColumn, metadataColumns, batchSize } = this.config;
        logger.info(`Loading documents from database`);
        // Only the first 100 characters of the query are logged.
        logger.info(`Query: ${query.substring(0, 100)}${query.length > 100 ? '...' : ''}`);
        const dbType = getDatabaseType(connectionString);
        const pool = getPool(connectionString);
        const documents = [];
        try {
            if (dbType === 'postgresql') {
                await this.loadFromPostgres(pool, query, contentColumn, metadataColumns, batchSize, documents);
            }
            else {
                await this.loadFromMysql(pool, query, contentColumn, metadataColumns, batchSize, documents);
            }
            logger.info(`Loaded ${documents.length} document(s) from database`);
            return documents;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Database query failed: ${errorMessage}`);
            throw new Error(`Failed to load documents from database: ${errorMessage}`, { cause: error });
        }
    }

    /**
     * Executes the query on a pooled PostgreSQL client and appends the
     * converted rows to `documents`. The client is always released.
     *
     * @param {object} pool - Pool exposing `connect()`.
     * @param {string} query - SQL text to execute.
     * @param {string} contentColumn - Column providing the document text.
     * @param {string[] | undefined} metadataColumns - Columns copied into metadata.
     * @param {number} batchSize - Rows per progress step; invalid values fall back to a default.
     * @param {Document[]} documents - Output array, mutated in place.
     * @returns {Promise<void>}
     */
    async loadFromPostgres(pool, query, contentColumn, metadataColumns, batchSize, documents) {
        const step = resolveBatchSize(batchSize);
        const client = await pool.connect();
        try {
            logger.info(`Executing PostgreSQL query`);
            // Execute query and get all results
            const result = await client.query(query);
            const rows = result.rows;
            const fields = result.fields;
            logger.info(`Fetched ${rows.length} row(s)`);
            // Process in batches
            for (let i = 0; i < rows.length; i += step) {
                const batch = rows.slice(i, i + step);
                this.processPostgresBatch(batch, fields, contentColumn, metadataColumns, documents);
                logger.info(`Processed ${Math.min(i + step, rows.length)} / ${rows.length} row(s)`);
            }
        }
        finally {
            client.release();
        }
    }

    /**
     * Executes the query on a pooled MySQL connection and appends the
     * converted rows to `documents`. The connection is always released.
     *
     * @param {object} pool - Pool exposing `getConnection()`.
     * @param {string} query - SQL text to execute.
     * @param {string} contentColumn - Column providing the document text.
     * @param {string[] | undefined} metadataColumns - Columns copied into metadata.
     * @param {number} batchSize - Rows per progress step; invalid values fall back to a default.
     * @param {Document[]} documents - Output array, mutated in place.
     * @returns {Promise<void>}
     * @throws {Error} "Query did not return rows" when the result is not an array.
     */
    async loadFromMysql(pool, query, contentColumn, metadataColumns, batchSize, documents) {
        const step = resolveBatchSize(batchSize);
        const connection = await pool.getConnection();
        try {
            logger.info(`Executing MySQL query with batch size ${step}`);
            const [rows, fields] = await connection.query(query);
            if (!Array.isArray(rows)) {
                throw new Error('Query did not return rows');
            }
            logger.info(`Fetched ${rows.length} row(s)`);
            // Process in batches
            for (let i = 0; i < rows.length; i += step) {
                const batch = rows.slice(i, i + step);
                this.processMysqlBatch(batch, fields, contentColumn, metadataColumns, documents);
                logger.info(`Processed ${Math.min(i + step, rows.length)} / ${rows.length} row(s)`);
            }
        }
        finally {
            connection.release();
        }
    }

    /**
     * Converts a batch of PostgreSQL rows and appends them to `documents`.
     * The field descriptors are accepted but not used.
     */
    processPostgresBatch(batch, _fields, contentColumn, metadataColumns, documents) {
        for (const row of batch) {
            // PostgreSQL returns rows as objects, not arrays
            const doc = this.rowToDocument(row, contentColumn, metadataColumns);
            documents.push(doc);
        }
    }

    /**
     * Converts a batch of MySQL rows and appends them to `documents`.
     * The field descriptors are accepted but not used.
     */
    processMysqlBatch(batch, _fields, contentColumn, metadataColumns, documents) {
        for (const row of batch) {
            const doc = this.rowToDocument(row, contentColumn, metadataColumns);
            documents.push(doc);
        }
    }

    /**
     * Builds a Document from a single result row.
     *
     * @param {object} row - Row keyed by column name.
     * @param {string} contentColumn - Column whose value becomes `pageContent`
     *   (non-string values are converted with `String`).
     * @param {string[] | undefined} metadataColumns - Columns copied into
     *   `metadata`; names absent from the row are skipped.
     * @returns {Document}
     * @throws {Error} When the content column is missing, `null` or `undefined`.
     */
    rowToDocument(row, contentColumn, metadataColumns) {
        // Extract content
        const content = row[contentColumn];
        if (content === null || content === undefined) {
            throw new Error(`Content column "${contentColumn}" not found or is null in row`);
        }
        // Convert content to string if needed
        const pageContent = typeof content === 'string' ? content : String(content);
        // Extract metadata from specified columns
        const metadata = {};
        if (metadataColumns && metadataColumns.length > 0) {
            for (const column of metadataColumns) {
                if (column in row) {
                    metadata[column] = row[column];
                }
            }
        }
        return new Document({
            pageContent,
            metadata,
        });
    }
}
|
|
115
|
+
//# sourceMappingURL=database-loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"database-loader.js","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/database-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,uCAAuC,CAAC;AAC3E,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AAGrD,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAEvE,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAE/C,MAAM,MAAM,GAAG,YAAY,CAAC,gBAAgB,CAAC,CAAC;AAE9C;;;GAGG;AACH,MAAM,OAAO,cAAe,SAAQ,kBAAkB;IAC5C,MAAM,CAAuB;IAErC,YAAY,MAA4B;QACtC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,EAAE,gBAAgB,EAAE,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAE3F,MAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEnF,MAAM,MAAM,GAAG,eAAe,CAAC,gBAAgB,CAAC,CAAC;QACjD,MAAM,IAAI,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;QAEvC,MAAM,SAAS,GAAe,EAAE,CAAC;QAEjC,IAAI,CAAC;YACH,IAAI,MAAM,KAAK,YAAY,EAAE,CAAC;gBAC5B,MAAM,IAAI,CAAC,gBAAgB,CAAC,IAAc,EAAE,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;YAC3G,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,CAAC,aAAa,CAAC,IAAkB,EAAE,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;YAC5G,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,MAAM,4BAA4B,CAAC,CAAC;YACpE,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,MAAM,CAAC,KAAK,CAAC,0BAA0B,YAAY,EAAE,CAAC,CAAC;YACvD,MAAM,IAAI,KAAK,CAAC,2CAA2C,YAAY,EAAE,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAC5B,IAAY,EACZ,KAAa,EACb,aAAqB,EACrB,eAAqC,EACrC,SAAiB,EACjB,SAAqB;QAErB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QAEpC,IAAI,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;YAE1C,oCAAoC;YACpC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACzC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC;YACzB,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;YAE7B,MAAM,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,MAAM,SAAS,C
AAC,CAAC;YAE7C,qBAAqB;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBAChD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBAC3C,IAAI,CAAC,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,eAAe,EAAE,SAAS,CAAC,CAAC;gBACpF,MAAM,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,SAAS,CAAC,CAAC;YAC3F,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,aAAa,CACzB,IAAgB,EAChB,KAAa,EACb,aAAqB,EACrB,eAAqC,EACrC,SAAiB,EACjB,SAAqB;QAErB,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE9C,IAAI,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,yCAAyC,SAAS,EAAE,CAAC,CAAC;YAElE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAErD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;YAC/C,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,MAAM,SAAS,CAAC,CAAC;YAE7C,qBAAqB;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBAChD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBAC3C,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,eAAe,EAAE,SAAS,CAAC,CAAC;gBACjF,MAAM,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,SAAS,CAAC,CAAC;YAC3F,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,UAAU,CAAC,OAAO,EAAE,CAAC;QACvB,CAAC;IACH,CAAC;IAEO,oBAAoB,CAC1B,KAAY,EACZ,OAAc,EACd,aAAqB,EACrB,eAAqC,EACrC,SAAqB;QAErB,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;YACxB,iDAAiD;YACjD,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,GAA0B,EAAE,aAAa,EAAE,eAAe,CAAC,CAAC;YAC3F,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAEO,iBAAiB,CACvB,KAAY,EACZ,OAAc,EACd,aAAqB,EACrB,eAAqC,EACrC,SAAqB;QAErB,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;YACxB,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,GAA0B,EAAE,aAAa,EAAE,eAAe,CAAC,CAAC;YAC3F,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAEO,aAAa,CACnB,GAAwB,EACxB,aAAqB,EACrB,eAAqC;QAErC,kBAAkB;QAClB,MAAM,O
AAO,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC;QAEnC,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC9C,MAAM,IAAI,KAAK,CAAC,mBAAmB,aAAa,+BAA+B,CAAC,CAAC;QACnF,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE5E,0CAA0C;QAC1C,MAAM,QAAQ,GAAwB,EAAE,CAAC;QAEzC,IAAI,eAAe,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClD,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;gBACrC,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,QAAQ,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,IAAI,QAAQ,CAAC;YAClB,WAAW;YACX,QAAQ;SACT,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { Document } from '@langchain/core/documents';
|
|
2
|
+
import type { S3SourceConfig } from '../types.js';
|
|
3
|
+
/**
|
|
4
|
+
* S3 document loader supporting AWS S3 and S3-compatible services (MinIO, Wasabi, etc.).
|
|
5
|
+
* Downloads files to temporary storage and uses appropriate loaders based on file type.
|
|
6
|
+
*/
|
|
7
|
+
export declare class S3Loader {
|
|
8
|
+
/** Source configuration handed to the constructor (bucket, prefix, pattern, ...). */
private config;
|
|
9
|
+
/** S3 client created in the constructor from the region/endpoint/credential settings. */
private s3Client;
|
|
10
|
+
/** Builds the S3 client; a custom endpoint and explicit credentials are applied only when configured. */
constructor(config: S3SourceConfig);
|
|
11
|
+
/** Lists the bucket, filters keys by pattern and supported file type, and resolves to the documents loaded from each remaining object. */
load(): Promise<Document[]>;
|
|
12
|
+
/** Collects object keys page by page (following continuation tokens), skipping keys that end with "/". */
private listObjects;
|
|
13
|
+
/** Downloads one object, parses it, tags the documents with S3 metadata and removes the temporary file. */
private loadFile;
|
|
14
|
+
/** Writes the object body to a file in the OS temp directory and resolves to that file's path. */
private downloadToTemp;
|
|
15
|
+
/** Picks the document loader for a detected file type; unknown types fall back to the text loader. */
private createLoader;
|
|
16
|
+
}
|
|
17
|
+
//# sourceMappingURL=s3-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"s3-loader.d.ts","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/s3-loader.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AAUrD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAMlD;;;GAGG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,QAAQ,CAAW;gBAEf,MAAM,EAAE,cAAc;IA0B5B,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YA8DnB,WAAW;YA8BX,QAAQ;YA8BR,cAAc;IA+B5B,OAAO,CAAC,YAAY;CAcrB"}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import { S3Client, ListObjectsV2Command, GetObjectCommand } from '@aws-sdk/client-s3';
|
|
2
|
+
import { TextLoader } from '@langchain/classic/document_loaders/fs/text';
|
|
3
|
+
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
|
|
4
|
+
import { CSVLoader } from '@langchain/community/document_loaders/fs/csv';
|
|
5
|
+
import { JSONLoader } from '@langchain/classic/document_loaders/fs/json';
|
|
6
|
+
import * as fs from 'fs';
|
|
7
|
+
import * as os from 'os';
|
|
8
|
+
import * as path from 'path';
|
|
9
|
+
import { minimatch } from 'minimatch';
|
|
10
|
+
import { detectFileType, isSupportedFileType } from '../utils/file-type-detector.js';
|
|
11
|
+
import { createLogger } from '../../logger.js';
|
|
12
|
+
const logger = createLogger('S3Loader');
|
|
13
|
+
/**
 * S3 document loader supporting AWS S3 and S3-compatible services (MinIO, Wasabi, etc.).
 * Downloads files to temporary storage and uses appropriate loaders based on file type.
 */
export class S3Loader {
    config;
    s3Client;
    /**
     * @param {object} config S3 source settings: `bucket`, plus optional `prefix`,
     *   `pattern`, `region`, `endpoint`, `forcePathStyle`, `accessKeyId` and
     *   `secretAccessKey`.
     */
    constructor(config) {
        this.config = config;
        // Configure S3 client
        const clientConfig = {
            region: config.region,
            forcePathStyle: config.forcePathStyle,
        };
        // Use custom endpoint if provided (for MinIO, Wasabi, etc.)
        if (config.endpoint) {
            clientConfig.endpoint = config.endpoint;
            logger.info(`Using custom S3 endpoint: ${config.endpoint}`);
        }
        // Explicit credentials are applied only when BOTH halves are present;
        // otherwise the SDK's default credential resolution is left in charge.
        if (config.accessKeyId && config.secretAccessKey) {
            clientConfig.credentials = {
                accessKeyId: config.accessKeyId,
                secretAccessKey: config.secretAccessKey,
            };
        }
        this.s3Client = new S3Client(clientConfig);
    }
    /**
     * Lists the bucket, filters the keys by glob pattern and by supported file
     * type, then loads every remaining object.
     *
     * A failure on a single object is logged and skipped so one bad file does
     * not abort the whole run; a failure while listing aborts the load.
     *
     * @returns {Promise<Array>} Documents from all successfully loaded objects
     *   (empty when nothing matched).
     * @throws {Error} "Failed to load documents from S3: ..." with the
     *   underlying error attached as `cause`.
     */
    async load() {
        const { bucket, prefix, pattern } = this.config;
        logger.info(`Loading documents from S3 bucket: ${bucket}`);
        if (prefix) {
            logger.info(`Using prefix: ${prefix}`);
        }
        if (pattern) {
            logger.info(`Filtering with pattern: ${pattern}`);
        }
        try {
            // List objects
            const keys = await this.listObjects();
            logger.info(`Found ${keys.length} object(s) in bucket`);
            // Filter by pattern if specified
            const filteredKeys = pattern
                ? keys.filter((key) => minimatch(key, pattern))
                : keys;
            logger.info(`Processing ${filteredKeys.length} file(s) after pattern filtering`);
            // Filter out unsupported file types
            const supportedKeys = filteredKeys.filter((key) => {
                const supported = isSupportedFileType(key);
                if (!supported) {
                    logger.warn(`Skipping unsupported file type: ${key}`);
                }
                return supported;
            });
            if (supportedKeys.length === 0) {
                logger.warn(`No supported files found in S3 bucket`);
                return [];
            }
            logger.info(`Loading ${supportedKeys.length} supported file(s)`);
            // Load documents from each file
            const allDocs = [];
            for (const key of supportedKeys) {
                try {
                    const docs = await this.loadFile(key);
                    allDocs.push(...docs);
                    logger.info(`Loaded ${docs.length} document(s) from ${key}`);
                }
                catch (error) {
                    const errorMessage = error instanceof Error ? error.message : String(error);
                    logger.error(`Failed to load ${key}: ${errorMessage}`);
                    // Continue with other files
                }
            }
            logger.info(`Loaded total of ${allDocs.length} document(s) from S3`);
            return allDocs;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`S3 loading failed: ${errorMessage}`);
            // Keep the original error reachable for callers that inspect `cause`.
            throw new Error(`Failed to load documents from S3: ${errorMessage}`, { cause: error });
        }
    }
    /**
     * Collects every object key under the configured prefix, following
     * continuation tokens until the listing is exhausted.
     *
     * @returns {Promise<string[]>} Object keys, excluding keys that end with "/".
     */
    async listObjects() {
        const keys = [];
        let continuationToken;
        do {
            const command = new ListObjectsV2Command({
                Bucket: this.config.bucket,
                Prefix: this.config.prefix,
                ContinuationToken: continuationToken,
            });
            const response = await this.s3Client.send(command);
            if (response.Contents) {
                for (const obj of response.Contents) {
                    // Skip directories (keys ending with /)
                    if (obj.Key && !obj.Key.endsWith('/')) {
                        keys.push(obj.Key);
                    }
                }
            }
            continuationToken = response.NextContinuationToken;
        } while (continuationToken);
        return keys;
    }
    /**
     * Downloads one object, parses it with the loader matching its file type
     * and stamps S3 provenance onto each resulting document.
     *
     * @param {string} key Object key inside the configured bucket.
     * @returns {Promise<Array>} Documents parsed from the object.
     */
    async loadFile(key) {
        // Download file to temporary location
        const tmpFile = await this.downloadToTemp(key);
        try {
            // Detect file type and create appropriate loader
            const fileType = detectFileType(key);
            const loader = this.createLoader(fileType, tmpFile);
            // Load documents
            const docs = await loader.load();
            // Add S3 metadata to all documents
            docs.forEach((doc) => {
                doc.metadata.s3_bucket = this.config.bucket;
                doc.metadata.s3_key = key;
                doc.metadata.source = `s3://${this.config.bucket}/${key}`;
            });
            return docs;
        }
        finally {
            // Clean up temporary file (best effort: a failed delete is only logged)
            try {
                fs.unlinkSync(tmpFile);
            }
            catch (error) {
                logger.warn(`Failed to delete temporary file ${tmpFile}: ${error}`);
            }
        }
    }
    /**
     * Streams an object's body into a uniquely named file in the OS temp
     * directory, keeping the key's extension.
     *
     * If the download or the write fails, both streams are torn down and the
     * partially written file is removed before the error is rethrown, so a
     * failed download leaves nothing behind.
     *
     * @param {string} key Object key inside the configured bucket.
     * @returns {Promise<string>} Path of the downloaded temporary file.
     * @throws {Error} When the response has no body, or the transfer fails.
     */
    async downloadToTemp(key) {
        const command = new GetObjectCommand({
            Bucket: this.config.bucket,
            Key: key,
        });
        const response = await this.s3Client.send(command);
        if (!response.Body) {
            throw new Error(`No body in S3 response for key: ${key}`);
        }
        // Create temporary file
        const ext = path.extname(key);
        const tmpFile = path.join(os.tmpdir(), `s3-loader-${Date.now()}-${Math.random().toString(36).substring(7)}${ext}`);
        // Stream to file
        const stream = response.Body;
        const writeStream = fs.createWriteStream(tmpFile);
        // Registered up front so the close event cannot be missed: it may fire
        // before the failure path below gets a chance to wait for it.
        const closed = new Promise((resolve) => {
            writeStream.once('close', resolve);
        });
        try {
            await new Promise((resolve, reject) => {
                stream.on('error', reject);
                writeStream.on('error', reject);
                writeStream.on('finish', () => resolve());
                stream.pipe(writeStream);
            });
        }
        catch (error) {
            // pipe() does not close the destination when the source fails, so
            // release both ends explicitly.
            stream.destroy?.();
            writeStream.destroy();
            // Wait until the file handle is released; otherwise a still-pending
            // open could recreate the file after it has been removed.
            await closed;
            try {
                await fs.promises.rm(tmpFile, { force: true });
            }
            catch (cleanupError) {
                logger.warn(`Failed to delete temporary file ${tmpFile}: ${cleanupError}`);
            }
            throw error;
        }
        logger.info(`Downloaded ${key} to ${tmpFile}`);
        return tmpFile;
    }
    /**
     * Chooses the document loader for a detected file type.
     *
     * @param {string} type Detected file type ('pdf', 'csv', 'json', 'text', 'markdown', ...).
     * @param {string} filePath Local path of the downloaded file.
     * @returns {object} Loader instance; anything that is not pdf/csv/json is read as text.
     */
    createLoader(type, filePath) {
        switch (type) {
            case 'pdf':
                return new PDFLoader(filePath);
            case 'csv':
                return new CSVLoader(filePath);
            case 'json':
                return new JSONLoader(filePath);
            case 'text':
            case 'markdown':
            default:
                return new TextLoader(filePath);
        }
    }
}
|
|
185
|
+
//# sourceMappingURL=s3-loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"s3-loader.js","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/s3-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtF,OAAO,EAAE,UAAU,EAAE,MAAM,6CAA6C,CAAC;AACzE,OAAO,EAAE,SAAS,EAAE,MAAM,8CAA8C,CAAC;AACzE,OAAO,EAAE,SAAS,EAAE,MAAM,8CAA8C,CAAC;AACzE,OAAO,EAAE,UAAU,EAAE,MAAM,6CAA6C,CAAC;AAEzE,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AACrF,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAE/C,MAAM,MAAM,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;AAExC;;;GAGG;AACH,MAAM,OAAO,QAAQ;IACX,MAAM,CAAiB;IACvB,QAAQ,CAAW;IAE3B,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QAErB,sBAAsB;QACtB,MAAM,YAAY,GAAQ;YACxB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,cAAc,EAAE,MAAM,CAAC,cAAc;SACtC,CAAC;QAEF,4DAA4D;QAC5D,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACpB,YAAY,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,6BAA6B,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC9D,CAAC;QAED,4EAA4E;QAC5E,IAAI,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,eAAe,EAAE,CAAC;YACjD,YAAY,CAAC,WAAW,GAAG;gBACzB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,eAAe,EAAE,MAAM,CAAC,eAAe;aACxC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,QAAQ,GAAG,IAAI,QAAQ,CAAC,YAAY,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAEhD,MAAM,CAAC,IAAI,CAAC,qCAAqC,MAAM,EAAE,CAAC,CAAC;QAC3D,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,IAAI,CAAC,iBAAiB,MAAM,EAAE,CAAC,CAAC;QACzC,CAAC;QACD,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,CAAC,IAAI,CAAC,2BAA2B,OAAO,EAAE,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,CAAC;YACH,eAAe;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YACtC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,MAAM,sBAAsB,CAAC,CAAC;YAExD,iCAAiC;YACjC,MAAM,YAAY,GAAG,OAAO;gBAC1B,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,CAAC,CAAC,IAAI,CAAC;YAET,MAAM,CAAC,IAAI,CAAC,cAAc,YAAY,CAAC,MAAM,kCAAkC,CAAC,CAAC;YAEjF,oCAAoC;
YACpC,MAAM,aAAa,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;gBAChD,MAAM,SAAS,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;gBAC3C,IAAI,CAAC,SAAS,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,CAAC,mCAAmC,GAAG,EAAE,CAAC,CAAC;gBACxD,CAAC;gBACD,OAAO,SAAS,CAAC;YACnB,CAAC,CAAC,CAAC;YAEH,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC/B,MAAM,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;gBACrD,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,WAAW,aAAa,CAAC,MAAM,oBAAoB,CAAC,CAAC;YAEjE,gCAAgC;YAChC,MAAM,OAAO,GAAe,EAAE,CAAC;YAC/B,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;gBAChC,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;oBACtC,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBACtB,MAAM,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,MAAM,qBAAqB,GAAG,EAAE,CAAC,CAAC;gBAC/D,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBAC5E,MAAM,CAAC,KAAK,CAAC,kBAAkB,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;oBACvD,4BAA4B;gBAC9B,CAAC;YACH,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,mBAAmB,OAAO,CAAC,MAAM,sBAAsB,CAAC,CAAC;YACrE,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,MAAM,CAAC,KAAK,CAAC,sBAAsB,YAAY,EAAE,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,qCAAqC,YAAY,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,WAAW;QACvB,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,IAAI,iBAAqC,CAAC;QAE1C,GAAG,CAAC;YACF,MAAM,OAAO,GAAG,IAAI,oBAAoB,CAAC;gBACvC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBAC1B,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBAC1B,iBAAiB,EAAE,iBAAiB;aACrC,CAAC,CAAC;YAEH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAEnD,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;gBACtB,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;oBACpC,IAAI,GAAG,CAAC,GAAG,EAAE,CAAC;wBACZ,wCAAwC;wBACxC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;4BAC3B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;wBACrB,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,iBAAiB,GAAG,QAAQ,CAAC,qBAAqB,CAAC;QACrD,CAAC,QAAQ,iBAAiB,E
AAE;QAE5B,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,KAAK,CAAC,QAAQ,CAAC,GAAW;QAChC,sCAAsC;QACtC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAE/C,IAAI,CAAC;YACH,iDAAiD;YACjD,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;YACrC,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAEpD,iBAAiB;YACjB,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YAEjC,mCAAmC;YACnC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;gBACnB,GAAG,CAAC,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;gBAC5C,GAAG,CAAC,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAC;gBAC1B,GAAG,CAAC,QAAQ,CAAC,MAAM,GAAG,QAAQ,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YAC5D,CAAC,CAAC,CAAC;YAEH,OAAO,IAAI,CAAC;QACd,CAAC;gBAAS,CAAC;YACT,0BAA0B;YAC1B,IAAI,CAAC;gBACH,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,mCAAmC,OAAO,KAAK,KAAK,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,cAAc,CAAC,GAAW;QACtC,MAAM,OAAO,GAAG,IAAI,gBAAgB,CAAC;YACnC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YAC1B,GAAG,EAAE,GAAG;SACT,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEnD,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,mCAAmC,GAAG,EAAE,CAAC,CAAC;QAC5D,CAAC;QAED,wBAAwB;QACxB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,aAAa,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC;QAEnH,iBAAiB;QACjB,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAgB,CAAC;QACzC,MAAM,WAAW,GAAG,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAElD,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1C,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACzB,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAC3B,WAAW,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAChC,WAAW,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,MAAM,CAAC,IAAI,CAAC,cAAc,GAAG,OAAO,OAAO,EAAE,CAAC,CAAC;QAC/C,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,YAAY,CAAC,IAAY,EAAE,QAAgB;QACjD,QAAQ,IAAI,EAAE,CAAC;YACb
,KAAK,KAAK;gBACR,OAAO,IAAI,SAAS,CAAC,QAAQ,CAAC,CAAC;YACjC,KAAK,KAAK;gBACR,OAAO,IAAI,SAAS,CAAC,QAAQ,CAAC,CAAC;YACjC,KAAK,MAAM;gBACT,OAAO,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC;YAClC,KAAK,MAAM,CAAC;YACZ,KAAK,UAAU,CAAC;YAChB;gBACE,OAAO,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { Document } from '@langchain/core/documents';
|
|
2
|
+
import type { WebSourceConfig } from '../types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Web scraping document loader using Cheerio.
|
|
5
|
+
* Supports CSS selectors for targeted content extraction.
|
|
6
|
+
*/
|
|
7
|
+
export declare class WebLoader {
|
|
8
|
+
/** Source configuration handed to the constructor (url, optional selector and headers). */
private config;
|
|
9
|
+
/** Stores the configuration; no network access happens until load() is called. */
constructor(config: WebSourceConfig);
|
|
10
|
+
/** Fetches the configured URL and resolves to the extracted documents, tagged with the source URL and selector. */
load(): Promise<Document[]>;
|
|
11
|
+
}
|
|
12
|
+
//# sourceMappingURL=web-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web-loader.d.ts","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/web-loader.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAKnD;;;GAGG;AACH,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAkB;gBAEpB,MAAM,EAAE,eAAe;IAI7B,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;CAmDlC"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio';
|
|
2
|
+
import { createLogger } from '../../logger.js';
|
|
3
|
+
const logger = createLogger('WebLoader');
|
|
4
|
+
/**
 * Web scraping document loader using Cheerio.
 * Supports CSS selectors for targeted content extraction.
 */
export class WebLoader {
    config;
    /**
     * @param {object} config Web source settings: `url`, plus optional
     *   `selector` (CSS selector) and `headers` (request headers).
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Fetches the configured URL and extracts its content as documents.
     *
     * Each document's metadata is stamped with the source URL and the selector
     * that was used ('body' when none was configured).
     *
     * @returns {Promise<Array>} Documents produced by the Cheerio loader.
     * @throws {Error} A descriptive error for 404s, network failures and
     *   selector problems, or a generic "Failed to load web content" error;
     *   the underlying error is attached as `cause` in every case.
     */
    async load() {
        const { url, selector, headers } = this.config;
        logger.info(`Loading documents from web: ${url}`);
        if (selector) {
            logger.info(`Using CSS selector: ${selector}`);
        }
        try {
            const loaderOptions = {};
            // The Cheerio loader reads request headers from the top-level
            // `headers` option; nesting them under another key drops them.
            if (headers) {
                loaderOptions.headers = headers;
            }
            // Add CSS selector if provided
            if (selector) {
                loaderOptions.selector = selector;
            }
            const loader = new CheerioWebBaseLoader(url, loaderOptions);
            const documents = await loader.load();
            logger.info(`Loaded ${documents.length} document(s) from ${url}`);
            // Add source URL to metadata
            documents.forEach((doc) => {
                doc.metadata.source = url;
                doc.metadata.selector = selector || 'body';
            });
            return documents;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to load web content: ${errorMessage}`);
            // Provide helpful error messages for common issues
            if (errorMessage.includes('404')) {
                throw new Error(`Web page not found (404): ${url}`, { cause: error });
            }
            if (errorMessage.includes('ENOTFOUND') || errorMessage.includes('ETIMEDOUT')) {
                throw new Error(`Network error loading ${url}: ${errorMessage}`, { cause: error });
            }
            if (errorMessage.includes('selector')) {
                throw new Error(`Invalid CSS selector "${selector}": ${errorMessage}`, { cause: error });
            }
            throw new Error(`Failed to load web content from ${url}: ${errorMessage}`, { cause: error });
        }
    }
}
|
|
56
|
+
//# sourceMappingURL=web-loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web-loader.js","sourceRoot":"","sources":["../../../../lib/knowledge/loaders/web-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mDAAmD,CAAC;AAGzF,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAE/C,MAAM,MAAM,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEzC;;;GAGG;AACH,MAAM,OAAO,SAAS;IACZ,MAAM,CAAkB;IAEhC,YAAY,MAAuB;QACjC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAE/C,MAAM,CAAC,IAAI,CAAC,+BAA+B,GAAG,EAAE,CAAC,CAAC;QAClD,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,uBAAuB,QAAQ,EAAE,CAAC,CAAC;QACjD,CAAC;QAED,IAAI,CAAC;YACH,MAAM,aAAa,GAAQ,EAAE,CAAC;YAE9B,iCAAiC;YACjC,IAAI,OAAO,EAAE,CAAC;gBACZ,aAAa,CAAC,cAAc,GAAG,EAAE,OAAO,EAAE,CAAC;YAC7C,CAAC;YAED,+BAA+B;YAC/B,IAAI,QAAQ,EAAE,CAAC;gBACb,aAAa,CAAC,QAAQ,GAAG,QAAQ,CAAC;YACpC,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,oBAAoB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAC5D,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YAEtC,MAAM,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,MAAM,qBAAqB,GAAG,EAAE,CAAC,CAAC;YAElE,6BAA6B;YAC7B,SAAS,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;gBACxB,GAAG,CAAC,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAC;gBAC1B,GAAG,CAAC,QAAQ,CAAC,QAAQ,GAAG,QAAQ,IAAI,MAAM,CAAC;YAC7C,CAAC,CAAC,CAAC;YAEH,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,MAAM,CAAC,KAAK,CAAC,+BAA+B,YAAY,EAAE,CAAC,CAAC;YAE5D,mDAAmD;YACnD,IAAI,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACjC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,EAAE,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC7E,MAAM,IAAI,KAAK,CAAC,yBAAyB,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;YACnE,CAAC;YACD,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,yBAAyB,QAAQ,MAAM,YAAY,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,MAAM,IAAI,KAAK,CAAC,mCAAmC,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC;CACF"}
|