cogniscrape 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/dist/graphs/AbstractGraph.d.ts +27 -0
- package/dist/graphs/AbstractGraph.d.ts.map +1 -0
- package/dist/graphs/AbstractGraph.js +44 -0
- package/dist/graphs/AbstractGraph.js.map +1 -0
- package/dist/graphs/BaseGraph.d.ts +30 -0
- package/dist/graphs/BaseGraph.d.ts.map +1 -0
- package/dist/graphs/BaseGraph.js +62 -0
- package/dist/graphs/BaseGraph.js.map +1 -0
- package/dist/graphs/CSVScraperGraph.d.ts +16 -0
- package/dist/graphs/CSVScraperGraph.d.ts.map +1 -0
- package/dist/graphs/CSVScraperGraph.js +84 -0
- package/dist/graphs/CSVScraperGraph.js.map +1 -0
- package/dist/graphs/DepthSearchGraph.d.ts +14 -0
- package/dist/graphs/DepthSearchGraph.d.ts.map +1 -0
- package/dist/graphs/DepthSearchGraph.js +45 -0
- package/dist/graphs/DepthSearchGraph.js.map +1 -0
- package/dist/graphs/JSONScraperGraph.d.ts +18 -0
- package/dist/graphs/JSONScraperGraph.d.ts.map +1 -0
- package/dist/graphs/JSONScraperGraph.js +100 -0
- package/dist/graphs/JSONScraperGraph.js.map +1 -0
- package/dist/graphs/SearchGraph.d.ts +14 -0
- package/dist/graphs/SearchGraph.d.ts.map +1 -0
- package/dist/graphs/SearchGraph.js +42 -0
- package/dist/graphs/SearchGraph.js.map +1 -0
- package/dist/graphs/SmartScraperGraph.d.ts +16 -0
- package/dist/graphs/SmartScraperGraph.d.ts.map +1 -0
- package/dist/graphs/SmartScraperGraph.js +57 -0
- package/dist/graphs/SmartScraperGraph.js.map +1 -0
- package/dist/graphs/SmartScraperMultiGraph.d.ts +17 -0
- package/dist/graphs/SmartScraperMultiGraph.d.ts.map +1 -0
- package/dist/graphs/SmartScraperMultiGraph.js +71 -0
- package/dist/graphs/SmartScraperMultiGraph.js.map +1 -0
- package/dist/graphs/index.d.ts +12 -0
- package/dist/graphs/index.d.ts.map +1 -0
- package/dist/graphs/index.js +23 -0
- package/dist/graphs/index.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +43 -0
- package/dist/index.js.map +1 -0
- package/dist/models/GeminiModel.d.ts +16 -0
- package/dist/models/GeminiModel.d.ts.map +1 -0
- package/dist/models/GeminiModel.js +127 -0
- package/dist/models/GeminiModel.js.map +1 -0
- package/dist/models/OllamaModel.d.ts +15 -0
- package/dist/models/OllamaModel.d.ts.map +1 -0
- package/dist/models/OllamaModel.js +134 -0
- package/dist/models/OllamaModel.js.map +1 -0
- package/dist/models/index.d.ts +8 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +24 -0
- package/dist/models/index.js.map +1 -0
- package/dist/nodes/BaseNode.d.ts +37 -0
- package/dist/nodes/BaseNode.d.ts.map +1 -0
- package/dist/nodes/BaseNode.js +116 -0
- package/dist/nodes/BaseNode.js.map +1 -0
- package/dist/nodes/CSVExporterNode.d.ts +16 -0
- package/dist/nodes/CSVExporterNode.d.ts.map +1 -0
- package/dist/nodes/CSVExporterNode.js +85 -0
- package/dist/nodes/CSVExporterNode.js.map +1 -0
- package/dist/nodes/ConditionalNode.d.ts +16 -0
- package/dist/nodes/ConditionalNode.d.ts.map +1 -0
- package/dist/nodes/ConditionalNode.js +68 -0
- package/dist/nodes/ConditionalNode.js.map +1 -0
- package/dist/nodes/FetchNode.d.ts +15 -0
- package/dist/nodes/FetchNode.d.ts.map +1 -0
- package/dist/nodes/FetchNode.js +182 -0
- package/dist/nodes/FetchNode.js.map +1 -0
- package/dist/nodes/GenerateAnswerNode.d.ts +14 -0
- package/dist/nodes/GenerateAnswerNode.d.ts.map +1 -0
- package/dist/nodes/GenerateAnswerNode.js +86 -0
- package/dist/nodes/GenerateAnswerNode.js.map +1 -0
- package/dist/nodes/JSONExporterNode.d.ts +16 -0
- package/dist/nodes/JSONExporterNode.d.ts.map +1 -0
- package/dist/nodes/JSONExporterNode.js +42 -0
- package/dist/nodes/JSONExporterNode.js.map +1 -0
- package/dist/nodes/MergeNode.d.ts +10 -0
- package/dist/nodes/MergeNode.d.ts.map +1 -0
- package/dist/nodes/MergeNode.js +51 -0
- package/dist/nodes/MergeNode.js.map +1 -0
- package/dist/nodes/PDFScraperNode.d.ts +10 -0
- package/dist/nodes/PDFScraperNode.d.ts.map +1 -0
- package/dist/nodes/PDFScraperNode.js +80 -0
- package/dist/nodes/PDFScraperNode.js.map +1 -0
- package/dist/nodes/ParseNode.d.ts +12 -0
- package/dist/nodes/ParseNode.d.ts.map +1 -0
- package/dist/nodes/ParseNode.js +44 -0
- package/dist/nodes/ParseNode.js.map +1 -0
- package/dist/nodes/RAGNode.d.ts +13 -0
- package/dist/nodes/RAGNode.d.ts.map +1 -0
- package/dist/nodes/RAGNode.js +64 -0
- package/dist/nodes/RAGNode.js.map +1 -0
- package/dist/nodes/ReasoningNode.d.ts +10 -0
- package/dist/nodes/ReasoningNode.d.ts.map +1 -0
- package/dist/nodes/ReasoningNode.js +51 -0
- package/dist/nodes/ReasoningNode.js.map +1 -0
- package/dist/nodes/SearchNode.d.ts +13 -0
- package/dist/nodes/SearchNode.d.ts.map +1 -0
- package/dist/nodes/SearchNode.js +81 -0
- package/dist/nodes/SearchNode.js.map +1 -0
- package/dist/nodes/XMLScraperNode.d.ts +11 -0
- package/dist/nodes/XMLScraperNode.d.ts.map +1 -0
- package/dist/nodes/XMLScraperNode.js +99 -0
- package/dist/nodes/XMLScraperNode.js.map +1 -0
- package/dist/nodes/index.d.ts +17 -0
- package/dist/nodes/index.d.ts.map +1 -0
- package/dist/nodes/index.js +33 -0
- package/dist/nodes/index.js.map +1 -0
- package/dist/prompts/index.d.ts +12 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +117 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/types.d.ts +106 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +13 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cache.d.ts +28 -0
- package/dist/utils/cache.d.ts.map +1 -0
- package/dist/utils/cache.js +72 -0
- package/dist/utils/cache.js.map +1 -0
- package/dist/utils/chunking.d.ts +8 -0
- package/dist/utils/chunking.d.ts.map +1 -0
- package/dist/utils/chunking.js +51 -0
- package/dist/utils/chunking.js.map +1 -0
- package/dist/utils/cleanupHtml.d.ts +7 -0
- package/dist/utils/cleanupHtml.d.ts.map +1 -0
- package/dist/utils/cleanupHtml.js +81 -0
- package/dist/utils/cleanupHtml.js.map +1 -0
- package/dist/utils/convertToMarkdown.d.ts +6 -0
- package/dist/utils/convertToMarkdown.d.ts.map +1 -0
- package/dist/utils/convertToMarkdown.js +61 -0
- package/dist/utils/convertToMarkdown.js.map +1 -0
- package/dist/utils/index.d.ts +13 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +40 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logger.d.ts +14 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +35 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/proxy.d.ts +30 -0
- package/dist/utils/proxy.d.ts.map +1 -0
- package/dist/utils/proxy.js +62 -0
- package/dist/utils/proxy.js.map +1 -0
- package/dist/utils/rateLimiter.d.ts +24 -0
- package/dist/utils/rateLimiter.d.ts.map +1 -0
- package/dist/utils/rateLimiter.js +61 -0
- package/dist/utils/rateLimiter.js.map +1 -0
- package/dist/utils/retry.d.ts +17 -0
- package/dist/utils/retry.d.ts.map +1 -0
- package/dist/utils/retry.js +43 -0
- package/dist/utils/retry.js.map +1 -0
- package/dist/utils/schemaValidator.d.ts +69 -0
- package/dist/utils/schemaValidator.d.ts.map +1 -0
- package/dist/utils/schemaValidator.js +133 -0
- package/dist/utils/schemaValidator.js.map +1 -0
- package/package.json +64 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ParseNode - Parses and chunks document content
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Type declaration for the node that parses document content and splits it
 * into chunks.
 */
export declare class ParseNode extends BaseNode {
|
|
7
|
+
/** Chunk size handed to the chunking utility; the compiled constructor falls back to 2000 when `config.chunkSize` is nullish. */
private chunkSize;
|
|
8
|
+
/** Gate for HTML-to-Markdown conversion; the compiled constructor always sets it to `true`. */
private convertMd;
|
|
9
|
+
/**
 * @param input - Input specification forwarded to BaseNode.
 * @param output - Output key names forwarded to BaseNode.
 * @param config - Optional node configuration (defaults to an empty object in the compiled code).
 */
constructor(input: string, output: string[], config?: NodeConfig);
|
|
10
|
+
/**
 * Reads the documents stored under the first input key, converts HTML ones to
 * Markdown, chunks them and resolves with the state updated with `parsedDoc`.
 * The compiled implementation throws when there are no documents.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
11
|
+
}
|
|
12
|
+
//# sourceMappingURL=ParseNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ParseNode.d.ts","sourceRoot":"","sources":["../../src/nodes/ParseNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAY,UAAU,EAAE,MAAM,UAAU,CAAC;AAI5D,qBAAa,SAAU,SAAQ,QAAQ;IACrC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAU;gBAEf,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAMpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;CAgChE"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ParseNode - Parses and chunks document content
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.ParseNode = void 0;
|
|
7
|
+
const BaseNode_1 = require("./BaseNode");
|
|
8
|
+
const chunking_1 = require("../utils/chunking");
|
|
9
|
+
const convertToMarkdown_1 = require("../utils/convertToMarkdown");
|
|
10
|
+
/**
 * Graph node that prepares documents for the LLM: HTML documents are rewritten
 * as Markdown, then the whole set is split into chunks of `chunkSize`.
 */
class ParseNode extends BaseNode_1.BaseNode {
    /**
     * @param input - Input specification forwarded to BaseNode.
     * @param output - Output key names forwarded to BaseNode.
     * @param config - Optional node configuration; `chunkSize` overrides the default of 2000.
     */
    constructor(input, output, config = {}) {
        super('Parse', 'node', input, output, 1, config);
        // Only a nullish chunkSize selects the default.
        this.chunkSize = config.chunkSize ?? 2000;
        // Always convert to markdown for better LLM processing
        this.convertMd = true;
    }
    /**
     * Converts and chunks the documents found under the first input key.
     *
     * @param state - Current graph state.
     * @returns The state updated with `parsedDoc` holding the chunked documents.
     * @throws Error when the input key holds no documents.
     */
    async process(state) {
        const sourceKey = this.getInputKeys(state)[0];
        const documents = state[sourceKey];
        const nothingToParse = !documents || documents.length === 0;
        if (nothingToParse) {
            throw new Error('No documents to parse');
        }
        const total = documents.length;
        this.logger.info(`Parsing ${total} document(s)`);
        // Rewrites a single HTML document as Markdown; anything else passes through untouched.
        const toMarkdownIfHtml = (doc) => {
            if (doc.metadata.type !== 'html' || !this.convertMd) {
                return doc;
            }
            return {
                ...doc,
                pageContent: (0, convertToMarkdown_1.convertToMarkdown)(doc.pageContent),
                metadata: { ...doc.metadata, converted: 'markdown' },
            };
        };
        const normalizedDocs = documents.map(toMarkdownIfHtml);
        // Split the normalized documents into chunks.
        const chunks = (0, chunking_1.chunkDocuments)(normalizedDocs, this.chunkSize);
        this.logger.success(`Created ${chunks.length} chunks from ${total} document(s)`);
        return this.updateState(state, { parsedDoc: chunks });
    }
}
|
|
43
|
+
exports.ParseNode = ParseNode;
|
|
44
|
+
//# sourceMappingURL=ParseNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ParseNode.js","sourceRoot":"","sources":["../../src/nodes/ParseNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,yCAAsC;AAEtC,gDAAmD;AACnD,kEAA+D;AAE/D,MAAa,SAAU,SAAQ,mBAAQ;IAIrC,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QACjD,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC;QAC1C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,uDAAuD;IAChF,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAe,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAElD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,SAAS,CAAC,MAAM,cAAc,CAAC,CAAC;QAE5D,qCAAqC;QACrC,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACxC,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,KAAK,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnD,OAAO;oBACL,GAAG,GAAG;oBACN,WAAW,EAAE,IAAA,qCAAiB,EAAC,GAAG,CAAC,WAAW,CAAC;oBAC/C,QAAQ,EAAE;wBACR,GAAG,GAAG,CAAC,QAAQ;wBACf,SAAS,EAAE,UAAU;qBACtB;iBACF,CAAC;YACJ,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,WAAW,GAAG,IAAA,yBAAc,EAAC,aAAa,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAElE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,WAAW,CAAC,MAAM,gBAAgB,SAAS,CAAC,MAAM,cAAc,CAAC,CAAC;QAEjG,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC,CAAC;IAC7D,CAAC;CACF;AA1CD,8BA0CC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAGNode - Retrieval Augmented Generation
|
|
3
|
+
* Select relevant chunks from documents based on the query
|
|
4
|
+
*/
|
|
5
|
+
import { BaseNode } from './BaseNode';
|
|
6
|
+
import { GraphState, NodeConfig } from '../types';
|
|
7
|
+
/**
 * Type declaration for the retrieval node that selects the document chunks
 * most relevant to the user's prompt.
 */
export declare class RAGNode extends BaseNode {
|
|
8
|
+
/** Number of top-scoring chunks kept; the compiled constructor sets it to 5. */
private topK;
|
|
9
|
+
/**
 * @param input - Input specification forwarded to BaseNode.
 * @param output - Output key names forwarded to BaseNode.
 * @param config - Optional node configuration (defaults to an empty object in the compiled code).
 */
constructor(input: string, output: string[], config?: NodeConfig);
|
|
10
|
+
/**
 * Scores every chunk in `parsedDoc` (or `doc`) against `userPrompt` and
 * resolves with the state updated with `relevantChunks`. The compiled
 * implementation throws when the prompt or the documents are missing.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
11
|
+
/** Keyword-based relevance scorer used by `process`. */
private calculateRelevanceScore;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=RAGNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RAGNode.d.ts","sourceRoot":"","sources":["../../src/nodes/RAGNode.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAY,UAAU,EAAE,MAAM,UAAU,CAAC;AAE5D,qBAAa,OAAQ,SAAQ,QAAQ;IACnC,OAAO,CAAC,IAAI,CAAS;gBAET,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAKpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;IAiC/D,OAAO,CAAC,uBAAuB;CA6BhC"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* RAGNode - Retrieval Augmented Generation
|
|
4
|
+
* Select relevant chunks from documents based on the query
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.RAGNode = void 0;
|
|
8
|
+
const BaseNode_1 = require("./BaseNode");
|
|
9
|
+
/**
 * Retrieval step: ranks document chunks against the user's prompt with a
 * simple keyword heuristic and keeps the `topK` best ones.
 */
class RAGNode extends BaseNode_1.BaseNode {
    /**
     * @param input - Input specification forwarded to BaseNode.
     * @param output - Output key names forwarded to BaseNode.
     * @param config - Optional node configuration forwarded to BaseNode.
     */
    constructor(input, output, config = {}) {
        super('RAG', 'node', input, output, 2, config);
        // Number of top chunks to retrieve
        this.topK = 5;
    }
    /**
     * Ranks the chunks in `state.parsedDoc` (falling back to `state.doc`).
     *
     * @param state - Current graph state; must carry `userPrompt` and documents.
     * @returns The state updated with `relevantChunks`.
     * @throws Error when the prompt is missing or there are no documents.
     */
    async process(state) {
        const question = state.userPrompt;
        const candidates = (state.parsedDoc || state.doc);
        if (!question) {
            throw new Error('User prompt is required for RAG');
        }
        const nothingToRank = !candidates || candidates.length === 0;
        if (nothingToRank) {
            throw new Error('No documents available for RAG');
        }
        this.logger.info(`Running RAG on ${candidates.length} document chunks`);
        // Pair each chunk with its keyword-based relevance score.
        const ranked = candidates.map((doc) => {
            const score = this.calculateRelevanceScore(question, doc.pageContent);
            return { doc, score };
        });
        // Highest score first, then keep only the leading topK chunks.
        ranked.sort((left, right) => right.score - left.score);
        const relevantChunks = ranked
            .slice(0, this.topK)
            .map((entry) => entry.doc);
        this.logger.success(`Selected ${relevantChunks.length} most relevant chunks`);
        return this.updateState(state, { relevantChunks });
    }
    /**
     * Scores `text` against `query`: 10 points per occurrence of each keyword,
     * 5 more points if the keyword occurs at all, plus a length bonus capped at 10.
     *
     * @param query - The user's prompt.
     * @param text - Chunk content to score.
     * @returns The numeric relevance score.
     */
    calculateRelevanceScore(query, text) {
        const needle = query.toLowerCase();
        const haystack = text.toLowerCase();
        // Words too common to carry meaning are dropped from the query.
        const stopWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'is', 'are', 'was', 'were', 'be', 'been', 'being']);
        const keywords = [];
        for (const token of needle.split(/\W+/)) {
            if (token.length > 2 && !stopWords.has(token)) {
                keywords.push(token);
            }
        }
        const keywordPoints = keywords.reduce((total, keyword) => {
            // Occurrences are counted as substrings (the pattern has no word boundaries).
            const occurrences = haystack.match(new RegExp(keyword, 'g'));
            const occurrencePoints = occurrences === null ? 0 : occurrences.length * 10;
            const presencePoints = haystack.includes(keyword) ? 5 : 0;
            return total + occurrencePoints + presencePoints;
        }, 0);
        // Bonus for length (prefer longer, more detailed chunks)
        const lengthBonus = Math.min(text.length / 100, 10);
        return keywordPoints + lengthBonus;
    }
}
|
|
63
|
+
exports.RAGNode = RAGNode;
|
|
64
|
+
//# sourceMappingURL=RAGNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RAGNode.js","sourceRoot":"","sources":["../../src/nodes/RAGNode.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAEH,yCAAsC;AAGtC,MAAa,OAAQ,SAAQ,mBAAQ;IAGnC,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QAC/C,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,mCAAmC;IACpD,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;QACpC,MAAM,SAAS,GAAe,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,GAAG,CAAe,CAAC;QAE3E,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACrD,CAAC;QAED,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,kBAAkB,SAAS,CAAC,MAAM,kBAAkB,CAAC,CAAC;QAEvE,qDAAqD;QACrD,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACvC,GAAG;YACH,KAAK,EAAE,IAAI,CAAC,uBAAuB,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC;SACjE,CAAC,CAAC,CAAC;QAEJ,+BAA+B;QAC/B,MAAM,cAAc,GAAG,UAAU;aAC9B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;aACjC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC;aACnB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEzB,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,YAAY,cAAc,CAAC,MAAM,uBAAuB,CAAC,CAAC;QAE9E,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;YAC7B,cAAc;SACf,CAAC,CAAC;IACL,CAAC;IAEO,uBAAuB,CAAC,KAAa,EAAE,IAAY;QACzD,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAErC,4DAA4D;QAC5D,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;QACpK,MAAM,QAAQ,GAAG,UAAU;aACxB,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;QAE7D,2CAA2C;QAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,cAAc;YACd,MAAM,YAAY,GAAG,CAAC,
SAAS,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YAC9E,KAAK,IAAI,YAAY,GAAG,EAAE,CAAC;YAE3B,gBAAgB;YAChB,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBAChC,KAAK,IAAI,CAAC,CAAC;YACb,CAAC;QACH,CAAC;QAED,yDAAyD;QACzD,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,EAAE,CAAC,CAAC;QAEzC,OAAO,KAAK,CAAC;IACf,CAAC;CACF;AAtED,0BAsEC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ReasoningNode - Add reasoning step before answer generation
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Type declaration for the node that adds a reasoning step before answer
 * generation.
 */
export declare class ReasoningNode extends BaseNode {
|
|
7
|
+
/**
 * @param input - Input specification forwarded to BaseNode.
 * @param output - Output key names forwarded to BaseNode.
 * @param config - Optional node configuration; the compiled constructor throws when no LLM model ends up on the node.
 */
constructor(input: string, output: string[], config?: NodeConfig);
|
|
8
|
+
/**
 * Asks the LLM to reason over the available content and resolves with the
 * state updated with `reasoning`. The compiled implementation throws when the
 * user prompt or the content is missing.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=ReasoningNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ReasoningNode.d.ts","sourceRoot":"","sources":["../../src/nodes/ReasoningNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAGlD,qBAAa,aAAc,SAAQ,QAAQ;gBAC7B,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAQpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;CAuChE"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ReasoningNode - Add reasoning step before answer generation
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.ReasoningNode = void 0;
|
|
7
|
+
const BaseNode_1 = require("./BaseNode");
|
|
8
|
+
const prompts_1 = require("../prompts");
|
|
9
|
+
/**
 * Graph node that asks the LLM for an explicit reasoning step before the
 * answer is generated.
 */
class ReasoningNode extends BaseNode_1.BaseNode {
    /**
     * @param input - Input specification forwarded to BaseNode.
     * @param output - Output key names forwarded to BaseNode.
     * @param config - Optional node configuration forwarded to BaseNode.
     * @throws Error when no LLM model is available on the node after BaseNode construction.
     */
    constructor(input, output, config = {}) {
        super('Reasoning', 'node', input, output, 2, config);
        if (!this.llmModel) {
            throw new Error('LLM model is required for ReasoningNode');
        }
    }
    /**
     * Builds the reasoning prompt from the best available content
     * (`relevantChunks`, then `parsedDoc`, then `doc`) and stores the LLM's
     * reply under `reasoning`.
     *
     * @param state - Current graph state; must carry `userPrompt` and some content.
     * @returns The state updated with `reasoning`.
     * @throws Error when the user prompt is missing, or when the content is
     *         missing or is an empty document array.
     */
    async process(state) {
        const userPrompt = state.userPrompt;
        const content = state.relevantChunks || state.parsedDoc || state.doc;
        if (!userPrompt) {
            throw new Error('User prompt is required');
        }
        // An empty array is truthy, so it must be rejected explicitly; otherwise
        // the LLM would be asked to reason over an empty string.
        const hasNoContent = !content || (Array.isArray(content) && content.length === 0);
        if (hasNoContent) {
            throw new Error('No content available for reasoning');
        }
        this.logger.info('Performing reasoning step');
        // Flatten the content into one string: document arrays are joined by
        // blank lines, strings pass through, anything else is serialized as JSON.
        let contentStr;
        if (Array.isArray(content)) {
            contentStr = content.map(doc => doc.pageContent).join('\n\n');
        }
        else if (typeof content === 'string') {
            contentStr = content;
        }
        else {
            contentStr = JSON.stringify(content);
        }
        // Only the first 50000 characters of the content go into the prompt.
        const prompt = (0, prompts_1.formatPrompt)(prompts_1.REASONING_TEMPLATE, {
            user_prompt: userPrompt,
            content: contentStr.slice(0, 50000),
        });
        const reasoning = await this.llmModel.generate(prompt);
        this.logger.success('Reasoning generated');
        // Log only a short preview of the reasoning.
        this.logger.debug(`Reasoning: ${reasoning.slice(0, 200)}...`);
        return this.updateState(state, {
            reasoning,
        });
    }
}
|
|
50
|
+
exports.ReasoningNode = ReasoningNode;
|
|
51
|
+
//# sourceMappingURL=ReasoningNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ReasoningNode.js","sourceRoot":"","sources":["../../src/nodes/ReasoningNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,yCAAsC;AAEtC,wCAA8D;AAE9D,MAAa,aAAc,SAAQ,mBAAQ;IACzC,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,WAAW,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QAErD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;IACH,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;QACpC,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,GAAG,CAAC;QAErE,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QAE9C,yBAAyB;QACzB,IAAI,UAAkB,CAAC;QACvB,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChE,CAAC;aAAM,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;YACvC,UAAU,GAAG,OAAO,CAAC;QACvB,CAAC;aAAM,CAAC;YACN,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACvC,CAAC;QAED,qBAAqB;QACrB,MAAM,MAAM,GAAG,IAAA,sBAAY,EAAC,4BAAkB,EAAE;YAC9C,WAAW,EAAE,UAAU;YACvB,OAAO,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;SACpC,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAExD,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;QAE9D,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;YAC7B,SAAS;SACV,CAAC,CAAC;IACL,CAAC;CACF;AAhDD,sCAgDC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SearchNode - Search the internet using DuckDuckGo
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Type declaration for the node that searches the internet using DuckDuckGo.
 */
export declare class SearchNode extends BaseNode {
|
|
7
|
+
/** Upper bound on the related topics inspected per search; the compiled constructor sets it to 10. */
private maxResults;
|
|
8
|
+
/** Search engine name; the compiled constructor sets it to 'duckduckgo'. */
private searchEngine;
|
|
9
|
+
/**
 * @param input - Input specification forwarded to BaseNode.
 * @param output - Output key names forwarded to BaseNode.
 * @param config - Optional node configuration (defaults to an empty object in the compiled code).
 */
constructor(input: string, output: string[], config?: NodeConfig);
|
|
10
|
+
/**
 * Runs the search for the query stored under the first input key and resolves
 * with the state updated with `searchResults`. The compiled implementation
 * throws when the query is missing.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
11
|
+
/** Queries the DuckDuckGo API and converts the response into documents. */
private searchDuckDuckGo;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=SearchNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SearchNode.d.ts","sourceRoot":"","sources":["../../src/nodes/SearchNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAY,UAAU,EAAE,MAAM,UAAU,CAAC;AAE5D,qBAAa,UAAW,SAAQ,QAAQ;IACtC,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,YAAY,CAAS;gBAEjB,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAMpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;YAmBjD,gBAAgB;CAmD/B"}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* SearchNode - Search the internet using DuckDuckGo
|
|
4
|
+
*/
|
|
5
|
+
// Default-import interop helper (presumably emitted by the TypeScript compiler).
// Reuses `this.__importDefault` when one already exists. Otherwise a module
// flagged `__esModule` is returned as-is and anything else is wrapped as
// `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
6
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
7
|
+
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.SearchNode = void 0;
|
|
10
|
+
const axios_1 = __importDefault(require("axios"));
|
|
11
|
+
const BaseNode_1 = require("./BaseNode");
|
|
12
|
+
/**
 * Graph node that looks a query up through DuckDuckGo and stores the hits as
 * documents under `searchResults`.
 */
class SearchNode extends BaseNode_1.BaseNode {
    /**
     * @param input - Input specification forwarded to BaseNode.
     * @param output - Output key names forwarded to BaseNode.
     * @param config - Optional node configuration forwarded to BaseNode.
     */
    constructor(input, output, config = {}) {
        super('Search', 'node', input, output, 1, config);
        this.maxResults = 10;
        this.searchEngine = 'duckduckgo';
    }
    /**
     * Searches for the query stored under the first input key.
     *
     * @param state - Current graph state.
     * @returns The state updated with `searchResults`.
     * @throws Error when the query is missing or the search request fails.
     */
    async process(state) {
        const queryKey = this.getInputKeys(state)[0];
        const query = state[queryKey];
        if (!query) {
            throw new Error('Search query is required');
        }
        this.logger.info(`Searching for: ${query}`);
        const hits = await this.searchDuckDuckGo(query);
        this.logger.success(`Found ${hits.length} search results`);
        return this.updateState(state, { searchResults: hits });
    }
    /**
     * Calls DuckDuckGo's instant answer API and maps the abstract and the
     * related topics onto document objects.
     *
     * @param query - Search text.
     * @returns Documents with `pageContent` and `metadata` (title, url, source, type).
     * @throws Error prefixed with "Search failed: " on any failure.
     */
    async searchDuckDuckGo(query) {
        try {
            const requestOptions = {
                params: {
                    q: query,
                    format: 'json',
                    no_html: 1,
                    skip_disambig: 1,
                },
                timeout: 10000,
            };
            const response = await axios_1.default.get('https://api.duckduckgo.com/', requestOptions);
            const payload = response.data;
            const collected = [];
            // The main abstract, when present, becomes the first document.
            if (payload.Abstract) {
                const abstractMeta = {
                    title: payload.Heading || query,
                    url: payload.AbstractURL,
                    source: 'duckduckgo',
                    type: 'abstract',
                };
                collected.push({ pageContent: payload.Abstract, metadata: abstractMeta });
            }
            // Related topics: only the first maxResults entries are inspected,
            // and entries lacking text or a URL are skipped.
            if (payload.RelatedTopics) {
                const candidates = payload.RelatedTopics.slice(0, this.maxResults);
                for (const topic of candidates) {
                    if (!topic.Text || !topic.FirstURL) {
                        continue;
                    }
                    const relatedMeta = {
                        title: topic.Text.split(' - ')[0],
                        url: topic.FirstURL,
                        source: 'duckduckgo',
                        type: 'related',
                    };
                    collected.push({ pageContent: topic.Text, metadata: relatedMeta });
                }
            }
            return collected;
        }
        catch (error) {
            // Log first, then surface the same message to the caller.
            const failure = `Search failed: ${error.message}`;
            this.logger.error(failure);
            throw new Error(failure);
        }
    }
}
|
|
80
|
+
exports.SearchNode = SearchNode;
|
|
81
|
+
//# sourceMappingURL=SearchNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SearchNode.js","sourceRoot":"","sources":["../../src/nodes/SearchNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;AAEH,kDAA0B;AAC1B,yCAAsC;AAGtC,MAAa,UAAW,SAAQ,mBAAQ;IAItC,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QAClD,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;QACrB,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACnC,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAElC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC9C,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QAE5C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;QAEnD,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,OAAO,CAAC,MAAM,iBAAiB,CAAC,CAAC;QAE9D,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;YAC7B,aAAa,EAAE,OAAO;SACvB,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,KAAa;QAC1C,IAAI,CAAC;YACH,wCAAwC;YACxC,MAAM,QAAQ,GAAG,MAAM,eAAK,CAAC,GAAG,CAAC,6BAA6B,EAAE;gBAC9D,MAAM,EAAE;oBACN,CAAC,EAAE,KAAK;oBACR,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE,CAAC;oBACV,aAAa,EAAE,CAAC;iBACjB;gBACD,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;YAEH,MAAM,OAAO,GAAe,EAAE,CAAC;YAE/B,iCAAiC;YACjC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC3B,OAAO,CAAC,IAAI,CAAC;oBACX,WAAW,EAAE,QAAQ,CAAC,IAAI,CAAC,QAAQ;oBACnC,QAAQ,EAAE;wBACR,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,KAAK;wBACrC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC,WAAW;wBAC9B,MAAM,EAAE,YAAY;wBACpB,IAAI,EAAE,UAAU;qBACjB;iBACF,CAAC,CAAC;YACL,CAAC;YAED,qBAAqB;YACrB,IAAI,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;gBAChC,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;oBAC1E,IAAI,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;wBACjC,OAAO,CAAC,IAAI,CAAC;4BACX,WAAW,EAAE,KAAK,CAAC,IAAI;4BACvB,QAAQ,EAAE;gCACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gCACjC,GAAG,EAAE,KAAK,CAAC,QAAQ;gCACnB,MAAM,EAAE,YAAY;gCACpB,IAAI,EAAE,SAAS;6BAChB;yBACF,CAAC,CAAC;oBACL,CAAC;gBA
CH,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,kBAAkB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YACrD,MAAM,IAAI,KAAK,CAAC,kBAAkB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;CACF;AAhFD,gCAgFC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XMLScraperNode - Parse and extract data from XML documents
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Type declaration for the node that parses XML documents and extracts their
 * data.
 */
export declare class XMLScraperNode extends BaseNode {
|
|
7
|
+
/** XML parser instance; the compiled constructor creates an `XMLParser` from fast-xml-parser. */
private parser;
|
|
8
|
+
/**
 * @param input - Input specification forwarded to BaseNode.
 * @param output - Output key names forwarded to BaseNode.
 * @param config - Optional node configuration (defaults to an empty object in the compiled code).
 */
constructor(input: string, output: string[], config?: NodeConfig);
|
|
9
|
+
/**
 * Parses the XML source stored under the first input key. In the compiled
 * code a value starting with '<' is treated as XML content and anything else
 * as a file path; it throws when the source is missing.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
10
|
+
}
|
|
11
|
+
//# sourceMappingURL=XMLScraperNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"XMLScraperNode.d.ts","sourceRoot":"","sources":["../../src/nodes/XMLScraperNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAY,UAAU,EAAE,MAAM,UAAU,CAAC;AAI5D,qBAAa,cAAe,SAAQ,QAAQ;IAC1C,OAAO,CAAC,MAAM,CAAY;gBAEd,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAcpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;CAgDhE"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* XMLScraperNode - Parse and extract data from XML documents
|
|
4
|
+
*/
|
|
5
|
+
// Re-export helper (presumably emitted by the TypeScript compiler): exposes
// property `k` of module `m` on object `o` under the name `k2` (defaults to `k`).
// Reuses `this.__createBinding` when one already exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
// Fall back to a live getter when there is no descriptor, when an accessor
// belongs to a module not flagged __esModule, or when a data property is
// writable or configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
// Without Object.create the helper degrades to a plain value copy.
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
// Interop helper (presumably emitted by the TypeScript compiler): stores `v`
// as the "default" property of `o`. Reuses `this.__setModuleDefault` when one
// already exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
// Without Object.create the helper degrades to a plain assignment.
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
// Namespace-import interop helper (presumably emitted by the TypeScript
// compiler). Reuses `this.__importStar` when one already exists. The returned
// function hands back a module flagged `__esModule` untouched. For any other
// module it builds a fresh object that re-exposes every own key except
// "default" through __createBinding, then sets "default" to the module itself
// through __setModuleDefault.
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
// Lists the own keys of `o`. On first call it replaces itself with
// Object.getOwnPropertyNames, or with a for-in/hasOwnProperty fallback.
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
// Copy every own key but "default"; null and undefined modules yield an empty namespace.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.XMLScraperNode = void 0;
|
|
40
|
+
const BaseNode_1 = require("./BaseNode");
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const fast_xml_parser_1 = require("fast-xml-parser");
|
|
43
|
+
/**
 * XMLScraperNode - parses an XML document into a JavaScript object.
 *
 * The XML can be supplied either as inline content or as a path to a file on
 * disk. The parsed result is written back to the state twice: as a
 * pretty-printed JSON document under `doc`, and as the raw parsed object
 * under `xmlData`.
 */
class XMLScraperNode extends BaseNode_1.BaseNode {
    /**
     * @param input  Forwarded unchanged to the BaseNode constructor.
     * @param output Forwarded unchanged to the BaseNode constructor.
     * @param config Optional configuration forwarded to the BaseNode constructor; defaults to `{}`.
     */
    constructor(input, output, config = {}) {
        // NOTE(review): the literal `1` is a positional BaseNode argument whose
        // meaning is defined in BaseNode, not here - confirm against that class.
        super('XMLScraper', 'node', input, output, 1, config);
        // Keep attributes (prefixed with "@_"), expose mixed text as "#text",
        // coerce tag/attribute values to primitives and trim surrounding whitespace.
        this.parser = new fast_xml_parser_1.XMLParser({
            ignoreAttributes: false,
            attributeNamePrefix: '@_',
            textNodeName: '#text',
            parseTagValue: true,
            parseAttributeValue: true,
            trimValues: true,
        });
    }
    /**
     * Reads the XML source from the first input key of `state`, parses it and
     * returns the updated state.
     *
     * @param state Current state; the value under the first key returned by
     *              `getInputKeys(state)` must be a non-empty string holding
     *              either XML content or a file path.
     * @returns The result of `updateState` with `doc` (a single document whose
     *          `pageContent` is the parsed data as indented JSON) and `xmlData`
     *          (the parsed object itself).
     * @throws Error if the source is missing, is not a string, cannot be read
     *         from disk, or cannot be parsed. Read/parse failures are wrapped
     *         as "Failed to parse XML: ..." with the original error as `cause`.
     */
    async process(state) {
        const inputKeys = this.getInputKeys(state);
        const source = state[inputKeys[0]];
        if (!source) {
            throw new Error('XML source (path or content) is required');
        }
        if (typeof source !== 'string') {
            // Previously surfaced as the misleading
            // "Failed to parse XML: source.startsWith is not a function".
            throw new Error('XML source (path or content) must be a string');
        }
        // Ignore leading whitespace / BOM when sniffing for markup; otherwise
        // inline XML such as "\n<root/>" would be mistaken for a file path.
        const isInlineXml = source.trimStart().startsWith('<');
        // Never dump a whole inline document into the log; paths are logged as before.
        this.logger.info(isInlineXml
            ? `Parsing XML from: inline content (${source.length} characters)`
            : `Parsing XML from: ${source}`);
        try {
            // Non-blocking read: this method is async, so do not stall the event loop.
            const xmlContent = isInlineXml
                ? source
                : await fs.promises.readFile(source, 'utf-8');
            // Parse XML to JavaScript object
            const parsedData = this.parser.parse(xmlContent);
            // Convert to JSON string for easier processing
            const jsonContent = JSON.stringify(parsedData, null, 2);
            const document = {
                pageContent: jsonContent,
                metadata: {
                    source,
                    type: 'xml',
                    originalXml: xmlContent.slice(0, 1000), // Store first 1000 chars
                    extractedAt: new Date().toISOString(),
                },
            };
            this.logger.success(`Parsed XML successfully (${xmlContent.length} characters)`);
            return this.updateState(state, {
                doc: [document],
                xmlData: parsedData, // Also store parsed object for direct access
            });
        }
        catch (error) {
            // Thrown values are not guaranteed to be Error instances.
            const reason = error instanceof Error ? error.message : String(error);
            // Keep the original error reachable for callers that need the stack / code.
            throw new Error(`Failed to parse XML: ${reason}`, { cause: error });
        }
    }
}
|
|
98
|
+
exports.XMLScraperNode = XMLScraperNode;
|
|
99
|
+
//# sourceMappingURL=XMLScraperNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"XMLScraperNode.js","sourceRoot":"","sources":["../../src/nodes/XMLScraperNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,yCAAsC;AAEtC,uCAAyB;AACzB,qDAA4C;AAE5C,MAAa,cAAe,SAAQ,mBAAQ;IAG1C,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QAEtD,qCAAqC;QACrC,IAAI,CAAC,MAAM,GAAG,IAAI,2BAAS,CAAC;YAC1B,gBAAgB,EAAE,KAAK;YACvB,mBAAmB,EAAE,IAAI;YACzB,YAAY,EAAE,OAAO;YACrB,aAAa,EAAE,IAAI;YACnB,mBAAmB,EAAE,IAAI;YACzB,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAEnC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC9D,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,qBAAqB,MAAM,EAAE,CAAC,CAAC;QAEhD,IAAI,CAAC;YACH,IAAI,UAAkB,CAAC;YAEvB,gDAAgD;YAChD,IAAI,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC3B,mBAAmB;gBACnB,UAAU,GAAG,MAAM,CAAC;YACtB,CAAC;iBAAM,CAAC;gBACN,mBAAmB;gBACnB,UAAU,GAAG,EAAE,CAAC,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YAChD,CAAC;YAED,iCAAiC;YACjC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YAEjD,+CAA+C;YAC/C,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAExD,MAAM,QAAQ,GAAa;gBACzB,WAAW,EAAE,WAAW;gBACxB,QAAQ,EAAE;oBACR,MAAM;oBACN,IAAI,EAAE,KAAK;oBACX,WAAW,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,yBAAyB;oBACjE,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACtC;aACF,CAAC;YAEF,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,4BAA4B,UAAU,CAAC,MAAM,cAAc,CAAC,CAAC;YAEjF,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;gBAC7B,GAAG,EAAE,CAAC,QAAQ,CAAC;gBACf,OAAO,EAAE,UAAU,EAAE,6CAA6C;aACnE,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,wBAAwB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;CACF;AAjED,wCAiEC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Nodes index
|
|
3
|
+
*/
|
|
4
|
+
export { BaseNode } from './BaseNode';
|
|
5
|
+
export { FetchNode } from './FetchNode';
|
|
6
|
+
export { ParseNode } from './ParseNode';
|
|
7
|
+
export { GenerateAnswerNode } from './GenerateAnswerNode';
|
|
8
|
+
export { ConditionalNode } from './ConditionalNode';
|
|
9
|
+
export { SearchNode } from './SearchNode';
|
|
10
|
+
export { RAGNode } from './RAGNode';
|
|
11
|
+
export { ReasoningNode } from './ReasoningNode';
|
|
12
|
+
export { MergeNode } from './MergeNode';
|
|
13
|
+
export { CSVExporterNode } from './CSVExporterNode';
|
|
14
|
+
export { JSONExporterNode } from './JSONExporterNode';
|
|
15
|
+
export { PDFScraperNode } from './PDFScraperNode';
|
|
16
|
+
export { XMLScraperNode } from './XMLScraperNode';
|
|
17
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/nodes/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Nodes index
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.XMLScraperNode = exports.PDFScraperNode = exports.JSONExporterNode = exports.CSVExporterNode = exports.MergeNode = exports.ReasoningNode = exports.RAGNode = exports.SearchNode = exports.ConditionalNode = exports.GenerateAnswerNode = exports.ParseNode = exports.FetchNode = exports.BaseNode = void 0;
|
|
7
|
+
var BaseNode_1 = require("./BaseNode");
|
|
8
|
+
Object.defineProperty(exports, "BaseNode", { enumerable: true, get: function () { return BaseNode_1.BaseNode; } });
|
|
9
|
+
var FetchNode_1 = require("./FetchNode");
|
|
10
|
+
Object.defineProperty(exports, "FetchNode", { enumerable: true, get: function () { return FetchNode_1.FetchNode; } });
|
|
11
|
+
var ParseNode_1 = require("./ParseNode");
|
|
12
|
+
Object.defineProperty(exports, "ParseNode", { enumerable: true, get: function () { return ParseNode_1.ParseNode; } });
|
|
13
|
+
var GenerateAnswerNode_1 = require("./GenerateAnswerNode");
|
|
14
|
+
Object.defineProperty(exports, "GenerateAnswerNode", { enumerable: true, get: function () { return GenerateAnswerNode_1.GenerateAnswerNode; } });
|
|
15
|
+
var ConditionalNode_1 = require("./ConditionalNode");
|
|
16
|
+
Object.defineProperty(exports, "ConditionalNode", { enumerable: true, get: function () { return ConditionalNode_1.ConditionalNode; } });
|
|
17
|
+
var SearchNode_1 = require("./SearchNode");
|
|
18
|
+
Object.defineProperty(exports, "SearchNode", { enumerable: true, get: function () { return SearchNode_1.SearchNode; } });
|
|
19
|
+
var RAGNode_1 = require("./RAGNode");
|
|
20
|
+
Object.defineProperty(exports, "RAGNode", { enumerable: true, get: function () { return RAGNode_1.RAGNode; } });
|
|
21
|
+
var ReasoningNode_1 = require("./ReasoningNode");
|
|
22
|
+
Object.defineProperty(exports, "ReasoningNode", { enumerable: true, get: function () { return ReasoningNode_1.ReasoningNode; } });
|
|
23
|
+
var MergeNode_1 = require("./MergeNode");
|
|
24
|
+
Object.defineProperty(exports, "MergeNode", { enumerable: true, get: function () { return MergeNode_1.MergeNode; } });
|
|
25
|
+
var CSVExporterNode_1 = require("./CSVExporterNode");
|
|
26
|
+
Object.defineProperty(exports, "CSVExporterNode", { enumerable: true, get: function () { return CSVExporterNode_1.CSVExporterNode; } });
|
|
27
|
+
var JSONExporterNode_1 = require("./JSONExporterNode");
|
|
28
|
+
Object.defineProperty(exports, "JSONExporterNode", { enumerable: true, get: function () { return JSONExporterNode_1.JSONExporterNode; } });
|
|
29
|
+
var PDFScraperNode_1 = require("./PDFScraperNode");
|
|
30
|
+
Object.defineProperty(exports, "PDFScraperNode", { enumerable: true, get: function () { return PDFScraperNode_1.PDFScraperNode; } });
|
|
31
|
+
var XMLScraperNode_1 = require("./XMLScraperNode");
|
|
32
|
+
Object.defineProperty(exports, "XMLScraperNode", { enumerable: true, get: function () { return XMLScraperNode_1.XMLScraperNode; } });
|
|
33
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/nodes/index.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,uCAAsC;AAA7B,oGAAA,QAAQ,OAAA;AACjB,yCAAwC;AAA/B,sGAAA,SAAS,OAAA;AAClB,yCAAwC;AAA/B,sGAAA,SAAS,OAAA;AAClB,2DAA0D;AAAjD,wHAAA,kBAAkB,OAAA;AAC3B,qDAAoD;AAA3C,kHAAA,eAAe,OAAA;AACxB,2CAA0C;AAAjC,wGAAA,UAAU,OAAA;AACnB,qCAAoC;AAA3B,kGAAA,OAAO,OAAA;AAChB,iDAAgD;AAAvC,8GAAA,aAAa,OAAA;AACtB,yCAAwC;AAA/B,sGAAA,SAAS,OAAA;AAClB,qDAAoD;AAA3C,kHAAA,eAAe,OAAA;AACxB,uDAAsD;AAA7C,oHAAA,gBAAgB,OAAA;AACzB,mDAAkD;AAAzC,gHAAA,cAAc,OAAA;AACvB,mDAAkD;AAAzC,gHAAA,cAAc,OAAA"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt templates for different scraping scenarios
|
|
3
|
+
*/
|
|
4
|
+
export declare const TEMPLATE_NO_CHUNKS = "\nYou are a web scraping expert. Your task is to extract specific information from the provided HTML content based on the user's request.\n\nUser Request: {user_prompt}\n\nHTML Content:\n{content}\n\nInstructions:\n1. Carefully read the user's request\n2. Extract ONLY the requested information from the HTML content\n3. Return the data in a structured JSON format\n4. If information is not found, return null for that field\n5. Be precise and accurate\n\nRespond with valid JSON only.\n";
|
|
5
|
+
export declare const TEMPLATE_CHUNKS = "\nYou are a web scraping expert. Your task is to extract specific information from the provided content chunks based on the user's request.\n\nUser Request: {user_prompt}\n\nContent Chunks:\n{chunks}\n\nInstructions:\n1. Analyze all provided chunks\n2. Extract ONLY the requested information\n3. Combine information from multiple chunks if needed\n4. Return the data in a structured JSON format\n5. If information is not found, return null for that field\n\nRespond with valid JSON only.\n";
|
|
6
|
+
export declare const TEMPLATE_MERGE = "\nYou are a data merging expert. You have received multiple JSON responses from different content chunks. Your task is to merge them into a single, coherent response.\n\nUser Request: {user_prompt}\n\nResponses to Merge:\n{responses}\n\nInstructions:\n1. Merge all responses into a single JSON object\n2. Remove duplicates\n3. Ensure consistency across all fields\n4. Maintain the original structure\n5. If there are conflicts, prefer the most complete information\n\nRespond with valid JSON only.\n";
|
|
7
|
+
export declare const TEMPLATE_NO_CHUNKS_MD = "\nYou are a web scraping expert. Your task is to extract specific information from the provided Markdown content based on the user's request.\n\nUser Request: {user_prompt}\n\nMarkdown Content:\n{content}\n\nInstructions:\n1. Carefully read the user's request\n2. Extract ONLY the requested information from the Markdown content\n3. Return the data in a structured JSON format\n4. If information is not found, return null for that field\n5. Be precise and accurate\n\nRespond with valid JSON only.\n";
|
|
8
|
+
export declare const TEMPLATE_CHUNKS_MD = "\nYou are a web scraping expert. Your task is to extract specific information from the provided Markdown chunks based on the user's request.\n\nUser Request: {user_prompt}\n\nMarkdown Chunks:\n{chunks}\n\nInstructions:\n1. Analyze all provided chunks\n2. Extract ONLY the requested information\n3. Combine information from multiple chunks if needed\n4. Return the data in a structured JSON format\n5. If information is not found, return null for that field\n\nRespond with valid JSON only.\n";
|
|
9
|
+
export declare const TEMPLATE_MERGE_MD = "\nYou are a data merging expert. You have received multiple JSON responses from different content chunks. Your task is to merge them into a single, coherent response.\n\nUser Request: {user_prompt}\n\nResponses to Merge:\n{responses}\n\nInstructions:\n1. Merge all responses into a single JSON object\n2. Remove duplicates\n3. Ensure consistency across all fields\n4. Maintain the original structure\n5. If there are conflicts, prefer the most complete information\n\nRespond with valid JSON only.\n";
|
|
10
|
+
export declare const REASONING_TEMPLATE = "\nYou are an AI reasoning expert. Analyze the following content and user request to determine the best approach for extracting the requested information.\n\nUser Request: {user_prompt}\n\nContent:\n{content}\n\nInstructions:\n1. Identify what information the user is looking for\n2. Determine which parts of the content are relevant\n3. Suggest a strategy for extracting this information\n4. Consider edge cases and potential issues\n\nProvide your reasoning and strategy.\n";
|
|
11
|
+
export declare function formatPrompt(template: string, variables: Record<string, string>): string;
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/prompts/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,eAAO,MAAM,kBAAkB,4eAgB9B,CAAC;AAEF,eAAO,MAAM,eAAe,8eAgB3B,CAAC;AAEF,eAAO,MAAM,cAAc,wfAgB1B,CAAC;AAEF,eAAO,MAAM,qBAAqB,wfAgBjC,CAAC;AAEF,eAAO,MAAM,kBAAkB,gfAgB9B,CAAC;AAEF,eAAO,MAAM,iBAAiB,wfAAiB,CAAC;AAEhD,eAAO,MAAM,kBAAkB,+dAe9B,CAAC;AAEF,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM,CAQxF"}
|