cogniscrape 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/dist/graphs/AbstractGraph.d.ts +27 -0
- package/dist/graphs/AbstractGraph.d.ts.map +1 -0
- package/dist/graphs/AbstractGraph.js +44 -0
- package/dist/graphs/AbstractGraph.js.map +1 -0
- package/dist/graphs/BaseGraph.d.ts +30 -0
- package/dist/graphs/BaseGraph.d.ts.map +1 -0
- package/dist/graphs/BaseGraph.js +62 -0
- package/dist/graphs/BaseGraph.js.map +1 -0
- package/dist/graphs/CSVScraperGraph.d.ts +16 -0
- package/dist/graphs/CSVScraperGraph.d.ts.map +1 -0
- package/dist/graphs/CSVScraperGraph.js +84 -0
- package/dist/graphs/CSVScraperGraph.js.map +1 -0
- package/dist/graphs/DepthSearchGraph.d.ts +14 -0
- package/dist/graphs/DepthSearchGraph.d.ts.map +1 -0
- package/dist/graphs/DepthSearchGraph.js +45 -0
- package/dist/graphs/DepthSearchGraph.js.map +1 -0
- package/dist/graphs/JSONScraperGraph.d.ts +18 -0
- package/dist/graphs/JSONScraperGraph.d.ts.map +1 -0
- package/dist/graphs/JSONScraperGraph.js +100 -0
- package/dist/graphs/JSONScraperGraph.js.map +1 -0
- package/dist/graphs/SearchGraph.d.ts +14 -0
- package/dist/graphs/SearchGraph.d.ts.map +1 -0
- package/dist/graphs/SearchGraph.js +42 -0
- package/dist/graphs/SearchGraph.js.map +1 -0
- package/dist/graphs/SmartScraperGraph.d.ts +16 -0
- package/dist/graphs/SmartScraperGraph.d.ts.map +1 -0
- package/dist/graphs/SmartScraperGraph.js +57 -0
- package/dist/graphs/SmartScraperGraph.js.map +1 -0
- package/dist/graphs/SmartScraperMultiGraph.d.ts +17 -0
- package/dist/graphs/SmartScraperMultiGraph.d.ts.map +1 -0
- package/dist/graphs/SmartScraperMultiGraph.js +71 -0
- package/dist/graphs/SmartScraperMultiGraph.js.map +1 -0
- package/dist/graphs/index.d.ts +12 -0
- package/dist/graphs/index.d.ts.map +1 -0
- package/dist/graphs/index.js +23 -0
- package/dist/graphs/index.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +43 -0
- package/dist/index.js.map +1 -0
- package/dist/models/GeminiModel.d.ts +16 -0
- package/dist/models/GeminiModel.d.ts.map +1 -0
- package/dist/models/GeminiModel.js +127 -0
- package/dist/models/GeminiModel.js.map +1 -0
- package/dist/models/OllamaModel.d.ts +15 -0
- package/dist/models/OllamaModel.d.ts.map +1 -0
- package/dist/models/OllamaModel.js +134 -0
- package/dist/models/OllamaModel.js.map +1 -0
- package/dist/models/index.d.ts +8 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +24 -0
- package/dist/models/index.js.map +1 -0
- package/dist/nodes/BaseNode.d.ts +37 -0
- package/dist/nodes/BaseNode.d.ts.map +1 -0
- package/dist/nodes/BaseNode.js +116 -0
- package/dist/nodes/BaseNode.js.map +1 -0
- package/dist/nodes/CSVExporterNode.d.ts +16 -0
- package/dist/nodes/CSVExporterNode.d.ts.map +1 -0
- package/dist/nodes/CSVExporterNode.js +85 -0
- package/dist/nodes/CSVExporterNode.js.map +1 -0
- package/dist/nodes/ConditionalNode.d.ts +16 -0
- package/dist/nodes/ConditionalNode.d.ts.map +1 -0
- package/dist/nodes/ConditionalNode.js +68 -0
- package/dist/nodes/ConditionalNode.js.map +1 -0
- package/dist/nodes/FetchNode.d.ts +15 -0
- package/dist/nodes/FetchNode.d.ts.map +1 -0
- package/dist/nodes/FetchNode.js +182 -0
- package/dist/nodes/FetchNode.js.map +1 -0
- package/dist/nodes/GenerateAnswerNode.d.ts +14 -0
- package/dist/nodes/GenerateAnswerNode.d.ts.map +1 -0
- package/dist/nodes/GenerateAnswerNode.js +86 -0
- package/dist/nodes/GenerateAnswerNode.js.map +1 -0
- package/dist/nodes/JSONExporterNode.d.ts +16 -0
- package/dist/nodes/JSONExporterNode.d.ts.map +1 -0
- package/dist/nodes/JSONExporterNode.js +42 -0
- package/dist/nodes/JSONExporterNode.js.map +1 -0
- package/dist/nodes/MergeNode.d.ts +10 -0
- package/dist/nodes/MergeNode.d.ts.map +1 -0
- package/dist/nodes/MergeNode.js +51 -0
- package/dist/nodes/MergeNode.js.map +1 -0
- package/dist/nodes/PDFScraperNode.d.ts +10 -0
- package/dist/nodes/PDFScraperNode.d.ts.map +1 -0
- package/dist/nodes/PDFScraperNode.js +80 -0
- package/dist/nodes/PDFScraperNode.js.map +1 -0
- package/dist/nodes/ParseNode.d.ts +12 -0
- package/dist/nodes/ParseNode.d.ts.map +1 -0
- package/dist/nodes/ParseNode.js +44 -0
- package/dist/nodes/ParseNode.js.map +1 -0
- package/dist/nodes/RAGNode.d.ts +13 -0
- package/dist/nodes/RAGNode.d.ts.map +1 -0
- package/dist/nodes/RAGNode.js +64 -0
- package/dist/nodes/RAGNode.js.map +1 -0
- package/dist/nodes/ReasoningNode.d.ts +10 -0
- package/dist/nodes/ReasoningNode.d.ts.map +1 -0
- package/dist/nodes/ReasoningNode.js +51 -0
- package/dist/nodes/ReasoningNode.js.map +1 -0
- package/dist/nodes/SearchNode.d.ts +13 -0
- package/dist/nodes/SearchNode.d.ts.map +1 -0
- package/dist/nodes/SearchNode.js +81 -0
- package/dist/nodes/SearchNode.js.map +1 -0
- package/dist/nodes/XMLScraperNode.d.ts +11 -0
- package/dist/nodes/XMLScraperNode.d.ts.map +1 -0
- package/dist/nodes/XMLScraperNode.js +99 -0
- package/dist/nodes/XMLScraperNode.js.map +1 -0
- package/dist/nodes/index.d.ts +17 -0
- package/dist/nodes/index.d.ts.map +1 -0
- package/dist/nodes/index.js +33 -0
- package/dist/nodes/index.js.map +1 -0
- package/dist/prompts/index.d.ts +12 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +117 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/types.d.ts +106 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +13 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cache.d.ts +28 -0
- package/dist/utils/cache.d.ts.map +1 -0
- package/dist/utils/cache.js +72 -0
- package/dist/utils/cache.js.map +1 -0
- package/dist/utils/chunking.d.ts +8 -0
- package/dist/utils/chunking.d.ts.map +1 -0
- package/dist/utils/chunking.js +51 -0
- package/dist/utils/chunking.js.map +1 -0
- package/dist/utils/cleanupHtml.d.ts +7 -0
- package/dist/utils/cleanupHtml.d.ts.map +1 -0
- package/dist/utils/cleanupHtml.js +81 -0
- package/dist/utils/cleanupHtml.js.map +1 -0
- package/dist/utils/convertToMarkdown.d.ts +6 -0
- package/dist/utils/convertToMarkdown.d.ts.map +1 -0
- package/dist/utils/convertToMarkdown.js +61 -0
- package/dist/utils/convertToMarkdown.js.map +1 -0
- package/dist/utils/index.d.ts +13 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +40 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logger.d.ts +14 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +35 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/proxy.d.ts +30 -0
- package/dist/utils/proxy.d.ts.map +1 -0
- package/dist/utils/proxy.js +62 -0
- package/dist/utils/proxy.js.map +1 -0
- package/dist/utils/rateLimiter.d.ts +24 -0
- package/dist/utils/rateLimiter.d.ts.map +1 -0
- package/dist/utils/rateLimiter.js +61 -0
- package/dist/utils/rateLimiter.js.map +1 -0
- package/dist/utils/retry.d.ts +17 -0
- package/dist/utils/retry.d.ts.map +1 -0
- package/dist/utils/retry.js +43 -0
- package/dist/utils/retry.js.map +1 -0
- package/dist/utils/schemaValidator.d.ts +69 -0
- package/dist/utils/schemaValidator.d.ts.map +1 -0
- package/dist/utils/schemaValidator.js +133 -0
- package/dist/utils/schemaValidator.js.map +1 -0
- package/package.json +64 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ConditionalNode - Execute conditional logic in the graph
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.ConditionalNode = void 0;
|
|
7
|
+
const BaseNode_1 = require("./BaseNode");
|
|
8
|
+
/**
 * Graph node that evaluates a small textual condition against one state key
 * and records the boolean outcome under `<keyName>_condition_met`.
 *
 * Supported condition forms (anything else falls back to a truthiness test):
 *   - `null` / `undefined`          -> value is null or undefined
 *   - `empty`                       -> value is falsy, or an empty collection/object
 *   - `== "NA"`, `=== 'NA'`         -> value is exactly the string "NA"
 *   - `!= "NA"`, `!== 'NA'`         -> value is anything but the string "NA"
 *   - `includes('text')`            -> String(value) contains `text`
 *   - `not <condition>`             -> negation of any of the above
 */
class ConditionalNode extends BaseNode_1.BaseNode {
    /**
     * @param {string} input - Input expression, forwarded to BaseNode.
     * @param {string[]} output - Output keys, forwarded to BaseNode.
     * @param {{condition: string, keyName: string}} config - Must provide both fields.
     * @throws {Error} When `condition` or `keyName` is missing.
     */
    constructor(input, output, config) {
        super('Conditional', 'conditional_node', input, output, 1, config);
        // Optional chaining: a missing config should produce the descriptive
        // error below rather than a TypeError on property access.
        if (!config?.condition) {
            throw new Error('ConditionalNode requires a condition');
        }
        if (!config.keyName) {
            throw new Error('ConditionalNode requires a keyName');
        }
        this.condition = config.condition;
        this.keyName = config.keyName;
    }

    /**
     * Evaluates the configured condition against `state[keyName]`.
     *
     * @param {object} state - Current graph state.
     * @returns {Promise<object>} Result of `updateState` with `<keyName>_condition_met` set.
     */
    async process(state) {
        const value = state[this.keyName];
        this.logger.info(`Evaluating condition: ${this.condition}`);
        const conditionMet = this.evaluateCondition(value, this.condition);
        this.logger.info(`Condition result: ${conditionMet}`);
        return this.updateState(state, {
            [`${this.keyName}_condition_met`]: conditionMet,
        });
    }

    /**
     * Interprets one condition string (see the class comment for the grammar).
     * Any error raised while evaluating is logged as a warning and treated as `false`.
     *
     * @param {*} value - Value read from the state.
     * @param {string} condition - Condition expression.
     * @returns {boolean} Whether the condition holds for `value`.
     */
    evaluateCondition(value, condition) {
        try {
            const expression = condition.trim();

            // Only a LEADING "not" is a negation. Matching "not " anywhere in the
            // string would mangle conditions such as includes('do not track').
            const negation = /^not\s+/.exec(expression);
            if (negation) {
                return !this.evaluateCondition(value, expression.slice(negation[0].length));
            }

            if (expression === 'null' || expression === 'undefined') {
                // Loose equality on purpose: matches both null and undefined.
                return value == null;
            }

            if (expression === 'empty') {
                return this.isEmptyValue(value);
            }

            // (In)equality against the literal "NA". The operator is captured so
            // that `!==` is not mistaken for `==` (its last two characters).
            const naComparison = /(!==?|===?)\s*(['"])NA\2/.exec(expression);
            if (naComparison) {
                const isNA = value === 'NA';
                return naComparison[1].startsWith('!') ? !isNA : isNA;
            }

            const includesCall = /includes\(['"](.+)['"]\)/.exec(expression);
            if (includesCall) {
                return String(value).includes(includesCall[1]);
            }

            // Default: the value exists and is truthy.
            return Boolean(value);
        }
        catch (error) {
            this.logger.warn(`Condition evaluation error: ${error}`);
            return false;
        }
    }

    /**
     * Emptiness test used by the `empty` condition.
     *
     * @param {*} value - Value to inspect.
     * @returns {boolean} True for falsy values, zero-length arrays/array-likes,
     *   empty Map/Set instances and objects without own enumerable keys.
     */
    isEmptyValue(value) {
        if (!value) {
            return true;
        }
        // Truthy primitives (non-zero numbers, `true`, non-empty strings) and
        // functions are not empty; Object.keys() on them would always be [].
        if (typeof value !== 'object') {
            return false;
        }
        if (value instanceof Map || value instanceof Set) {
            return value.size === 0;
        }
        return value.length === 0 || Object.keys(value).length === 0;
    }
}
|
|
67
|
+
exports.ConditionalNode = ConditionalNode;
|
|
68
|
+
//# sourceMappingURL=ConditionalNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ConditionalNode.js","sourceRoot":"","sources":["../../src/nodes/ConditionalNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,yCAAsC;AAGtC,MAAa,eAAgB,SAAQ,mBAAQ;IAI3C,YAAY,KAAa,EAAE,MAAgB,EAAE,MAA2D;QACtG,KAAK,CAAC,aAAa,EAAE,kBAAkB,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QAEnE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAClC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;IAChC,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAElC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,yBAAyB,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;QAE5D,qBAAqB;QACrB,MAAM,YAAY,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAEnE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,qBAAqB,YAAY,EAAE,CAAC,CAAC;QAEtD,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;YAC7B,CAAC,GAAG,IAAI,CAAC,OAAO,gBAAgB,CAAC,EAAE,YAAY;SAChD,CAAC,CAAC;IACL,CAAC;IAEO,iBAAiB,CAAC,KAAU,EAAE,SAAiB;QACrD,IAAI,CAAC;YACH,8BAA8B;YAC9B,yDAAyD;YAEzD,6BAA6B;YAC7B,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,MAAM,cAAc,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC5D,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;YACxD,CAAC;YAED,2BAA2B;YAC3B,IAAI,SAAS,KAAK,MAAM,IAAI,SAAS,KAAK,WAAW,EAAE,CAAC;gBACtD,OAAO,KAAK,IAAI,IAAI,CAAC;YACvB,CAAC;YAED,kBAAkB;YAClB,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC;gBAC1B,OAAO,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;YACzE,CAAC;YAED,eAAe;YACf,IAAI,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACjE,OAAO,KAAK,KAAK,IAAI,CAAC;YACxB,CAAC;YAED,4BAA4B;YAC5B,IAAI,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;gBACnC,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;gBAC1D,IAAI,KAAK,EAAE,CAAC;oBACV,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;YAED,+CAA
+C;YAC/C,OAAO,CAAC,CAAC,KAAK,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;YACzD,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;CACF;AA1ED,0CA0EC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FetchNode - Fetches content from URLs or local files
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Node that loads a document either from an http(s) URL (rendered through a
 * Playwright Chromium browser) or from a local .html/.htm/.txt/.md/.json file,
 * and stores the result in the graph state under `doc`.
 */
export declare class FetchNode extends BaseNode {
|
|
7
|
+
private headless; // Passed to chromium.launch({ headless }); set from config.headless, default true.
|
|
8
|
+
private timeout; // Passed to page.setDefaultTimeout(); set from config.timeout, default 30000.
|
|
9
|
+
private cut; // When true, fetched HTML is additionally run through cleanupHtml(html, true); default true.
|
|
10
|
+
constructor(input: string, output: string[], config?: NodeConfig); // config defaults to {} in the implementation.
|
|
11
|
+
protected process(state: GraphState): Promise<GraphState>; // Reads the source from the first input key and writes the fetched documents to `doc`.
|
|
12
|
+
private fetchFromUrl; // Cache lookup, rate-limit wait, then browser fetch wrapped in retry().
|
|
13
|
+
private fetchFromLocal; // Reads a local file; throws on a missing file or unsupported extension.
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=FetchNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FetchNode.d.ts","sourceRoot":"","sources":["../../src/nodes/FetchNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAY,UAAU,EAAE,MAAM,UAAU,CAAC;AAO5D,qBAAa,SAAU,SAAQ,QAAQ;IACrC,OAAO,CAAC,QAAQ,CAAU;IAC1B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,GAAG,CAAU;gBAET,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAOpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;YAejD,YAAY;YAkGZ,cAAc;CAsC7B"}
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* FetchNode - Fetches content from URLs or local files
|
|
4
|
+
*/
|
|
5
|
+
// TypeScript-emitted helper: exposes export `k` of module `m` on object `o`
// under the name `k2` (which defaults to `k`). An already-defined helper on
// `this` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
// Fall back to a live getter when there is no descriptor, when an accessor comes from a
// module not flagged __esModule, or when a data property is writable or configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
}) : (function(o, m, k, k2) { // Fallback when Object.create is unavailable: plain value copy.
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
// TypeScript-emitted helper: stores `v` as the "default" property of `o`
// (an enumerable defined property when Object.create exists, otherwise a
// plain assignment).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
// TypeScript-emitted helper backing `import * as ns from "mod"`.
// Returns `mod` unchanged when it is flagged __esModule; otherwise builds a
// namespace object that binds every own property of `mod` except "default"
// and sets `default` to `mod` itself.
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
// Self-replacing function: the first call picks the key-listing implementation, later calls reuse it.
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.FetchNode = void 0;
|
|
40
|
+
const playwright_1 = require("playwright");
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const path = __importStar(require("path"));
|
|
43
|
+
const BaseNode_1 = require("./BaseNode");
|
|
44
|
+
const cleanupHtml_1 = require("../utils/cleanupHtml");
|
|
45
|
+
const retry_1 = require("../utils/retry");
|
|
46
|
+
const rateLimiter_1 = require("../utils/rateLimiter");
|
|
47
|
+
const cache_1 = require("../utils/cache");
|
|
48
|
+
const proxy_1 = require("../utils/proxy");
|
|
49
|
+
/**
 * Graph node that loads a document from an http(s) URL (rendered with a
 * Playwright Chromium browser) or from a local file, and stores the resulting
 * document list in the state under `doc`.
 */
class FetchNode extends BaseNode_1.BaseNode {
    /**
     * @param {string} input - Input expression, forwarded to BaseNode.
     * @param {string[]} output - Output keys, forwarded to BaseNode.
     * @param {object} [config={}] - Node configuration. Recognised here:
     *   `headless` (default true), `timeout` (default 30000, handed to
     *   `page.setDefaultTimeout`) and `cut` (default true).
     */
    constructor(input, output, config = {}) {
        super('Fetch', 'node', input, output, 1, config);
        this.headless = config.headless ?? true;
        this.timeout = config.timeout ?? 30000;
        this.cut = config.cut ?? true;
    }

    /**
     * Fetches the source named by the first input key.
     *
     * @param {object} state - Current graph state.
     * @returns {Promise<object>} Result of `updateState` with `doc` set to the fetched documents.
     * @throws {Error} When the source is not a non-empty string, or fetching fails.
     */
    async process(state) {
        const inputKeys = this.getInputKeys(state);
        const source = state[inputKeys[0]];
        // Fail with a descriptive error instead of "source.startsWith is not a function".
        if (typeof source !== 'string' || source.length === 0) {
            throw new Error(`FetchNode expected a URL or file path string in state key "${inputKeys[0]}"`);
        }
        // URL schemes are case-insensitive, so "HTTPS://..." is a URL, not a file path.
        const isRemote = /^https?:\/\//i.test(source);
        const documents = isRemote
            ? await this.fetchFromUrl(source)
            : await this.fetchFromLocal(source);
        return this.updateState(state, { doc: documents });
    }

    /**
     * Loads a page through Chromium, cleans its HTML and caches the result.
     * A cached entry short-circuits everything; otherwise the request waits on
     * the rate limiter and the browser work runs inside `retry`.
     *
     * @param {string} url - Absolute http(s) URL.
     * @returns {Promise<object[]>} Single-element document list
     *   (`pageContent` plus `metadata` with `source`, `type`, `fetchedAt`).
     */
    async fetchFromUrl(url) {
        const cached = cache_1.cache.get(url);
        if (cached) {
            this.logger.info(`Using cached content for: ${url}`);
            return cached;
        }
        this.logger.info(`Fetching content from URL: ${url}`);
        await rateLimiter_1.rateLimiter.wait();

        const proxyConfig = this.config.proxyConfig;
        const retryConfig = this.config.retryConfig;

        return (0, retry_1.retry)(async () => {
            const browser = await playwright_1.chromium.launch({ headless: this.headless });
            let html;
            try {
                const contextOptions = {
                    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                };
                if (proxyConfig?.enabled && proxyConfig.proxies) {
                    proxy_1.proxyRotator.setProxies(proxyConfig.proxies);
                    const proxy = proxy_1.proxyRotator.getNextProxy();
                    if (proxy) {
                        contextOptions.proxy = {
                            server: proxy,
                        };
                        this.logger.debug(`Using proxy: ${proxy}`);
                    }
                }
                const context = await browser.newContext(contextOptions);
                const page = await context.newPage();
                page.setDefaultTimeout(this.timeout);
                await page.goto(url, { waitUntil: 'networkidle' });
                // Give late-running scripts a moment to render dynamic content.
                await page.waitForTimeout(2000);
                html = await page.content();
            }
            catch (error) {
                // Only browser/network failures count against the proxy; errors
                // in the HTML clean-up below happen after this block.
                if (proxyConfig?.enabled) {
                    proxy_1.proxyRotator.markProxyFailed();
                }
                throw error;
            }
            finally {
                // Close exactly once on every path. A failing close must not
                // replace the navigation error (or a successful result).
                try {
                    await browser.close();
                }
                catch (closeError) {
                    this.logger.warn(`Failed to close browser: ${closeError}`);
                }
            }

            let cleanedHtml = (0, cleanupHtml_1.removeUnwantedTags)(html);
            if (this.cut) {
                cleanedHtml = (0, cleanupHtml_1.cleanupHtml)(cleanedHtml, true);
            }
            this.logger.success(`Fetched ${cleanedHtml.length} characters from ${url}`);
            const result = [
                {
                    pageContent: cleanedHtml,
                    metadata: {
                        source: url,
                        type: 'html',
                        fetchedAt: new Date().toISOString(),
                    },
                },
            ];
            cache_1.cache.set(url, result, this.config.cacheConfig?.ttl);
            return result;
        }, {
            // NOTE(review): `||` makes an explicit 0 fall back to the default;
            // confirm against retry()'s contract before switching to `??`.
            maxRetries: retryConfig?.maxRetries || 3,
            initialDelay: retryConfig?.initialDelay || 1000,
            maxDelay: retryConfig?.maxDelay || 10000,
            backoffMultiplier: retryConfig?.backoffMultiplier || 2,
        });
    }

    /**
     * Reads a local .html/.htm/.txt/.md/.json file as UTF-8 text. HTML files
     * are passed through `cleanupHtml` when `cut` is enabled.
     *
     * @param {string} localPath - Path of the file to read.
     * @returns {Promise<object[]>} Single-element document list; `metadata.type`
     *   is the extension without the dot.
     * @throws {Error} When the file is missing, unsupported or unreadable; the
     *   underlying error is attached as `cause`.
     */
    async fetchFromLocal(localPath) {
        this.logger.info(`Reading content from local file: ${localPath}`);
        try {
            if (!fs.existsSync(localPath)) {
                throw new Error(`File not found: ${localPath}`);
            }
            const ext = path.extname(localPath).toLowerCase();
            const isHtml = ext === '.html' || ext === '.htm';
            const isPlainText = ext === '.txt' || ext === '.md' || ext === '.json';
            if (!isHtml && !isPlainText) {
                throw new Error(`Unsupported file type: ${ext}`);
            }
            let content = fs.readFileSync(localPath, 'utf-8');
            if (isHtml && this.cut) {
                content = (0, cleanupHtml_1.cleanupHtml)(content, true);
            }
            this.logger.success(`Read ${content.length} characters from ${localPath}`);
            return [
                {
                    pageContent: content,
                    metadata: {
                        source: localPath,
                        type: ext.slice(1),
                        fetchedAt: new Date().toISOString(),
                    },
                },
            ];
        }
        catch (error) {
            // Keep the original error reachable for callers that need the stack or code.
            throw new Error(`Failed to read local file ${localPath}: ${error.message}`, { cause: error });
        }
    }
}
|
|
181
|
+
exports.FetchNode = FetchNode;
|
|
182
|
+
//# sourceMappingURL=FetchNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FetchNode.js","sourceRoot":"","sources":["../../src/nodes/FetchNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,2CAAsC;AAEtC,uCAAyB;AACzB,2CAA6B;AAC7B,yCAAsC;AAEtC,sDAAuE;AACvE,0CAAuC;AACvC,sDAAmD;AACnD,0CAAuC;AACvC,0CAA8C;AAE9C,MAAa,SAAU,SAAQ,mBAAQ;IAKrC,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QACjD,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,IAAI,CAAC;QACxC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC;QACvC,IAAI,CAAC,GAAG,GAAG,MAAM,CAAC,GAAG,IAAI,IAAI,CAAC;IAChC,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAEnC,IAAI,SAAqB,CAAC;QAE1B,IAAI,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAClE,SAAS,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAC9C,CAAC;aAAM,CAAC;YACN,SAAS,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;QAChD,CAAC;QAED,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,CAAC;IACrD,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,GAAW;QACpC,oBAAoB;QACpB,MAAM,MAAM,GAAG,aAAK,CAAC,GAAG,CAAa,GAAG,CAAC,CAAC;QAC1C,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,6BAA6B,GAAG,EAAE,CAAC,CAAC;YACrD,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;QAEtD,yBAAyB;QACzB,MAAM,yBAAW,CAAC,IAAI,EAAE,CAAC;QAEzB,iCAAiC;QACjC,MAAM,SAAS,GAAG,MAAM,IAAA,aAAK,EAC3B,KAAK,IAAI,EAAE;YACT,MAAM,OAAO,GAAG,MAAM,qBAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEnE,IAAI,CAAC;gBACH,MAAM,cAAc,GAAQ;oBAC1B,SAAS,EAAE,8DAA8D;iBAC1E,CAAC;gBAEF,0BAA0B;gBAC1B,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;oBACxE,oBAAY,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;oBACzD,MAAM,KAAK,GAAG,oBAAY,CAAC,YAAY,EAAE,CAAC;oBAC1C,IAAI,KAAK,EAAE,CAAC;wBACV,cAAc,CAAC,KAAK,GAAG;4BACrB,MAAM,EAAE,KAAK;yBACd,CAAC;wBACF,IAAI,CAAC,MAAM,CA
AC,KAAK,CAAC,gBAAgB,KAAK,EAAE,CAAC,CAAC;oBAC7C,CAAC;gBACH,CAAC;gBAED,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;gBAEzD,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;gBAErC,cAAc;gBACd,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAErC,kBAAkB;gBAClB,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC,CAAC;gBAEnD,iCAAiC;gBACjC,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;gBAEpC,mBAAmB;gBACnB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;gBAElC,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;gBAEtB,gBAAgB;gBAChB,IAAI,WAAW,GAAG,IAAA,gCAAkB,EAAC,IAAI,CAAC,CAAC;gBAC3C,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;oBACb,WAAW,GAAG,IAAA,yBAAW,EAAC,WAAW,EAAE,IAAI,CAAC,CAAC;gBAC/C,CAAC;gBAED,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,WAAW,CAAC,MAAM,oBAAoB,GAAG,EAAE,CAAC,CAAC;gBAE5E,MAAM,MAAM,GAAG;oBACb;wBACE,WAAW,EAAE,WAAW;wBACxB,QAAQ,EAAE;4BACR,MAAM,EAAE,GAAG;4BACX,IAAI,EAAE,MAAM;4BACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;yBACpC;qBACF;iBACF,CAAC;gBAEF,mBAAmB;gBACnB,aAAK,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBAErD,OAAO,MAAM,CAAC;YAChB,CAAC;YAAC,OAAO,KAAU,EAAE,CAAC;gBACpB,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;gBAEtB,qCAAqC;gBACrC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,EAAE,CAAC;oBACrC,oBAAY,CAAC,eAAe,EAAE,CAAC;gBACjC,CAAC;gBAED,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC,EACD;YACE,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,UAAU,IAAI,CAAC;YACpD,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,YAAY,IAAI,IAAI;YAC3D,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,IAAI,KAAK;YACpD,iBAAiB,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,iBAAiB,IAAI,CAAC;SACnE,CACF,CAAC;QAEF,OAAO,SAAS,CAAC;IACjB,CAAC;IAES,KAAK,CAAC,cAAc,CAAC,SAAiB;QAC5C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,oCAAoC,SAAS,EAAE,CAAC,CAAC;QAElE,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,mBAAmB,SAAS,EAAE,CAAC,CAAC;YAClD,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;YAClD,IAAI,OAAe,CAAC;YAEpB,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;gBACtC,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC
,SAAS,EAAE,OAAO,CAAC,CAAC;gBAC9C,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAA,yBAAW,EAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAC5D,CAAC;iBAAM,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;gBAC3C,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAChD,CAAC;iBAAM,IAAI,GAAG,KAAK,OAAO,EAAE,CAAC;gBAC3B,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAChD,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,EAAE,CAAC,CAAC;YACnD,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,OAAO,CAAC,MAAM,oBAAoB,SAAS,EAAE,CAAC,CAAC;YAE3E,OAAO;gBACL;oBACE,WAAW,EAAE,OAAO;oBACpB,QAAQ,EAAE;wBACR,MAAM,EAAE,SAAS;wBACjB,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;wBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;qBACpC;iBACF;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,6BAA6B,SAAS,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC9E,CAAC;IACH,CAAC;CACF;AAnKD,8BAmKC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GenerateAnswerNode - Generates answers using LLM
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Node that asks the configured LLM to answer `state.userPrompt` from the
 * content found in the state and stores the result under `answer`.
 * The constructor throws when no LLM model is available on the node.
 */
export declare class GenerateAnswerNode extends BaseNode {
|
|
7
|
+
private additionalInfo?; // From config.additionalInfo; appended to the prompt under "Additional Information:" when set.
|
|
8
|
+
private schema?; // From config.schema; stored by the constructor, not read by the methods of this class.
|
|
9
|
+
constructor(input: string, output: string[], config?: NodeConfig); // config defaults to {} in the implementation.
|
|
10
|
+
protected process(state: GraphState): Promise<GraphState>; // Content is taken from relevantChunks, parsedDoc, doc or content (first truthy one).
|
|
11
|
+
private processSingleContent; // Builds a prompt from one string (truncated to 100000 characters) and calls llmModel.generateJson.
|
|
12
|
+
private processChunks; // Joins several chunks under "--- Chunk N ---" headers; a single chunk is delegated to processSingleContent.
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=GenerateAnswerNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"GenerateAnswerNode.d.ts","sourceRoot":"","sources":["../../src/nodes/GenerateAnswerNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAY,UAAU,EAAE,MAAM,UAAU,CAAC;AAS5D,qBAAa,kBAAmB,SAAQ,QAAQ;IAC9C,OAAO,CAAC,cAAc,CAAC,CAAS;IAChC,OAAO,CAAC,MAAM,CAAC,CAAM;gBAET,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAUpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;YAiCjD,oBAAoB;YAiBpB,aAAa;CAmC5B"}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* GenerateAnswerNode - Generates answers using LLM
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.GenerateAnswerNode = void 0;
|
|
7
|
+
const BaseNode_1 = require("./BaseNode");
|
|
8
|
+
const prompts_1 = require("../prompts");
|
|
9
|
+
/**
 * Graph node that asks the configured LLM to answer `state.userPrompt` from
 * the content available in the state and stores the result under `answer`.
 */
class GenerateAnswerNode extends BaseNode_1.BaseNode {
    /**
     * @param {string} input - Input expression, forwarded to BaseNode.
     * @param {string[]} output - Output keys, forwarded to BaseNode.
     * @param {object} [config={}] - Node configuration; `additionalInfo` and
     *   `schema` are read here.
     * @throws {Error} When the node ends up without an LLM model.
     */
    constructor(input, output, config = {}) {
        super('GenerateAnswer', 'node', input, output, 2, config);
        this.additionalInfo = config.additionalInfo;
        this.schema = config.schema;
        if (!this.llmModel) {
            throw new Error('LLM model is required for GenerateAnswerNode');
        }
    }

    /**
     * Picks the content to answer from and dispatches to the matching strategy.
     *
     * @param {object} state - Current graph state; must contain `userPrompt`.
     * @returns {Promise<object>} Result of `updateState` with `answer` set.
     * @throws {Error} When the prompt or usable content is missing, or the
     *   content is neither an array nor a string.
     */
    async process(state) {
        const userPrompt = state.userPrompt;
        if (!userPrompt) {
            throw new Error('User prompt is required');
        }
        // Priority order of content sources. An empty array is truthy, so it is
        // skipped explicitly; otherwise e.g. `relevantChunks: []` would hide a
        // perfectly good `parsedDoc` and end in "Invalid content format".
        const candidates = [state.relevantChunks, state.parsedDoc, state.doc, state.content];
        const content = candidates.find((candidate) => Boolean(candidate) && !(Array.isArray(candidate) && candidate.length === 0));
        if (!content) {
            throw new Error('No content found in state');
        }
        this.logger.info('Generating answer using LLM');
        let answer;
        if (Array.isArray(content)) {
            // Multiple documents/chunks
            answer = await this.processChunks(userPrompt, content);
        }
        else if (typeof content === 'string') {
            // Single string content
            answer = await this.processSingleContent(userPrompt, content);
        }
        else {
            throw new Error('Invalid content format');
        }
        this.logger.success('Answer generated successfully');
        return this.updateState(state, { answer });
    }

    /**
     * Answers from a single piece of text.
     *
     * @param {string} userPrompt - The user's question.
     * @param {string} content - Text to answer from.
     * @returns {Promise<*>} Whatever `llmModel.generateJson` resolves to.
     */
    async processSingleContent(userPrompt, content) {
        // Heuristic: any of these characters selects the markdown template.
        const isMarkdown = ['#', '[', ']'].some((marker) => content.includes(marker));
        const template = isMarkdown ? prompts_1.TEMPLATE_NO_CHUNKS_MD : prompts_1.TEMPLATE_NO_CHUNKS;
        const prompt = (0, prompts_1.formatPrompt)(template, {
            user_prompt: userPrompt,
            content: this.limitPromptContent(content),
        });
        return this.llmModel.generateJson(this.appendAdditionalInfo(prompt));
    }

    /**
     * Answers from several chunks by joining them into one prompt.
     *
     * @param {string} userPrompt - The user's question.
     * @param {object[]} chunks - Documents exposing `pageContent` (and optionally `metadata`).
     * @returns {Promise<*>} Whatever `llmModel.generateJson` resolves to.
     * @throws {Error} When `chunks` is empty.
     */
    async processChunks(userPrompt, chunks) {
        if (chunks.length === 0) {
            throw new Error('No chunks to process');
        }
        // If only one chunk, process directly
        if (chunks.length === 1) {
            return this.processSingleContent(userPrompt, chunks[0].pageContent);
        }
        this.logger.info(`Processing ${chunks.length} chunks`);
        // The first chunk decides which template is used for all of them.
        const [firstChunk] = chunks;
        const isMarkdown = firstChunk.metadata?.converted === 'markdown' ||
            firstChunk.pageContent.includes('#');
        const template = isMarkdown ? prompts_1.TEMPLATE_CHUNKS_MD : prompts_1.TEMPLATE_CHUNKS;
        const combinedChunks = chunks
            .map((chunk, i) => `--- Chunk ${i + 1} ---\n${chunk.pageContent}`)
            .join('\n\n');
        const prompt = (0, prompts_1.formatPrompt)(template, {
            user_prompt: userPrompt,
            chunks: this.limitPromptContent(combinedChunks),
        });
        return this.llmModel.generateJson(this.appendAdditionalInfo(prompt));
    }

    /**
     * Caps the text embedded in a prompt at 100000 characters.
     *
     * @param {string} text - Text destined for the prompt.
     * @returns {string} `text`, truncated if longer than the cap.
     */
    limitPromptContent(text) {
        return text.slice(0, 100000);
    }

    /**
     * Appends the configured additional information to a prompt, if any.
     *
     * @param {string} prompt - Formatted prompt.
     * @returns {string} The prompt, extended when `additionalInfo` is set.
     */
    appendAdditionalInfo(prompt) {
        if (!this.additionalInfo) {
            return prompt;
        }
        return `${prompt}\n\nAdditional Information:\n${this.additionalInfo}`;
    }
}
|
|
85
|
+
exports.GenerateAnswerNode = GenerateAnswerNode;
|
|
86
|
+
//# sourceMappingURL=GenerateAnswerNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"GenerateAnswerNode.js","sourceRoot":"","sources":["../../src/nodes/GenerateAnswerNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,yCAAsC;AAEtC,wCAMoB;AAEpB,MAAa,kBAAmB,SAAQ,mBAAQ;IAI9C,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,gBAAgB,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QAC1D,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,cAAc,CAAC;QAC5C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAE5B,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;QAEpC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,8CAA8C;QAC9C,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC;QAEtF,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC/C,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;QAEhD,IAAI,MAAW,CAAC;QAEhB,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,4BAA4B;YAC5B,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;aAAM,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;YACvC,wBAAwB;YACxB,MAAM,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAChE,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,+BAA+B,CAAC,CAAC;QAErD,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;IAC7C,CAAC;IAEO,KAAK,CAAC,oBAAoB,CAAC,UAAkB,EAAE,OAAe;QACpE,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC3F,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,+BAAqB,CAAC,CAAC,CAAC,4BAAkB,CAAC;QAEzE,MAAM,MAAM,GAAG,IAAA,sBAAY,EAAC,QAAQ,EAAE;YACpC,WAAW,EAAE,UAAU;YACvB,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,qBAAqB;SACzD,CAAC,CAAC;QAEH,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,MAAM,UAAU,GAAG,GAAG,MAAM,gCAAgC,IAAI,CAAC,cAAc,EAAE,CAAC;YAClF,OAAO,MAAM,IAAI,CAAC,QAAS,CAAC,YAAY,CAAC,UAAU,CAAC,CA
AC;QACvD,CAAC;QAED,OAAO,MAAM,IAAI,CAAC,QAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IACnD,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,UAAkB,EAAE,MAAkB;QAChE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;QAC1C,CAAC;QAED,sCAAsC;QACtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC,oBAAoB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QACtE,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;QAEvD,+BAA+B;QAC/B,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,SAAS,KAAK,UAAU;YAC5C,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAEvD,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,4BAAkB,CAAC,CAAC,CAAC,yBAAe,CAAC;QAEnE,iBAAiB;QACjB,MAAM,cAAc,GAAG,MAAM;aAC1B,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,SAAS,KAAK,CAAC,WAAW,EAAE,CAAC;aACjE,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,MAAM,GAAG,IAAA,sBAAY,EAAC,QAAQ,EAAE;YACpC,WAAW,EAAE,UAAU;YACvB,MAAM,EAAE,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,aAAa;SACvD,CAAC,CAAC;QAEH,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,MAAM,UAAU,GAAG,GAAG,MAAM,gCAAgC,IAAI,CAAC,cAAc,EAAE,CAAC;YAClF,OAAO,MAAM,IAAI,CAAC,QAAS,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QACvD,CAAC;QAED,OAAO,MAAM,IAAI,CAAC,QAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IACnD,CAAC;CACF;AAnGD,gDAmGC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSONExporterNode - Export data to JSON format
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Node that serializes the graph result to a JSON string.
 * The implementation reads `state.answer`, falling back to `state.parsedDoc`
 * when the answer is falsy, and stores the text under the `json` state key.
 */
export declare class JSONExporterNode extends BaseNode {
|
|
7
|
+
/** Copied from `config.outputPath`; reported on the state as `jsonPath`. */
private outputPath?;
|
|
8
|
+
/** When true the output is indented by two spaces; defaults to true if `config.pretty` is null/undefined. */
private pretty;
|
|
9
|
+
/**
 * @param input  forwarded unchanged to the BaseNode constructor
 * @param output forwarded unchanged to the BaseNode constructor
 * @param config base node configuration plus the two exporter options below
 */
constructor(input: string, output: string[], config?: NodeConfig & {
|
|
10
|
+
outputPath?: string;
|
|
11
|
+
pretty?: boolean;
|
|
12
|
+
});
|
|
13
|
+
/**
 * Serializes the available data and returns the state produced by
 * `updateState` with `json` and `jsonPath` set.
 * Rejects with "No data available to export to JSON" when neither
 * `state.answer` nor `state.parsedDoc` is truthy.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
14
|
+
/** Wraps `JSON.stringify`; failures are rethrown with a "Failed to convert data to JSON" prefix. */
private convertToJSON;
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=JSONExporterNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"JSONExporterNode.d.ts","sourceRoot":"","sources":["../../src/nodes/JSONExporterNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAElD,qBAAa,gBAAiB,SAAQ,QAAQ;IAC5C,OAAO,CAAC,UAAU,CAAC,CAAS;IAC5B,OAAO,CAAC,MAAM,CAAU;gBAEZ,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAU,GAAG;QAAE,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,OAAO,CAAA;KAAO;cAMhG,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;IAmB/D,OAAO,CAAC,aAAa;CAWtB"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* JSONExporterNode - Export data to JSON format
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.JSONExporterNode = void 0;
|
|
7
|
+
const BaseNode_1 = require("./BaseNode");
|
|
8
|
+
/**
 * Node that serializes the graph result to a JSON string.
 *
 * Reads `state.answer` (falling back to `state.parsedDoc` when the answer is
 * falsy) and publishes the serialized text under the `json` state key together
 * with the configured output path under `jsonPath`.
 */
class JSONExporterNode extends BaseNode_1.BaseNode {
    /**
     * @param {string} input - Forwarded unchanged to the BaseNode constructor.
     * @param {string[]} output - Forwarded unchanged to the BaseNode constructor.
     * @param {object} [config] - Node configuration, also forwarded to BaseNode.
     * @param {string} [config.outputPath] - Path reported back as `jsonPath`.
     * @param {boolean} [config.pretty=true] - Indent the output by two spaces.
     */
    constructor(input, output, config = {}) {
        super('JSONExporter', 'node', input, output, 1, config);
        this.outputPath = config.outputPath;
        // `??` keeps an explicit `false`; only null/undefined fall back to true.
        this.pretty = config.pretty ?? true;
    }
    /**
     * Serializes the available data and merges the result into the state.
     *
     * @param {object} state - Current graph state.
     * @returns {Promise<object>} Whatever `updateState` returns for `{ json, jsonPath }`.
     * @throws {Error} When neither `state.answer` nor `state.parsedDoc` is truthy.
     */
    async process(state) {
        const data = state.answer || state.parsedDoc;
        if (!data) {
            throw new Error('No data available to export to JSON');
        }
        this.logger.info('Converting data to JSON format');
        const json = this.convertToJSON(data);
        this.logger.success('Generated JSON output');
        return this.updateState(state, {
            json,
            jsonPath: this.outputPath,
        });
    }
    /**
     * Stringifies `data`, pretty-printed when `this.pretty` is set.
     *
     * @param {*} data - Value to serialize.
     * @returns {string} JSON text.
     * @throws {Error} Wrapping any failure raised by `JSON.stringify`
     *   (for example circular references or BigInt values); the original
     *   error is preserved as `cause` so its stack is not lost.
     */
    convertToJSON(data) {
        try {
            return this.pretty ? JSON.stringify(data, null, 2) : JSON.stringify(data);
        }
        catch (error) {
            // A custom toJSON may throw a non-Error value; avoid printing "undefined".
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to convert data to JSON: ${reason}`, { cause: error });
        }
    }
}
|
|
41
|
+
exports.JSONExporterNode = JSONExporterNode;
|
|
42
|
+
//# sourceMappingURL=JSONExporterNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"JSONExporterNode.js","sourceRoot":"","sources":["../../src/nodes/JSONExporterNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,yCAAsC;AAGtC,MAAa,gBAAiB,SAAQ,mBAAQ;IAI5C,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAiE,EAAE;QAC9G,KAAK,CAAC,cAAc,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QACxD,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACpC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC;IACtC,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,SAAS,CAAC;QAE7C,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAEnD,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAEtC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC;QAE7C,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;YAC7B,IAAI;YACJ,QAAQ,EAAE,IAAI,CAAC,UAAU;SAC1B,CAAC,CAAC;IACL,CAAC;IAEO,aAAa,CAAC,IAAS;QAC7B,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,mCAAmC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACtE,CAAC;IACH,CAAC;CACF;AAxCD,4CAwCC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MergeNode - Merge multiple LLM answers into a single response
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Node that combines the entries of `state.answers` into a single `answer`.
 * A lone answer is passed through as-is; several answers are merged by the LLM.
 */
export declare class MergeNode extends BaseNode {
|
|
7
|
+
/** Throws "LLM model is required for MergeNode" when no `llmModel` is set after base construction. */
constructor(input: string, output: string[], config?: NodeConfig);
|
|
8
|
+
/**
 * Rejects when `state.userPrompt` is missing or `state.answers` is empty or not an array.
 * Otherwise returns the state produced by `updateState` with `answer` set.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=MergeNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MergeNode.d.ts","sourceRoot":"","sources":["../../src/nodes/MergeNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAGlD,qBAAa,SAAU,SAAQ,QAAQ;gBACzB,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAQpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;CA0ChE"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* MergeNode - Merge multiple LLM answers into a single response
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.MergeNode = void 0;
|
|
7
|
+
const BaseNode_1 = require("./BaseNode");
|
|
8
|
+
const prompts_1 = require("../prompts");
|
|
9
|
+
/**
 * Node that folds several LLM answers into one.
 *
 * Input is taken from `state.userPrompt` and `state.answers`; the merged
 * result is published under the `answer` state key.
 */
class MergeNode extends BaseNode_1.BaseNode {
    /**
     * @param {string} input - Forwarded unchanged to the BaseNode constructor.
     * @param {string[]} output - Forwarded unchanged to the BaseNode constructor.
     * @param {object} [config] - Node configuration, also forwarded to BaseNode.
     * @throws {Error} When no `llmModel` is available after base construction.
     */
    constructor(input, output, config = {}) {
        super('Merge', 'node', input, output, 2, config);
        const hasModel = Boolean(this.llmModel);
        if (!hasModel) {
            throw new Error('LLM model is required for MergeNode');
        }
    }
    /**
     * Merges the collected answers.
     *
     * @param {object} state - Current graph state.
     * @returns {Promise<object>} Whatever `updateState` returns for `{ answer }`.
     * @throws {Error} When the user prompt is missing or there is nothing to merge.
     */
    async process(state) {
        const userPrompt = state.userPrompt;
        const answers = state.answers || [];
        if (!userPrompt) {
            throw new Error('User prompt is required');
        }
        const hasAnswers = Array.isArray(answers) && answers.length !== 0;
        if (!hasAnswers) {
            throw new Error('No answers to merge');
        }
        this.logger.info(`Merging ${answers.length} answers`);
        // A single answer needs no LLM round-trip.
        if (answers.length === 1) {
            this.logger.info('Only one answer, returning directly');
            const [onlyAnswer] = answers;
            return this.updateState(state, { answer: onlyAnswer });
        }
        // Label each answer so the model can tell the responses apart.
        const labelled = answers.map((answer, position) => {
            const body = JSON.stringify(answer, null, 2);
            return `Response ${position + 1}:\n${body}`;
        });
        const responsesStr = labelled.join('\n\n');
        // Build the merge prompt from the shared template.
        const prompt = (0, prompts_1.formatPrompt)(prompts_1.TEMPLATE_MERGE, {
            user_prompt: userPrompt,
            responses: responsesStr,
        });
        // Ask the model for the merged answer.
        const answer = await this.llmModel.generateJson(prompt);
        this.logger.success('Answers merged successfully');
        return this.updateState(state, { answer });
    }
}
|
|
50
|
+
exports.MergeNode = MergeNode;
|
|
51
|
+
//# sourceMappingURL=MergeNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"MergeNode.js","sourceRoot":"","sources":["../../src/nodes/MergeNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,yCAAsC;AAEtC,wCAA0D;AAE1D,MAAa,SAAU,SAAQ,mBAAQ;IACrC,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;QAEjD,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;QACpC,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC;QAEpC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACpD,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;QACzC,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,MAAM,UAAU,CAAC,CAAC;QAEtD,yCAAyC;QACzC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;YACxD,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;gBAC7B,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;aACnB,CAAC,CAAC;QACL,CAAC;QAED,6BAA6B;QAC7B,MAAM,YAAY,GAAG,OAAO;aACzB,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,YAAY,GAAG,GAAG,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;aAC1E,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,wBAAwB;QACxB,MAAM,MAAM,GAAG,IAAA,sBAAY,EAAC,wBAAc,EAAE;YAC1C,WAAW,EAAE,UAAU;YACvB,SAAS,EAAE,YAAY;SACxB,CAAC,CAAC;QAEH,6BAA6B;QAC7B,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,QAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAE/D,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,6BAA6B,CAAC,CAAC;QAEnD,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;YAC7B,MAAM,EAAE,YAAY;SACrB,CAAC,CAAC;IACL,CAAC;CACF;AAnDD,8BAmDC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDFScraperNode - Extract text from PDF documents
|
|
3
|
+
*/
|
|
4
|
+
import { BaseNode } from './BaseNode';
|
|
5
|
+
import { GraphState, NodeConfig } from '../types';
|
|
6
|
+
/**
 * Node that reads a PDF file from disk and extracts its text with `pdf-parse`.
 */
export declare class PDFScraperNode extends BaseNode {
|
|
7
|
+
/** Forwards all three arguments to the BaseNode constructor; adds no state of its own. */
constructor(input: string, output: string[], config?: NodeConfig);
|
|
8
|
+
/**
 * Takes the PDF path from the state entry named by the first input key and
 * stores a one-element document array under the `doc` state key.
 * Rejects with "PDF source path is required" when that entry is falsy, and
 * with a "Failed to parse PDF" prefix when reading or parsing fails.
 */
protected process(state: GraphState): Promise<GraphState>;
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=PDFScraperNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PDFScraperNode.d.ts","sourceRoot":"","sources":["../../src/nodes/PDFScraperNode.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,UAAU,EAAY,UAAU,EAAE,MAAM,UAAU,CAAC;AAI5D,qBAAa,cAAe,SAAQ,QAAQ;gBAC9B,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,MAAM,GAAE,UAAe;cAIpD,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;CAqChE"}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* PDFScraperNode - Extract text from PDF documents
|
|
4
|
+
*/
|
|
5
|
+
// Compiler-emitted helper: exposes `source[key]` on `target` (optionally under
// `alias`). An already-installed copy on `this` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportedAs = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused or must be
        // replaced by a live getter that always reads through to the source.
        let useLiveGetter;
        if (!descriptor) {
            useLiveGetter = true;
        }
        else if ("get" in descriptor) {
            useLiveGetter = !source.__esModule;
        }
        else {
            useLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (useLiveGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportedAs, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback for engines without Object.create: plain value copy.
        const exportedAs = alias === undefined ? key : alias;
        target[exportedAs] = source[key];
    });
|
|
16
|
+
// Compiler-emitted helper: installs `defaultExport` as the `default` member of
// a synthesized namespace object. An existing copy on `this` wins when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (namespace, defaultExport) {
        const descriptor = { enumerable: true, value: defaultExport };
        Object.defineProperty(namespace, "default", descriptor);
    }
    : function (namespace, defaultExport) {
        // Fallback for engines without Object.create: plain assignment.
        namespace["default"] = defaultExport;
    });
|
|
21
|
+
// Compiler-emitted helper: turns a required module into a namespace object.
// Modules flagged `__esModule` are returned untouched; anything else gets its
// own properties re-bound onto a fresh object with the module as `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first call with the best available key lister.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var index = 0; index < keys.length; index++) {
                var key = keys[index];
                // `default` is installed separately below.
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.PDFScraperNode = void 0;
|
|
40
|
+
const BaseNode_1 = require("./BaseNode");
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const pdfParse = __importStar(require("pdf-parse"));
|
|
43
|
+
/**
 * Node that reads a PDF file from disk and extracts its text with `pdf-parse`.
 *
 * The extracted text and basic metadata are published as a one-element
 * document array under the `doc` state key.
 */
class PDFScraperNode extends BaseNode_1.BaseNode {
    /**
     * @param {string} input - Forwarded unchanged to the BaseNode constructor.
     * @param {string[]} output - Forwarded unchanged to the BaseNode constructor.
     * @param {object} [config] - Node configuration, also forwarded to BaseNode.
     */
    constructor(input, output, config = {}) {
        super('PDFScraper', 'node', input, output, 1, config);
    }
    /**
     * Loads and parses the PDF referenced by the state.
     *
     * @param {object} state - Current graph state; the PDF path is read from
     *   the entry named by the first key that `getInputKeys` returns.
     * @returns {Promise<object>} Whatever `updateState` returns for `{ doc }`.
     * @throws {Error} "PDF source path is required" when the path is falsy;
     *   "Failed to parse PDF: ..." (with the original error as `cause`) when
     *   reading or parsing fails.
     */
    async process(state) {
        const inputKeys = this.getInputKeys(state);
        const source = state[inputKeys[0]];
        if (!source) {
            throw new Error('PDF source path is required');
        }
        this.logger.info(`Extracting text from PDF: ${source}`);
        try {
            // Read asynchronously: a synchronous read would block the event
            // loop for the whole duration of the disk access.
            const dataBuffer = await fs.promises.readFile(source);
            // Parse PDF
            const pdfData = await pdfParse.default(dataBuffer);
            const document = {
                pageContent: pdfData.text,
                metadata: {
                    source,
                    type: 'pdf',
                    pages: pdfData.numpages,
                    info: pdfData.info,
                    extractedAt: new Date().toISOString(),
                },
            };
            this.logger.success(`Extracted ${pdfData.text.length} characters from ${pdfData.numpages} pages`);
            return this.updateState(state, {
                doc: [document],
            });
        }
        catch (error) {
            // Keep the underlying error (ENOENT, parser failure, ...) reachable
            // for callers that need the original stack or error code.
            throw new Error(`Failed to parse PDF: ${error.message}`, { cause: error });
        }
    }
}
|
|
79
|
+
exports.PDFScraperNode = PDFScraperNode;
|
|
80
|
+
//# sourceMappingURL=PDFScraperNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PDFScraperNode.js","sourceRoot":"","sources":["../../src/nodes/PDFScraperNode.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,yCAAsC;AAEtC,uCAAyB;AACzB,oDAAsC;AAEtC,MAAa,cAAe,SAAQ,mBAAQ;IAC1C,YAAY,KAAa,EAAE,MAAgB,EAAE,SAAqB,EAAE;QAClE,KAAK,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC;IACxD,CAAC;IAES,KAAK,CAAC,OAAO,CAAC,KAAiB;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAEnC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;QACjD,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,6BAA6B,MAAM,EAAE,CAAC,CAAC;QAExD,IAAI,CAAC;YACH,gBAAgB;YAChB,MAAM,UAAU,GAAG,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAE3C,YAAY;YACZ,MAAM,OAAO,GAAG,MAAO,QAAgB,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAE5D,MAAM,QAAQ,GAAa;gBACzB,WAAW,EAAE,OAAO,CAAC,IAAI;gBACzB,QAAQ,EAAE;oBACR,MAAM;oBACN,IAAI,EAAE,KAAK;oBACX,KAAK,EAAE,OAAO,CAAC,QAAQ;oBACvB,IAAI,EAAE,OAAO,CAAC,IAAI;oBAClB,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACtC;aACF,CAAC;YAEF,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,aAAa,OAAO,CAAC,IAAI,CAAC,MAAM,oBAAoB,OAAO,CAAC,QAAQ,QAAQ,CAAC,CAAC;YAElG,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE;gBAC7B,GAAG,EAAE,CAAC,QAAQ,CAAC;aAChB,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,wBAAwB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;CACF;AA1CD,wCA0CC"}
|