@gravity-platform/ingest 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/Abyssale/node/executor.d.ts +12 -0
- package/dist/Abyssale/node/executor.d.ts.map +1 -0
- package/dist/Abyssale/node/executor.js +72 -0
- package/dist/Abyssale/node/executor.js.map +1 -0
- package/dist/Abyssale/node/index.d.ts +10 -0
- package/dist/Abyssale/node/index.d.ts.map +1 -0
- package/dist/Abyssale/node/index.js +90 -0
- package/dist/Abyssale/node/index.js.map +1 -0
- package/dist/Abyssale/service/abyssaleService.d.ts +45 -0
- package/dist/Abyssale/service/abyssaleService.d.ts.map +1 -0
- package/dist/Abyssale/service/abyssaleService.js +360 -0
- package/dist/Abyssale/service/abyssaleService.js.map +1 -0
- package/dist/Abyssale/util/types.d.ts +62 -0
- package/dist/Abyssale/util/types.d.ts.map +1 -0
- package/dist/Abyssale/util/types.js +6 -0
- package/dist/Abyssale/util/types.js.map +1 -0
- package/dist/AirtableInsert/node/executor.d.ts +7 -0
- package/dist/AirtableInsert/node/executor.d.ts.map +1 -0
- package/dist/AirtableInsert/node/executor.js +32 -0
- package/dist/AirtableInsert/node/executor.js.map +1 -0
- package/dist/AirtableInsert/node/index.d.ts +10 -0
- package/dist/AirtableInsert/node/index.d.ts.map +1 -0
- package/dist/AirtableInsert/node/index.js +98 -0
- package/dist/AirtableInsert/node/index.js.map +1 -0
- package/dist/AirtableInsert/service/airtableInsertService.d.ts +8 -0
- package/dist/AirtableInsert/service/airtableInsertService.d.ts.map +1 -0
- package/dist/AirtableInsert/service/airtableInsertService.js +99 -0
- package/dist/AirtableInsert/service/airtableInsertService.js.map +1 -0
- package/dist/AirtableInsert/util/types.d.ts +15 -0
- package/dist/AirtableInsert/util/types.d.ts.map +1 -0
- package/dist/AirtableInsert/util/types.js +3 -0
- package/dist/AirtableInsert/util/types.js.map +1 -0
- package/dist/ApifyResults/node/executor.d.ts +32 -0
- package/dist/ApifyResults/node/executor.d.ts.map +1 -0
- package/dist/ApifyResults/node/executor.js +204 -0
- package/dist/ApifyResults/node/executor.js.map +1 -0
- package/dist/ApifyResults/node/index.d.ts +10 -0
- package/dist/ApifyResults/node/index.d.ts.map +1 -0
- package/dist/ApifyResults/node/index.js +79 -0
- package/dist/ApifyResults/node/index.js.map +1 -0
- package/dist/ApifyResults/service/apifyService.d.ts +10 -0
- package/dist/ApifyResults/service/apifyService.d.ts.map +1 -0
- package/dist/ApifyResults/service/apifyService.js +85 -0
- package/dist/ApifyResults/service/apifyService.js.map +1 -0
- package/dist/ApifyResults/util/types.d.ts +93 -0
- package/dist/ApifyResults/util/types.d.ts.map +1 -0
- package/dist/ApifyResults/util/types.js +6 -0
- package/dist/ApifyResults/util/types.js.map +1 -0
- package/dist/ApifyStarter/node/executor.d.ts +12 -0
- package/dist/ApifyStarter/node/executor.d.ts.map +1 -0
- package/dist/ApifyStarter/node/executor.js +97 -0
- package/dist/ApifyStarter/node/executor.js.map +1 -0
- package/dist/ApifyStarter/node/index.d.ts +10 -0
- package/dist/ApifyStarter/node/index.d.ts.map +1 -0
- package/dist/ApifyStarter/node/index.js +87 -0
- package/dist/ApifyStarter/node/index.js.map +1 -0
- package/dist/ApifyStarter/service/apifyActorService.d.ts +11 -0
- package/dist/ApifyStarter/service/apifyActorService.d.ts.map +1 -0
- package/dist/ApifyStarter/service/apifyActorService.js +104 -0
- package/dist/ApifyStarter/service/apifyActorService.js.map +1 -0
- package/dist/ApifyStarter/util/types.d.ts +23 -0
- package/dist/ApifyStarter/util/types.d.ts.map +1 -0
- package/dist/ApifyStarter/util/types.js +6 -0
- package/dist/ApifyStarter/util/types.js.map +1 -0
- package/dist/ApifyStarter/util/validation.d.ts +9 -0
- package/dist/ApifyStarter/util/validation.d.ts.map +1 -0
- package/dist/ApifyStarter/util/validation.js +59 -0
- package/dist/ApifyStarter/util/validation.js.map +1 -0
- package/dist/ApolloCompany/node/executor.d.ts +8 -0
- package/dist/ApolloCompany/node/executor.d.ts.map +1 -0
- package/dist/ApolloCompany/node/executor.js +38 -0
- package/dist/ApolloCompany/node/executor.js.map +1 -0
- package/dist/ApolloCompany/node/index.d.ts +10 -0
- package/dist/ApolloCompany/node/index.d.ts.map +1 -0
- package/dist/ApolloCompany/node/index.js +120 -0
- package/dist/ApolloCompany/node/index.js.map +1 -0
- package/dist/ApolloCompany/service/apolloSearchCompanyService.d.ts +7 -0
- package/dist/ApolloCompany/service/apolloSearchCompanyService.d.ts.map +1 -0
- package/dist/ApolloCompany/service/apolloSearchCompanyService.js +124 -0
- package/dist/ApolloCompany/service/apolloSearchCompanyService.js.map +1 -0
- package/dist/ApolloCompany/util/types.d.ts +46 -0
- package/dist/ApolloCompany/util/types.d.ts.map +1 -0
- package/dist/ApolloCompany/util/types.js +6 -0
- package/dist/ApolloCompany/util/types.js.map +1 -0
- package/dist/ApolloCompanyEnrich/node/executor.d.ts +8 -0
- package/dist/ApolloCompanyEnrich/node/executor.d.ts.map +1 -0
- package/dist/ApolloCompanyEnrich/node/executor.js +25 -0
- package/dist/ApolloCompanyEnrich/node/executor.js.map +1 -0
- package/dist/ApolloCompanyEnrich/node/index.d.ts +10 -0
- package/dist/ApolloCompanyEnrich/node/index.d.ts.map +1 -0
- package/dist/ApolloCompanyEnrich/node/index.js +67 -0
- package/dist/ApolloCompanyEnrich/node/index.js.map +1 -0
- package/dist/ApolloCompanyEnrich/service/apolloCompanyEnrichService.d.ts +9 -0
- package/dist/ApolloCompanyEnrich/service/apolloCompanyEnrichService.d.ts.map +1 -0
- package/dist/ApolloCompanyEnrich/service/apolloCompanyEnrichService.js +40 -0
- package/dist/ApolloCompanyEnrich/service/apolloCompanyEnrichService.js.map +1 -0
- package/dist/ApolloCompanyEnrich/util/types.d.ts +13 -0
- package/dist/ApolloCompanyEnrich/util/types.d.ts.map +1 -0
- package/dist/ApolloCompanyEnrich/util/types.js +6 -0
- package/dist/ApolloCompanyEnrich/util/types.js.map +1 -0
- package/dist/ApolloPeople/node/executor.d.ts +8 -0
- package/dist/ApolloPeople/node/executor.d.ts.map +1 -0
- package/dist/ApolloPeople/node/executor.js +41 -0
- package/dist/ApolloPeople/node/executor.js.map +1 -0
- package/dist/ApolloPeople/node/index.d.ts +10 -0
- package/dist/ApolloPeople/node/index.d.ts.map +1 -0
- package/dist/ApolloPeople/node/index.js +128 -0
- package/dist/ApolloPeople/node/index.js.map +1 -0
- package/dist/ApolloPeople/service/apolloSearchService.d.ts +6 -0
- package/dist/ApolloPeople/service/apolloSearchService.d.ts.map +1 -0
- package/dist/ApolloPeople/service/apolloSearchService.js +127 -0
- package/dist/ApolloPeople/service/apolloSearchService.js.map +1 -0
- package/dist/ApolloPeople/util/types.d.ts +70 -0
- package/dist/ApolloPeople/util/types.d.ts.map +1 -0
- package/dist/ApolloPeople/util/types.js +6 -0
- package/dist/ApolloPeople/util/types.js.map +1 -0
- package/dist/ApolloPeopleEnrich/node/executor.d.ts +8 -0
- package/dist/ApolloPeopleEnrich/node/executor.d.ts.map +1 -0
- package/dist/ApolloPeopleEnrich/node/executor.js +25 -0
- package/dist/ApolloPeopleEnrich/node/executor.js.map +1 -0
- package/dist/ApolloPeopleEnrich/node/index.d.ts +10 -0
- package/dist/ApolloPeopleEnrich/node/index.d.ts.map +1 -0
- package/dist/ApolloPeopleEnrich/node/index.js +116 -0
- package/dist/ApolloPeopleEnrich/node/index.js.map +1 -0
- package/dist/ApolloPeopleEnrich/service/apolloPeopleEnrichService.d.ts +10 -0
- package/dist/ApolloPeopleEnrich/service/apolloPeopleEnrichService.d.ts.map +1 -0
- package/dist/ApolloPeopleEnrich/service/apolloPeopleEnrichService.js +54 -0
- package/dist/ApolloPeopleEnrich/service/apolloPeopleEnrichService.js.map +1 -0
- package/dist/ApolloPeopleEnrich/util/types.d.ts +20 -0
- package/dist/ApolloPeopleEnrich/util/types.d.ts.map +1 -0
- package/dist/ApolloPeopleEnrich/util/types.js +6 -0
- package/dist/ApolloPeopleEnrich/util/types.js.map +1 -0
- package/dist/ApolloSearch/node/executor.d.ts +9 -0
- package/dist/ApolloSearch/node/executor.d.ts.map +1 -0
- package/dist/ApolloSearch/node/executor.js +51 -0
- package/dist/ApolloSearch/node/executor.js.map +1 -0
- package/dist/ApolloSearch/node/index.d.ts +10 -0
- package/dist/ApolloSearch/node/index.d.ts.map +1 -0
- package/dist/ApolloSearch/node/index.js +128 -0
- package/dist/ApolloSearch/node/index.js.map +1 -0
- package/dist/ApolloSearch/service/apolloSearchService.d.ts +6 -0
- package/dist/ApolloSearch/service/apolloSearchService.d.ts.map +1 -0
- package/dist/ApolloSearch/service/apolloSearchService.js +127 -0
- package/dist/ApolloSearch/service/apolloSearchService.js.map +1 -0
- package/dist/ApolloSearch/util/types.d.ts +70 -0
- package/dist/ApolloSearch/util/types.d.ts.map +1 -0
- package/dist/ApolloSearch/util/types.js +6 -0
- package/dist/ApolloSearch/util/types.js.map +1 -0
- package/dist/ApolloSearchCompany/node/executor.d.ts +9 -0
- package/dist/ApolloSearchCompany/node/executor.d.ts.map +1 -0
- package/dist/ApolloSearchCompany/node/executor.js +49 -0
- package/dist/ApolloSearchCompany/node/executor.js.map +1 -0
- package/dist/ApolloSearchCompany/node/index.d.ts +10 -0
- package/dist/ApolloSearchCompany/node/index.d.ts.map +1 -0
- package/dist/ApolloSearchCompany/node/index.js +128 -0
- package/dist/ApolloSearchCompany/node/index.js.map +1 -0
- package/dist/ApolloSearchCompany/service/apolloSearchCompanyService.d.ts +7 -0
- package/dist/ApolloSearchCompany/service/apolloSearchCompanyService.d.ts.map +1 -0
- package/dist/ApolloSearchCompany/service/apolloSearchCompanyService.js +98 -0
- package/dist/ApolloSearchCompany/service/apolloSearchCompanyService.js.map +1 -0
- package/dist/ApolloSearchCompany/util/types.d.ts +48 -0
- package/dist/ApolloSearchCompany/util/types.d.ts.map +1 -0
- package/dist/ApolloSearchCompany/util/types.js +6 -0
- package/dist/ApolloSearchCompany/util/types.js.map +1 -0
- package/dist/Document/node/executor.d.ts +22 -0
- package/dist/Document/node/executor.d.ts.map +1 -0
- package/dist/Document/node/executor.js +85 -0
- package/dist/Document/node/executor.js.map +1 -0
- package/dist/Document/node/index.d.ts +10 -0
- package/dist/Document/node/index.d.ts.map +1 -0
- package/dist/Document/node/index.js +58 -0
- package/dist/Document/node/index.js.map +1 -0
- package/dist/Document/service/documentCache.d.ts +11 -0
- package/dist/Document/service/documentCache.d.ts.map +1 -0
- package/dist/Document/service/documentCache.js +166 -0
- package/dist/Document/service/documentCache.js.map +1 -0
- package/dist/Document/util/types.d.ts +70 -0
- package/dist/Document/util/types.d.ts.map +1 -0
- package/dist/Document/util/types.js +6 -0
- package/dist/Document/util/types.js.map +1 -0
- package/dist/DocumentParser/node/executor.d.ts +12 -0
- package/dist/DocumentParser/node/executor.d.ts.map +1 -0
- package/dist/DocumentParser/node/executor.js +200 -0
- package/dist/DocumentParser/node/executor.js.map +1 -0
- package/dist/DocumentParser/node/index.d.ts +10 -0
- package/dist/DocumentParser/node/index.d.ts.map +1 -0
- package/dist/DocumentParser/node/index.js +66 -0
- package/dist/DocumentParser/node/index.js.map +1 -0
- package/dist/DocumentParser/service/docxParser.d.ts +12 -0
- package/dist/DocumentParser/service/docxParser.d.ts.map +1 -0
- package/dist/DocumentParser/service/docxParser.js +27 -0
- package/dist/DocumentParser/service/docxParser.js.map +1 -0
- package/dist/DocumentParser/service/fileTypeDetector.d.ts +19 -0
- package/dist/DocumentParser/service/fileTypeDetector.d.ts.map +1 -0
- package/dist/DocumentParser/service/fileTypeDetector.js +75 -0
- package/dist/DocumentParser/service/fileTypeDetector.js.map +1 -0
- package/dist/DocumentParser/service/index.d.ts +5 -0
- package/dist/DocumentParser/service/index.d.ts.map +1 -0
- package/dist/DocumentParser/service/index.js +14 -0
- package/dist/DocumentParser/service/index.js.map +1 -0
- package/dist/DocumentParser/service/pdfParser.d.ts +16 -0
- package/dist/DocumentParser/service/pdfParser.d.ts.map +1 -0
- package/dist/DocumentParser/service/pdfParser.js +30 -0
- package/dist/DocumentParser/service/pdfParser.js.map +1 -0
- package/dist/DocumentParser/service/txtParser.d.ts +11 -0
- package/dist/DocumentParser/service/txtParser.d.ts.map +1 -0
- package/dist/DocumentParser/service/txtParser.js +35 -0
- package/dist/DocumentParser/service/txtParser.js.map +1 -0
- package/dist/DocumentParser/util/hashUtils.d.ts +17 -0
- package/dist/DocumentParser/util/hashUtils.d.ts.map +1 -0
- package/dist/DocumentParser/util/hashUtils.js +34 -0
- package/dist/DocumentParser/util/hashUtils.js.map +1 -0
- package/dist/DocumentParser/util/types.d.ts +43 -0
- package/dist/DocumentParser/util/types.d.ts.map +1 -0
- package/dist/DocumentParser/util/types.js +6 -0
- package/dist/DocumentParser/util/types.js.map +1 -0
- package/dist/ElevenLabs/node/executor.d.ts +12 -0
- package/dist/ElevenLabs/node/executor.d.ts.map +1 -0
- package/dist/ElevenLabs/node/executor.js +65 -0
- package/dist/ElevenLabs/node/executor.js.map +1 -0
- package/dist/ElevenLabs/node/index.d.ts +10 -0
- package/dist/ElevenLabs/node/index.d.ts.map +1 -0
- package/dist/ElevenLabs/node/index.js +92 -0
- package/dist/ElevenLabs/node/index.js.map +1 -0
- package/dist/ElevenLabs/service/elevenlabsService.d.ts +27 -0
- package/dist/ElevenLabs/service/elevenlabsService.d.ts.map +1 -0
- package/dist/ElevenLabs/service/elevenlabsService.js +278 -0
- package/dist/ElevenLabs/service/elevenlabsService.js.map +1 -0
- package/dist/ElevenLabs/util/types.d.ts +43 -0
- package/dist/ElevenLabs/util/types.d.ts.map +1 -0
- package/dist/ElevenLabs/util/types.js +7 -0
- package/dist/ElevenLabs/util/types.js.map +1 -0
- package/dist/GoogleSheet/node/executor.d.ts +21 -0
- package/dist/GoogleSheet/node/executor.d.ts.map +1 -0
- package/dist/GoogleSheet/node/executor.js +156 -0
- package/dist/GoogleSheet/node/executor.js.map +1 -0
- package/dist/GoogleSheet/node/index.d.ts +10 -0
- package/dist/GoogleSheet/node/index.d.ts.map +1 -0
- package/dist/GoogleSheet/node/index.js +90 -0
- package/dist/GoogleSheet/node/index.js.map +1 -0
- package/dist/GoogleSheet/service/googleSheetsService.d.ts +10 -0
- package/dist/GoogleSheet/service/googleSheetsService.d.ts.map +1 -0
- package/dist/GoogleSheet/service/googleSheetsService.js +44 -0
- package/dist/GoogleSheet/service/googleSheetsService.js.map +1 -0
- package/dist/GoogleSheet/util/types.d.ts +39 -0
- package/dist/GoogleSheet/util/types.d.ts.map +1 -0
- package/dist/GoogleSheet/util/types.js +6 -0
- package/dist/GoogleSheet/util/types.js.map +1 -0
- package/dist/Hyperbrowser/node/executor.d.ts +12 -0
- package/dist/Hyperbrowser/node/executor.d.ts.map +1 -0
- package/dist/Hyperbrowser/node/executor.js +67 -0
- package/dist/Hyperbrowser/node/executor.js.map +1 -0
- package/dist/Hyperbrowser/node/index.d.ts +10 -0
- package/dist/Hyperbrowser/node/index.d.ts.map +1 -0
- package/dist/Hyperbrowser/node/index.js +93 -0
- package/dist/Hyperbrowser/node/index.js.map +1 -0
- package/dist/Hyperbrowser/service/hyperbrowserService.d.ts +38 -0
- package/dist/Hyperbrowser/service/hyperbrowserService.d.ts.map +1 -0
- package/dist/Hyperbrowser/service/hyperbrowserService.js +108 -0
- package/dist/Hyperbrowser/service/hyperbrowserService.js.map +1 -0
- package/dist/Hyperbrowser/util/types.d.ts +44 -0
- package/dist/Hyperbrowser/util/types.d.ts.map +1 -0
- package/dist/Hyperbrowser/util/types.js +6 -0
- package/dist/Hyperbrowser/util/types.js.map +1 -0
- package/dist/PlaidTransactions/node/executor.d.ts +10 -0
- package/dist/PlaidTransactions/node/executor.d.ts.map +1 -0
- package/dist/PlaidTransactions/node/executor.js +142 -0
- package/dist/PlaidTransactions/node/executor.js.map +1 -0
- package/dist/PlaidTransactions/node/index.d.ts +10 -0
- package/dist/PlaidTransactions/node/index.d.ts.map +1 -0
- package/dist/PlaidTransactions/node/index.js +120 -0
- package/dist/PlaidTransactions/node/index.js.map +1 -0
- package/dist/PlaidTransactions/service/plaidService.d.ts +12 -0
- package/dist/PlaidTransactions/service/plaidService.d.ts.map +1 -0
- package/dist/PlaidTransactions/service/plaidService.js +179 -0
- package/dist/PlaidTransactions/service/plaidService.js.map +1 -0
- package/dist/PlaidTransactions/util/types.d.ts +90 -0
- package/dist/PlaidTransactions/util/types.d.ts.map +1 -0
- package/dist/PlaidTransactions/util/types.js +6 -0
- package/dist/PlaidTransactions/util/types.js.map +1 -0
- package/dist/SearchWeb/node/executor.d.ts +12 -0
- package/dist/SearchWeb/node/executor.d.ts.map +1 -0
- package/dist/SearchWeb/node/executor.js +56 -0
- package/dist/SearchWeb/node/executor.js.map +1 -0
- package/dist/SearchWeb/node/index.d.ts +10 -0
- package/dist/SearchWeb/node/index.d.ts.map +1 -0
- package/dist/SearchWeb/node/index.js +114 -0
- package/dist/SearchWeb/node/index.js.map +1 -0
- package/dist/SearchWeb/service/searchWebService.d.ts +23 -0
- package/dist/SearchWeb/service/searchWebService.d.ts.map +1 -0
- package/dist/SearchWeb/service/searchWebService.js +146 -0
- package/dist/SearchWeb/service/searchWebService.js.map +1 -0
- package/dist/SearchWeb/util/types.d.ts +50 -0
- package/dist/SearchWeb/util/types.d.ts.map +1 -0
- package/dist/SearchWeb/util/types.js +6 -0
- package/dist/SearchWeb/util/types.js.map +1 -0
- package/dist/SpatialIngest/node/executor.d.ts +8 -0
- package/dist/SpatialIngest/node/executor.d.ts.map +1 -0
- package/dist/SpatialIngest/node/executor.js +58 -0
- package/dist/SpatialIngest/node/executor.js.map +1 -0
- package/dist/SpatialIngest/node/index.d.ts +10 -0
- package/dist/SpatialIngest/node/index.d.ts.map +1 -0
- package/dist/SpatialIngest/node/index.js +99 -0
- package/dist/SpatialIngest/node/index.js.map +1 -0
- package/dist/SpatialIngest/service/spatialIngestService.d.ts +29 -0
- package/dist/SpatialIngest/service/spatialIngestService.d.ts.map +1 -0
- package/dist/SpatialIngest/service/spatialIngestService.js +306 -0
- package/dist/SpatialIngest/service/spatialIngestService.js.map +1 -0
- package/dist/SpatialIngest/util/types.d.ts +26 -0
- package/dist/SpatialIngest/util/types.d.ts.map +1 -0
- package/dist/SpatialIngest/util/types.js +3 -0
- package/dist/SpatialIngest/util/types.js.map +1 -0
- package/dist/credentials/index.d.ts +141 -0
- package/dist/credentials/index.d.ts.map +1 -0
- package/dist/credentials/index.js +180 -0
- package/dist/credentials/index.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +104 -0
- package/dist/index.js.map +1 -0
- package/dist/shared/platform.d.ts +11 -0
- package/dist/shared/platform.d.ts.map +1 -0
- package/dist/shared/platform.js +21 -0
- package/dist/shared/platform.js.map +1 -0
- package/package.json +53 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/// <reference path="../util/pdf-parse.d.ts" />
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.DocumentParserExecutor = void 0;
|
|
5
|
+
const platform_1 = require("../../shared/platform");
|
|
6
|
+
const service_1 = require("../service");
|
|
7
|
+
const hashUtils_1 = require("../util/hashUtils");
|
|
8
|
+
const NODE_TYPE = "DocumentParser";
|
|
9
|
+
/**
 * Produce a log-safe view of the configured file.
 *
 * The raw `content` (a Buffer or base64 string holding the entire document)
 * is replaced by a boolean so a single log line never carries the payload.
 * Non-object values are returned untouched so that a misconfigured `file`
 * still shows up in the log as-is.
 */
function describeFileForLog(file) {
    if (!file || typeof file !== 'object') {
        return file;
    }
    const { content, ...rest } = file;
    return { ...rest, hasContent: Boolean(content) };
}
/**
 * Obtain the document bytes for `file`.
 *
 * Inline `file.content` takes precedence over `file.downloadUrl`. A Buffer is
 * used as-is and a string is decoded as base64. When only `downloadUrl` is
 * present the document is fetched with the global `fetch`.
 *
 * @param file   File descriptor taken from the node config.
 * @param logger Logger used for fetch progress and failures.
 * @returns A Buffer with the document bytes.
 * @throws Error when the content has an unsupported type, when neither source
 *         is present, or when the download fails / returns a non-OK status.
 */
async function loadDocumentBuffer(file, logger) {
    if (file.content) {
        if (Buffer.isBuffer(file.content)) {
            return file.content;
        }
        if (typeof file.content === 'string') {
            // Assume base64 encoded
            return Buffer.from(file.content, 'base64');
        }
        throw new Error('Invalid file content: must be Buffer or base64 string');
    }
    if (!file.downloadUrl) {
        throw new Error('Neither content nor downloadUrl provided');
    }
    // Fetch content from downloadUrl to avoid large payloads in subscriptions
    logger.info('Fetching document from downloadUrl', { downloadUrl: file.downloadUrl });
    try {
        const response = await fetch(file.downloadUrl);
        if (!response.ok) {
            throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);
        }
        const fetched = Buffer.from(await response.arrayBuffer());
        logger.info('Successfully fetched document from downloadUrl', {
            downloadUrl: file.downloadUrl,
            fetchedSize: fetched.length
        });
        return fetched;
    }
    catch (error) {
        // Anything can be thrown in JavaScript; do not assume an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        logger.error('Failed to fetch document from downloadUrl', {
            downloadUrl: file.downloadUrl,
            error: message
        });
        throw new Error(`Failed to fetch document from downloadUrl: ${message}`);
    }
}
/**
 * Assemble the node output for a parsed document and stamp it with a content
 * hash.
 *
 * The property order is kept exactly as before (fileKey, text, pageCount,
 * metadata, fileType, fileSize, bucket, universalId, downloadUrl) because the
 * object is handed to `createContentHash`, whose sensitivity to key order is
 * not visible from here.
 */
function buildParseResult(file, documentBuffer, fileType, text, pageCount, metadata) {
    const result = {
        fileKey: file.key,
        text,
        pageCount,
        metadata,
        fileType,
        fileSize: documentBuffer.length,
        bucket: file.bucket,
        universalId: file.universalId,
        downloadUrl: file.downloadUrl,
    };
    // Add content hash for change detection
    result.contentId = (0, hashUtils_1.createContentHash)(result);
    return result;
}
/**
 * Run the parser matching `parserType` and return the assembled result.
 *
 * Both the `FileType` members and their plain-string spellings are accepted.
 *
 * @throws Error for a parser type other than PDF, DOCX or TXT, and whatever
 *         the selected parser throws.
 */
async function parseByType(parserType, file, documentBuffer) {
    switch (parserType) {
        case service_1.FileType.PDF:
        case 'pdf': {
            const pdf = await (0, service_1.parsePDF)(documentBuffer);
            return buildParseResult(file, documentBuffer, 'pdf', pdf.text, pdf.pageCount, pdf.metadata);
        }
        case service_1.FileType.DOCX:
        case 'docx': {
            const docx = await (0, service_1.parseDOCX)(documentBuffer);
            // DOCX doesn't provide page count, so it is reported as 1
            return buildParseResult(file, documentBuffer, 'docx', docx.text, 1, {
                html: docx.html,
                messages: docx.messages,
            });
        }
        case service_1.FileType.TXT:
        case 'txt': {
            const txt = await (0, service_1.parseTXT)(documentBuffer);
            return buildParseResult(file, documentBuffer, 'txt', txt.text, 1, {
                encoding: txt.encoding,
            });
        }
        default:
            throw new Error(`Unsupported parser type: ${parserType}`);
    }
}
/**
 * Executor for the "DocumentParser" node: takes a file descriptor from the
 * node config, loads its bytes, parses them as PDF, DOCX or TXT and returns
 * the extracted text plus metadata.
 */
class DocumentParserExecutor extends platform_1.PromiseNode {
    constructor() {
        super(NODE_TYPE);
    }
    /**
     * Parse the document described by `config.file`.
     *
     * @param inputs  Upstream inputs; only inspected for diagnostic logging.
     * @param config  Node config. Reads `file` (requires `key` and one of
     *                `content` / `downloadUrl`; optional `size`, `bucket`,
     *                `universalId`), `parserType` (defaults to 'auto') and
     *                `maxFileSizeMB` (defaults to 10).
     * @param context Execution context; `nodeId` is logged.
     * @returns `{ __outputs: { output } }` where `output` carries fileKey,
     *          text, pageCount, metadata, fileType, fileSize, bucket,
     *          universalId, downloadUrl and contentId.
     * @throws Error when the file input is missing or invalid, the declared or
     *         actual size exceeds the limit, the download fails, the file type
     *         is unsupported, or the parser itself fails.
     */
    async executeNode(inputs, config, context) {
        const logger = (0, platform_1.createLogger)("DocumentParser");
        const file = config.file;
        // Diagnostic snapshot. Only keys and a content-free file summary are
        // logged: the raw inputs and file object may embed the whole document.
        logger.info('DocumentParser executeNode - comprehensive debug:', {
            nodeId: context.nodeId,
            configFile: describeFileForLog(file),
            configKeys: Object.keys(config),
            inputsKeys: Object.keys(inputs || {}),
            contextInputsKeys: inputs ? Object.keys(inputs) : [],
            contextHasLoop1: !!(inputs && inputs.loop1)
        });
        // Validate input
        if (!file) {
            throw new Error('No file input provided');
        }
        // Validate file object has required properties
        if (!file.key || (!file.content && !file.downloadUrl)) {
            throw new Error('Invalid file input: missing key and both content and downloadUrl properties');
        }
        logger.info('Starting document parsing', {
            fileKey: file.key,
            fileSize: file.size,
            parserType: config.parserType || 'auto'
        });
        // Check the declared size first so an oversized file is rejected
        // before any download is attempted.
        const maxSizeBytes = (config.maxFileSizeMB || 10) * 1024 * 1024;
        if (file.size && file.size > maxSizeBytes) {
            throw new Error(`File size (${file.size} bytes) exceeds maximum allowed size (${maxSizeBytes} bytes)`);
        }
        // Get content either from file.content or by fetching from downloadUrl
        const documentBuffer = await loadDocumentBuffer(file, logger);
        // `file.size` is caller-supplied and optional, so enforce the limit on
        // the bytes actually obtained as well.
        if (documentBuffer.length > maxSizeBytes) {
            throw new Error(`File size (${documentBuffer.length} bytes) exceeds maximum allowed size (${maxSizeBytes} bytes)`);
        }
        // Determine parser type
        let parserType = config.parserType || 'auto';
        if (parserType === 'auto') {
            const detectedType = (0, service_1.detectFileType)(file.key);
            if (!(0, service_1.isSupportedFileType)(detectedType)) {
                throw new Error(`Unsupported file type for file: ${file.key}`);
            }
            // Use the detected type directly
            parserType = detectedType;
        }
        // Parse document based on type
        try {
            const result = await parseByType(parserType, file, documentBuffer);
            logger.info('Document parsed successfully', {
                fileKey: file.key,
                fileType: result.fileType,
                textLength: result.text.length,
                pageCount: result.pageCount,
            });
            // Wrap in __outputs pattern
            return {
                __outputs: {
                    output: result
                }
            };
        }
        catch (error) {
            logger.error('Document parsing failed', {
                fileKey: file.key,
                parserType,
                error: error instanceof Error ? error.message : 'Unknown error',
            });
            // Rethrow unchanged so the caller sees the original failure.
            throw error;
        }
    }
    /**
     * Build credential context from execution context.
     *
     * Missing values fall back to an empty object (AWS credentials) or an
     * empty string (workflow, execution and node ids). Not called from within
     * this class.
     */
    buildCredentialContext(context) {
        return {
            credentials: {
                aws: context.credentials?.awsCredential || {},
            },
            nodeType: NODE_TYPE,
            workflowId: context.workflow?.id || "",
            executionId: context.executionId || "",
            nodeId: context.nodeId || "",
        };
    }
}
|
|
199
|
+
exports.DocumentParserExecutor = DocumentParserExecutor;
|
|
200
|
+
//# sourceMappingURL=executor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"executor.js","sourceRoot":"","sources":["../../../src/DocumentParser/node/executor.ts"],"names":[],"mappings":";AAAA,+CAA+C;;;AAI/C,oDAAkE;AAClE,wCAOoB;AACpB,iDAAsD;AAEtD,MAAM,SAAS,GAAG,gBAAgB,CAAC;AAEnC,MAAa,sBAAuB,SAAQ,sBAAiC;IAC3E;QACE,KAAK,CAAC,SAAS,CAAC,CAAC;IACnB,CAAC;IAES,KAAK,CAAC,WAAW,CACzB,MAA2B,EAC3B,MAA4B,EAC5B,OAA6B;QAE7B,MAAM,MAAM,GAAG,IAAA,uBAAY,EAAC,gBAAgB,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC;QAEzB,qBAAqB;QACrB,MAAM,CAAC,IAAI,CAAC,mDAAmD,EAAE;YAC/D,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,UAAU,EAAE,IAAI;YAChB,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;YAC/B,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC;YACrC,aAAa,EAAE,MAAM;YACrB,iBAAiB,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;YACpD,eAAe,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,CAAC;YAC3C,WAAW,EAAE,MAAM,IAAI,MAAM,CAAC,KAAK;SACpC,CAAC,CAAC;QAEH,iBAAiB;QACjB,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC5C,CAAC;QAED,+CAA+C;QAC/C,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE;YACvC,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,MAAM;SACxC,CAAC,CAAC;QAEH,wBAAwB;QACxB,MAAM,YAAY,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;QAChE,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,GAAG,YAAY,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CAAC,cAAc,IAAI,CAAC,IAAI,yCAAyC,YAAY,SAAS,CAAC,CAAC;QACzG,CAAC;QAED,uEAAuE;QACvE,IAAI,cAAsB,CAAC;QAE3B,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,0CAA0C;YAC1C,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClC,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC;YAChC,CAAC;iBAAM,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAC5C,wBAAwB;gBACxB,cAAc,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YACvD,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;YAC3E,CAAC;QACH,CAAC;aAAM,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAC5B,0EA
A0E;YAC1E,MAAM,CAAC,IAAI,CAAC,oCAAoC,EAAE,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;YAErF,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;gBAC/C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;oBACjB,MAAM,IAAI,KAAK,CAAC,6BAA6B,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;gBACzF,CAAC;gBAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;gBACjD,cAAc,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;gBAE1C,MAAM,CAAC,IAAI,CAAC,gDAAgD,EAAE;oBAC5D,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,WAAW,EAAE,cAAc,CAAC,MAAM;iBACnC,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,KAAU,EAAE,CAAC;gBACpB,MAAM,CAAC,KAAK,CAAC,2CAA2C,EAAE;oBACxD,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,KAAK,EAAE,KAAK,CAAC,OAAO;iBACrB,CAAC,CAAC;gBACH,MAAM,IAAI,KAAK,CAAC,8CAA8C,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YACjF,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC9D,CAAC;QAED,wBAAwB;QACxB,IAAI,UAAU,GAAW,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC;QACrD,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;YAC1B,MAAM,YAAY,GAAG,IAAA,wBAAc,EAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC9C,IAAI,CAAC,IAAA,6BAAmB,EAAC,YAAY,CAAC,EAAE,CAAC;gBACvC,MAAM,IAAI,KAAK,CAAC,mCAAmC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;YACjE,CAAC;YACD,iCAAiC;YACjC,UAAU,GAAG,YAAY,CAAC;QAC5B,CAAC;QAED,+BAA+B;QAC/B,IAAI,MAA4B,CAAC;QAEjC,IAAI,CAAC;YACH,QAAQ,UAAU,EAAE,CAAC;gBACnB,KAAK,kBAAQ,CAAC,GAAG,CAAC;gBAClB,KAAK,KAAK,CAAC,CAAC,CAAC;oBACX,MAAM,SAAS,GAAG,MAAM,IAAA,kBAAQ,EAAC,cAAc,CAAC,CAAC;oBACjD,MAAM,GAAG;wBACP,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,IAAI,EAAE,SAAS,CAAC,IAAI;wBACpB,SAAS,EAAE,SAAS,CAAC,SAAS;wBAC9B,QAAQ,EAAE,SAAS,CAAC,QAAQ;wBAC5B,QAAQ,EAAE,KAAK;wBACf,QAAQ,EAAE,cAAc,CAAC,MAAM;wBAC/B,MAAM,EAAE,IAAI,CAAC,MAAM;wBACnB,WAAW,EAAE,IAAI,CAAC,WAAW;wBAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;qBAC9B,CAAC;oBAEF,wCAAwC;oBACxC,MAAM,CAAC,SAAS,GAAG,IAAA,6BAAiB,EAAC,MAAM,CAAC,CAAC;oBAC7C,MAAM;gBACR,CAAC;gBAED,KAAK,kBAAQ,CAAC,IAAI,CAAC;gBACnB,KAAK,MAAM,CAAC,CAAC,CAAC;oBACZ,MAAM,UAAU,GAAG,MAAM,IAAA,mBAAS,EAAC,cAAc,CAAC,CAAC;oBACnD,MAAM,GAAG;wBACP,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,IAAI,EAAE,UAAU,CAAC,IAAI;wBACrB,SAAS,EAAE,CAAC,EAAE,kCAAkC;wBA
ChD,QAAQ,EAAE;4BACR,IAAI,EAAE,UAAU,CAAC,IAAI;4BACrB,QAAQ,EAAE,UAAU,CAAC,QAAQ;yBAC9B;wBACD,QAAQ,EAAE,MAAM;wBAChB,QAAQ,EAAE,cAAc,CAAC,MAAM;wBAC/B,MAAM,EAAE,IAAI,CAAC,MAAM;wBACnB,WAAW,EAAE,IAAI,CAAC,WAAW;wBAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;qBAC9B,CAAC;oBAEF,wCAAwC;oBACxC,MAAM,CAAC,SAAS,GAAG,IAAA,6BAAiB,EAAC,MAAM,CAAC,CAAC;oBAC7C,MAAM;gBACR,CAAC;gBAED,KAAK,kBAAQ,CAAC,GAAG,CAAC;gBAClB,KAAK,KAAK,CAAC,CAAC,CAAC;oBACX,MAAM,SAAS,GAAG,MAAM,IAAA,kBAAQ,EAAC,cAAc,CAAC,CAAC;oBACjD,MAAM,GAAG;wBACP,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,IAAI,EAAE,SAAS,CAAC,IAAI;wBACpB,SAAS,EAAE,CAAC;wBACZ,QAAQ,EAAE;4BACR,QAAQ,EAAE,SAAS,CAAC,QAAQ;yBAC7B;wBACD,QAAQ,EAAE,KAAK;wBACf,QAAQ,EAAE,cAAc,CAAC,MAAM;wBAC/B,MAAM,EAAE,IAAI,CAAC,MAAM;wBACnB,WAAW,EAAE,IAAI,CAAC,WAAW;wBAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;qBAC9B,CAAC;oBAEF,wCAAwC;oBACxC,MAAM,CAAC,SAAS,GAAG,IAAA,6BAAiB,EAAC,MAAM,CAAC,CAAC;oBAC7C,MAAM;gBACR,CAAC;gBAED;oBACE,MAAM,IAAI,KAAK,CAAC,4BAA4B,UAAU,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,8BAA8B,EAAE;gBAC1C,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,QAAQ,EAAE,MAAM,CAAC,QAAQ;gBACzB,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;gBAC9B,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC,CAAC;YAEH,4BAA4B;YAC5B,OAAO;gBACL,SAAS,EAAE;oBACT,MAAM,EAAE,MAAM;iBACf;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,yBAAyB,EAAE;gBACtC,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,UAAU;gBACV,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;aAChE,CAAC,CAAC;YACH,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACK,sBAAsB,CAAC,OAA6B;QAC1D,OAAO;YACL,WAAW,EAAE;gBACX,GAAG,EAAE,OAAO,CAAC,WAAW,EAAE,aAAa,IAAI,EAAE;aAC9C;YACD,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,OAAO,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE;YACtC,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;YACtC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,EAAE;SAC7B,CAAC;IACJ,CAAC;CACF;AAlND,wDAkNC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { type EnhancedNodeDefinition } from "@gravity-platform/plugin-base";
|
|
2
|
+
import { DocumentParserExecutor } from "./executor";
|
|
3
|
+
/** Value used as the `type` field of the Document Parser node definition. */
export declare const NODE_TYPE = "DocumentParser";
|
|
4
|
+
/**
 * Builds the Document Parser node definition (display metadata, input/output ports, config schema).
 * @returns The node definition object
 */
declare function createNodeDefinition(): EnhancedNodeDefinition;
|
|
5
|
+
/** Bundle pairing the node definition with the executor class. */
export declare const DocumentParserNode: {
|
|
6
|
+
/** Result of a single `createNodeDefinition()` call made at module load (typed `any` in this declaration). */
definition: any;
|
|
7
|
+
/** The `DocumentParserExecutor` class itself, not an instance. */
executor: typeof DocumentParserExecutor;
|
|
8
|
+
};
|
|
9
|
+
export { createNodeDefinition };
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/DocumentParser/node/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAA2B,KAAK,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACrG,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAEpD,eAAO,MAAM,SAAS,mBAAmB,CAAC;AAE1C,iBAAS,oBAAoB,IAAI,sBAAsB,CAwDtD;AAID,eAAO,MAAM,kBAAkB;;;CAG9B,CAAC;AAEF,OAAO,EAAE,oBAAoB,EAAE,CAAC"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DocumentParserNode = exports.NODE_TYPE = void 0;
|
|
4
|
+
exports.createNodeDefinition = createNodeDefinition;
|
|
5
|
+
const plugin_base_1 = require("@gravity-platform/plugin-base");
|
|
6
|
+
const executor_1 = require("./executor");
|
|
7
|
+
exports.NODE_TYPE = "DocumentParser";
|
|
8
|
+
/**
 * Build the definition object for the Document Parser node.
 *
 * The definition carries display metadata (name, description, category, logo,
 * color), one object input named "signal", one object output named "output",
 * and a JSON-schema style `configSchema` with three properties: `file`,
 * `parserType` and `maxFileSizeMB`.
 *
 * @returns The node definition object
 */
function createNodeDefinition() {
    // Only NodeInputType is used below; the previously destructured
    // NodeConcurrency was never read, so it is no longer pulled out.
    const { NodeInputType } = (0, plugin_base_1.getPlatformDependencies)();
    return {
        // NOTE(review): the package header lists version 1.1.2 while this says 1.1.1 — confirm whether it should track the package version.
        packageVersion: "1.1.1",
        type: exports.NODE_TYPE,
        name: "Document Parser",
        description: "Parse documents (PDF, DOCX, TXT) and extract text content",
        category: "Ingest",
        logoUrl: "https://cdn-icons-png.flaticon.com/512/2991/2991112.png",
        color: "#4A90E2",
        inputs: [
            {
                name: "signal",
                type: NodeInputType.OBJECT,
                description: "File object",
            },
        ],
        outputs: [
            {
                name: "output",
                type: NodeInputType.OBJECT,
                description: "Parsed document",
            },
        ],
        configSchema: {
            type: "object",
            properties: {
                file: {
                    // NOTE(review): declared as "object" but the default is an empty string;
                    // presumably the "template" field resolves to an object at run time — confirm.
                    type: "object",
                    title: "File",
                    description: "Parse me",
                    default: "",
                    "ui:field": "template",
                },
                parserType: {
                    type: "string",
                    title: "Parser Type",
                    description: "Force specific parser or use auto-detect",
                    enum: ["auto", "pdf", "docx", "txt"],
                    default: "auto",
                },
                maxFileSizeMB: {
                    type: "number",
                    title: "Max File Size (MB)",
                    description: "Maximum file size to process in megabytes",
                    default: 10,
                    minimum: 1,
                    maximum: 100,
                },
            },
        },
    };
}
|
|
61
|
+
const definition = createNodeDefinition();
|
|
62
|
+
exports.DocumentParserNode = {
|
|
63
|
+
definition,
|
|
64
|
+
executor: executor_1.DocumentParserExecutor,
|
|
65
|
+
};
|
|
66
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/DocumentParser/node/index.ts"],"names":[],"mappings":";;;AAsES,oDAAoB;AAtE7B,+DAAqG;AACrG,yCAAoD;AAEvC,QAAA,SAAS,GAAG,gBAAgB,CAAC;AAE1C,SAAS,oBAAoB;IAC3B,MAAM,EAAE,aAAa,EAAE,eAAe,EAAE,GAAG,IAAA,qCAAuB,GAAE,CAAC;IAErE,OAAO;QACL,cAAc,EAAE,OAAO;QACvB,IAAI,EAAE,iBAAS;QACf,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,2DAA2D;QACxE,QAAQ,EAAE,QAAQ;QAClB,OAAO,EAAE,yDAAyD;QAClE,KAAK,EAAE,SAAS;QAEhB,MAAM,EAAE;YACN;gBACE,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,aAAa,CAAC,MAAM;gBAC1B,WAAW,EAAE,aAAa;aAC3B;SACF;QAED,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,aAAa,CAAC,MAAM;gBAC1B,WAAW,EAAE,iBAAiB;aAC/B;SACF;QAED,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE;oBACJ,IAAI,EAAE,QAAQ;oBACd,KAAK,EAAE,MAAM;oBACb,WAAW,EAAE,UAAU;oBACvB,OAAO,EAAE,EAAE;oBACX,UAAU,EAAE,UAAU;iBACvB;gBACD,UAAU,EAAE;oBACV,IAAI,EAAE,QAAQ;oBACd,KAAK,EAAE,aAAa;oBACpB,WAAW,EAAE,0CAA0C;oBACvD,IAAI,EAAE,CAAC,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC;oBACpC,OAAO,EAAE,MAAM;iBAChB;gBACD,aAAa,EAAE;oBACb,IAAI,EAAE,QAAQ;oBACd,KAAK,EAAE,oBAAoB;oBAC3B,WAAW,EAAE,2CAA2C;oBACxD,OAAO,EAAE,EAAE;oBACX,OAAO,EAAE,CAAC;oBACV,OAAO,EAAE,GAAG;iBACb;aACF;SACF;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,GAAG,oBAAoB,EAAE,CAAC;AAE7B,QAAA,kBAAkB,GAAG;IAChC,UAAU;IACV,QAAQ,EAAE,iCAAsB;CACjC,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/** Result of parsing a DOCX file: the same document as raw text and as HTML. */
export interface DOCXParseResult {
|
|
2
|
+
/** Raw text produced by mammoth's `extractRawText`. */
text: string;
|
|
3
|
+
/** HTML produced by mammoth's `convertToHtml`. */
html: string;
|
|
4
|
+
/** Messages reported by the raw-text extraction followed by those from the HTML conversion. */
messages: any[];
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Parse DOCX file and extract text content
|
|
8
|
+
* @param buffer DOCX file buffer
|
|
9
|
+
* @returns Parsed DOCX content
* @throws Error with a message starting "DOCX parsing failed:" when either conversion fails
|
|
10
|
+
*/
|
|
11
|
+
export declare function parseDOCX(buffer: Buffer): Promise<DOCXParseResult>;
|
|
12
|
+
//# sourceMappingURL=docxParser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docxParser.d.ts","sourceRoot":"","sources":["../../../src/DocumentParser/service/docxParser.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,GAAG,EAAE,CAAC;CACjB;AAED;;;;GAIG;AACH,wBAAsB,SAAS,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAaxE"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.parseDOCX = parseDOCX;
|
|
7
|
+
const mammoth_1 = __importDefault(require("mammoth"));
|
|
8
|
+
/**
 * Parse a DOCX file and return its content as raw text and as HTML.
 *
 * @param buffer DOCX file buffer
 * @returns Object with `text` (raw text), `html` (HTML rendering) and
 *          `messages` (raw-text messages followed by HTML messages)
 * @throws Error with a message starting "DOCX parsing failed:" when either
 *         conversion rejects
 */
async function parseDOCX(buffer) {
    try {
        // The two conversions do not depend on each other, so start both and
        // wait once instead of paying for them back to back.
        const [rawText, html] = await Promise.all([
            mammoth_1.default.extractRawText({ buffer }),
            mammoth_1.default.convertToHtml({ buffer }),
        ]);
        return {
            text: rawText.value,
            html: html.value,
            // Keep the original ordering: raw-text messages first, then HTML messages.
            messages: [...rawText.messages, ...html.messages],
        };
    }
    catch (error) {
        throw new Error(`DOCX parsing failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
|
|
27
|
+
//# sourceMappingURL=docxParser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docxParser.js","sourceRoot":"","sources":["../../../src/DocumentParser/service/docxParser.ts"],"names":[],"mappings":";;;;;AAaA,8BAaC;AA1BD,sDAA8B;AAQ9B;;;;GAIG;AACI,KAAK,UAAU,SAAS,CAAC,MAAc;IAC5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,iBAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACxD,MAAM,UAAU,GAAG,MAAM,iBAAO,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QAE3D,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,IAAI,EAAE,UAAU,CAAC,KAAK;YACtB,QAAQ,EAAE,CAAC,GAAG,MAAM,CAAC,QAAQ,EAAE,GAAG,UAAU,CAAC,QAAQ,CAAC;SACvD,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,wBAAwB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IACtG,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/** Document kinds that `detectFileType` can report. */
export declare enum FileType {
|
|
2
|
+
/** Reported for the `pdf` extension. */
PDF = "pdf",
|
|
3
|
+
/** Reported for the `docx` and `doc` extensions. */
DOCX = "docx",
|
|
4
|
+
/** Reported for the `txt` and `text` extensions. */
TXT = "txt",
|
|
5
|
+
/** Reported for every other extension; `isSupportedFileType` returns false for it. */
UNKNOWN = "unknown"
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Detect file type from file key/path
* The extension is taken with `path.extname` and compared case-insensitively.
|
|
9
|
+
* @param fileKey File key or path
|
|
10
|
+
* @returns Detected file type
|
|
11
|
+
*/
|
|
12
|
+
export declare function detectFileType(fileKey: string): FileType;
|
|
13
|
+
/**
|
|
14
|
+
* Validate if file type is supported
* Every value other than `FileType.UNKNOWN` counts as supported.
|
|
15
|
+
* @param fileType File type to validate
|
|
16
|
+
* @returns True if supported
|
|
17
|
+
*/
|
|
18
|
+
export declare function isSupportedFileType(fileType: FileType): boolean;
|
|
19
|
+
//# sourceMappingURL=fileTypeDetector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fileTypeDetector.d.ts","sourceRoot":"","sources":["../../../src/DocumentParser/service/fileTypeDetector.ts"],"names":[],"mappings":"AAEA,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,OAAO,YAAY;CACpB;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,QAAQ,CAexD;AAED;;;;GAIG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAE/D"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Module-interop helpers used by `__importStar(require("path"))` further down.
// Each helper reuses a same-named implementation found on `this` when one exists.
//
// __createBinding(o, m, k, k2): expose property `k` of module `m` on object `o`
// under the name `k2` (defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
// Fall back to a forwarding getter when there is no descriptor, when an accessor
// comes from a non-ES module, or when a data property is writable/configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
// Variant used when Object.create is unavailable: plain value copy.
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// __setModuleDefault(o, v): store `v` as the "default" property of `o`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// __importStar(mod): return `mod` unchanged when it is flagged `__esModule`;
// otherwise build a new object carrying mod's own non-"default" properties plus
// `default` set to `mod` itself.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or with
// a for-in/hasOwnProperty loop when that function is missing.
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
// Bind every own property except "default"; "default" is set separately below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.FileType = void 0;
|
|
37
|
+
exports.detectFileType = detectFileType;
|
|
38
|
+
exports.isSupportedFileType = isSupportedFileType;
|
|
39
|
+
const path = __importStar(require("path"));
|
|
40
|
+
/**
 * Document kinds reported by detectFileType. Each member's value is the
 * lowercase string name of the kind.
 */
var FileType = {
    PDF: "pdf",
    DOCX: "docx",
    TXT: "txt",
    UNKNOWN: "unknown",
};
exports.FileType = FileType;
|
|
47
|
+
/**
 * Detect the document type from the extension of a file key or path.
 *
 * The extension is taken with `path.extname`, lowercased, and matched against
 * the known extensions. Anything unrecognised, including a key that is missing
 * or not a string, yields `FileType.UNKNOWN`.
 *
 * @param fileKey File key or path
 * @returns Detected file type
 */
function detectFileType(fileKey) {
    // path.extname throws a TypeError for non-string input; report such keys as
    // UNKNOWN so callers reach their normal "unsupported type" handling instead.
    if (typeof fileKey !== 'string') {
        return FileType.UNKNOWN;
    }
    // extname returns the extension with its leading dot; drop the dot.
    const ext = path.extname(fileKey).toLowerCase().slice(1);
    switch (ext) {
        case 'pdf':
            return FileType.PDF;
        case 'docx':
        // NOTE(review): legacy `.doc` is routed to the DOCX parser as well —
        // confirm the parser actually accepts that format.
        case 'doc':
            return FileType.DOCX;
        case 'txt':
        case 'text':
            return FileType.TXT;
        default:
            return FileType.UNKNOWN;
    }
}
|
|
67
|
+
/**
 * Tell whether a detected file type can be parsed.
 *
 * @param fileType File type to validate
 * @returns False only for FileType.UNKNOWN; true for every other value
 */
function isSupportedFileType(fileType) {
    const isUnknown = fileType === FileType.UNKNOWN;
    return !isUnknown;
}
|
|
75
|
+
//# sourceMappingURL=fileTypeDetector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fileTypeDetector.js","sourceRoot":"","sources":["../../../src/DocumentParser/service/fileTypeDetector.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAcA,wCAeC;AAOD,kDAEC;AAtCD,2CAA6B;AAE7B,IAAY,QAKX;AALD,WAAY,QAAQ;IAClB,uBAAW,CAAA;IACX,yBAAa,CAAA;IACb,uBAAW,CAAA;IACX,+BAAmB,CAAA;AACrB,CAAC,EALW,QAAQ,wBAAR,QAAQ,QAKnB;AAED;;;;GAIG;AACH,SAAgB,cAAc,CAAC,OAAe;IAC5C,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEzD,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,KAAK;YACR,OAAO,QAAQ,CAAC,GAAG,CAAC;QACtB,KAAK,MAAM,CAAC;QACZ,KAAK,KAAK;YACR,OAAO,QAAQ,CAAC,IAAI,CAAC;QACvB,KAAK,KAAK,CAAC;QACX,KAAK,MAAM;YACT,OAAO,QAAQ,CAAC,GAAG,CAAC;QACtB;YACE,OAAO,QAAQ,CAAC,OAAO,CAAC;IAC5B,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAgB,mBAAmB,CAAC,QAAkB;IACpD,OAAO,QAAQ,KAAK,QAAQ,CAAC,OAAO,CAAC;AACvC,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { parsePDF, type PDFParseResult } from './pdfParser';
|
|
2
|
+
export { parseDOCX, type DOCXParseResult } from './docxParser';
|
|
3
|
+
export { parseTXT, type TXTParseResult } from './txtParser';
|
|
4
|
+
export { detectFileType, isSupportedFileType, FileType } from './fileTypeDetector';
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/DocumentParser/service/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,KAAK,cAAc,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,EAAE,SAAS,EAAE,KAAK,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/D,OAAO,EAAE,QAAQ,EAAE,KAAK,cAAc,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
// Create every public name on `exports` up front (same creation order as the
// original chained assignment) before any sibling module is loaded.
exports.parsePDF = void 0;
exports.parseDOCX = void 0;
exports.parseTXT = void 0;
exports.detectFileType = void 0;
exports.isSupportedFileType = void 0;
exports.FileType = void 0;
// Re-export `name` as an enumerable getter so the value is read from the
// source module at access time.
const reExport = (name, read) => {
    Object.defineProperty(exports, name, { enumerable: true, get: read });
};
var pdfParser_1 = require("./pdfParser");
reExport("parsePDF", () => pdfParser_1.parsePDF);
var docxParser_1 = require("./docxParser");
reExport("parseDOCX", () => docxParser_1.parseDOCX);
var txtParser_1 = require("./txtParser");
reExport("parseTXT", () => txtParser_1.parseTXT);
var fileTypeDetector_1 = require("./fileTypeDetector");
reExport("detectFileType", () => fileTypeDetector_1.detectFileType);
reExport("isSupportedFileType", () => fileTypeDetector_1.isSupportedFileType);
reExport("FileType", () => fileTypeDetector_1.FileType);
|
|
14
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/DocumentParser/service/index.ts"],"names":[],"mappings":";;;AAAA,yCAA4D;AAAnD,qGAAA,QAAQ,OAAA;AACjB,2CAA+D;AAAtD,uGAAA,SAAS,OAAA;AAClB,yCAA4D;AAAnD,qGAAA,QAAQ,OAAA;AACjB,uDAAmF;AAA1E,kHAAA,cAAc,OAAA;AAAE,uHAAA,mBAAmB,OAAA;AAAE,4GAAA,QAAQ,OAAA"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** Result of parsing a PDF file: extracted text, page count and parser-reported metadata. */
export interface PDFParseResult {
|
|
2
|
+
/** Text reported by pdf-parse (`data.text`). */
text: string;
|
|
3
|
+
/** Page count reported by pdf-parse (`data.numpages`). */
pageCount: number;
|
|
4
|
+
/** Values passed through unchanged from the pdf-parse result. */
metadata: {
|
|
5
|
+
/** `data.info` from pdf-parse. */
info?: any;
|
|
6
|
+
/** `data.metadata` from pdf-parse. */
metadata?: any;
|
|
7
|
+
/** `data.version` from pdf-parse. */
version?: string;
|
|
8
|
+
};
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Parse PDF file and extract text content
|
|
12
|
+
* @param buffer PDF file buffer
|
|
13
|
+
* @returns Parsed PDF content and metadata
* @throws Error with a message starting "PDF parsing failed:" when parsing fails
|
|
14
|
+
*/
|
|
15
|
+
export declare function parsePDF(buffer: Buffer): Promise<PDFParseResult>;
|
|
16
|
+
//# sourceMappingURL=pdfParser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfParser.d.ts","sourceRoot":"","sources":["../../../src/DocumentParser/service/pdfParser.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE;QACR,IAAI,CAAC,EAAE,GAAG,CAAC;QACX,QAAQ,CAAC,EAAE,GAAG,CAAC;QACf,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAED;;;;GAIG;AACH,wBAAsB,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAgBtE"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.parsePDF = parsePDF;
|
|
7
|
+
const pdf_parse_1 = __importDefault(require("pdf-parse"));
|
|
8
|
+
/**
 * Extract text and metadata from a PDF file.
 *
 * @param buffer PDF file buffer
 * @returns Object with `text`, `pageCount`, and a `metadata` object holding the
 *          parser's `info`, `metadata` and `version` values
 * @throws Error with a message starting "PDF parsing failed:" when parsing fails
 */
async function parsePDF(buffer) {
    try {
        // Unpack inside the try so a malformed parser result is wrapped like any
        // other parsing failure.
        const { text, numpages, info, metadata, version } = await (0, pdf_parse_1.default)(buffer);
        return {
            text,
            pageCount: numpages,
            metadata: { info, metadata, version },
        };
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`PDF parsing failed: ${reason}`);
    }
}
|
|
30
|
+
//# sourceMappingURL=pdfParser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfParser.js","sourceRoot":"","sources":["../../../src/DocumentParser/service/pdfParser.ts"],"names":[],"mappings":";;;;;AAiBA,4BAgBC;AAjCD,0DAAiC;AAYjC;;;;GAIG;AACI,KAAK,UAAU,QAAQ,CAAC,MAAc;IAC3C,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,IAAA,mBAAQ,EAAC,MAAM,CAAC,CAAC;QAEpC,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,QAAQ,EAAE;gBACR,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,OAAO,EAAE,IAAI,CAAC,OAAO;aACtB;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IACrG,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Result of parsing a plain-text file. */
export interface TXTParseResult {
|
|
2
|
+
/** Decoded file content. */
text: string;
|
|
3
|
+
/** Encoding used for decoding: 'utf8', or 'latin1' when the UTF-8 attempt was rejected. */
encoding: string;
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Parse TXT file and extract text content
|
|
7
|
+
* @param buffer TXT file buffer
|
|
8
|
+
* @returns Parsed TXT content
* @throws Error with a message starting "TXT parsing failed:" when decoding throws
|
|
9
|
+
*/
|
|
10
|
+
export declare function parseTXT(buffer: Buffer): Promise<TXTParseResult>;
|
|
11
|
+
//# sourceMappingURL=txtParser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"txtParser.d.ts","sourceRoot":"","sources":["../../../src/DocumentParser/service/txtParser.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAsB,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAyBtE"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.parseTXT = parseTXT;
|
|
4
|
+
/**
 * Decode a plain-text file buffer into a string.
 *
 * The bytes are decoded as UTF-8. If they are not valid UTF-8 the buffer is
 * decoded as latin1 instead. A file that is valid UTF-8 and genuinely contains
 * the replacement character U+FFFD is kept as UTF-8.
 *
 * @param buffer TXT file buffer
 * @returns Object with the decoded `text` and the `encoding` used
 *          ('utf8' or 'latin1')
 * @throws Error with a message starting "TXT parsing failed:" when decoding throws
 */
async function parseTXT(buffer) {
    try {
        let text = buffer.toString('utf8');
        let encoding = 'utf8';
        // U+FFFD in the decoded text is ambiguous: the decoder inserts it for
        // invalid byte sequences, but it can also be real content. Re-encoding
        // only reproduces the original bytes when the input was valid UTF-8, so
        // fall back to latin1 only when that round trip fails.
        if (text.includes('\ufffd') && !Buffer.from(text, 'utf8').equals(buffer)) {
            text = buffer.toString('latin1');
            encoding = 'latin1';
        }
        return {
            text,
            encoding,
        };
    }
    catch (error) {
        throw new Error(`TXT parsing failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
|
|
35
|
+
//# sourceMappingURL=txtParser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"txtParser.js","sourceRoot":"","sources":["../../../src/DocumentParser/service/txtParser.ts"],"names":[],"mappings":";;AAUA,4BAyBC;AA9BD;;;;GAIG;AACI,KAAK,UAAU,QAAQ,CAAC,MAAc;IAC3C,IAAI,CAAC;QACH,sDAAsD;QACtD,IAAI,IAAY,CAAC;QACjB,IAAI,QAAQ,GAAG,MAAM,CAAC;QAEtB,IAAI,CAAC;YACH,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,qCAAqC;YACrC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,sBAAsB;YACtB,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACjC,QAAQ,GAAG,QAAQ,CAAC;QACtB,CAAC;QAED,OAAO;YACL,IAAI;YACJ,QAAQ;SACT,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IACrG,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Creates a content hash for a parsed document result object
|
|
3
|
+
* Excludes fields that can change independently of content (universalId, downloadUrl)
|
|
4
|
+
* @param result - The parsed document result object
|
|
5
|
+
* @returns Short hash (12 characters) representing the content
|
|
6
|
+
*/
|
|
7
|
+
export declare function createContentHash(result: any): string;
|
|
8
|
+
/**
|
|
9
|
+
* Creates a content hash from specific content fields
|
|
10
|
+
* More targeted approach if you want to hash only core content
|
|
11
|
+
* @param text - Document text content
|
|
12
|
+
* @param metadata - Document metadata
|
|
13
|
+
* @param pageCount - Number of pages (optional)
|
|
14
|
+
* @returns Short hash (12 characters) representing the content
|
|
15
|
+
*/
|
|
16
|
+
export declare function createContentHashFromFields(text: string, metadata: any, pageCount?: number): string;
|
|
17
|
+
//# sourceMappingURL=hashUtils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hashUtils.d.ts","sourceRoot":"","sources":["../../../src/DocumentParser/util/hashUtils.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,GAAG,GAAG,MAAM,CAUrD;AAED;;;;;;;GAOG;AACH,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,GAAG,EACb,SAAS,CAAC,EAAE,MAAM,GACjB,MAAM,CAIR"}
|