n8n-nodes-lite-parser 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1,2 @@
1
1
  export { LiteParseNode } from './nodes/LiteParse/LiteParse.node';
2
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,kCAAkC,CAAC"}
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,mEAAiE;AAAxD,+GAAA,aAAa,OAAA"}
@@ -0,0 +1,6 @@
1
+ import { IExecuteFunctions, INodeExecutionData, INodeType, INodeTypeDescription } from 'n8n-workflow';
2
/**
 * Type declaration for the LiteParse n8n node exported by this package.
 */
export declare class LiteParseNode implements INodeType {
    /** Node definition object, typed as n8n's `INodeTypeDescription`. */
    description: INodeTypeDescription;
    /**
     * Node entry point. It is declared with an `IExecuteFunctions` context as
     * `this` and resolves to a two-dimensional array of `INodeExecutionData`.
     */
    execute(this: IExecuteFunctions): Promise<Array<Array<INodeExecutionData>>>;
}
6
+ //# sourceMappingURL=LiteParse.node.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LiteParse.node.d.ts","sourceRoot":"","sources":["../../../src/nodes/LiteParse/LiteParse.node.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,kBAAkB,EAClB,SAAS,EACT,oBAAoB,EAErB,MAAM,cAAc,CAAC;AAEtB,qBAAa,aAAc,YAAW,SAAS;IAC7C,WAAW,EAAE,oBAAoB,CAoK/B;IAEI,OAAO,CAAC,IAAI,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;CAqGxE"}
@@ -0,0 +1,260 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.LiteParseNode = void 0;
4
+ const n8n_workflow_1 = require("n8n-workflow");
5
/**
 * n8n node that hands a binary document to the LiteParse parser and returns
 * the parsed text, adding per-page data when the JSON output format is chosen.
 *
 * The parser itself comes from the `@llamaindex/liteparse` package, which is
 * loaded at run time inside `execute`.
 */
class LiteParseNode {
    constructor() {
        // Node definition: editor metadata plus every parameter `execute` reads.
        this.description = {
            displayName: 'LiteParse',
            name: 'liteParse',
            icon: 'file:liteparse.svg',
            group: ['transform'],
            version: 1,
            subtitle: '={{ $parameter["outputFormat"] }}',
            description: 'Parse PDFs, Office docs, and images into Markdown, JSON, or text — locally, no API keys',
            defaults: {
                name: 'LiteParse',
            },
            inputs: [n8n_workflow_1.NodeConnectionTypes.Main],
            outputs: [n8n_workflow_1.NodeConnectionTypes.Main],
            properties: [
                {
                    displayName: 'Input Binary Field',
                    name: 'binaryPropertyName',
                    type: 'string',
                    default: 'data',
                    required: true,
                    description: 'Name of the binary property containing the document to parse',
                },
                {
                    displayName: 'Output Format',
                    name: 'outputFormat',
                    type: 'options',
                    options: [
                        {
                            name: 'Markdown',
                            value: 'markdown',
                            description: 'Structured Markdown with headings, tables, lists, and links',
                        },
                        {
                            name: 'Text',
                            value: 'text',
                            description: 'Plain text with layout preserved',
                        },
                        {
                            name: 'JSON',
                            value: 'json',
                            description: 'Structured JSON with bounding boxes and spatial data',
                        },
                    ],
                    default: 'markdown',
                    description: 'Format of the parsed output',
                },
                {
                    displayName: 'OCR Enabled',
                    name: 'ocrEnabled',
                    type: 'boolean',
                    default: true,
                    description: 'Whether to run OCR on scanned documents and images',
                },
                {
                    displayName: 'OCR Language',
                    name: 'ocrLanguage',
                    type: 'string',
                    default: 'eng',
                    // Only shown in the editor while OCR is switched on.
                    displayOptions: {
                        show: {
                            ocrEnabled: [true],
                        },
                    },
                    description: 'Tesseract language code (e.g. eng, fra, deu, ara, ara+eng)',
                },
                {
                    displayName: 'Password',
                    name: 'password',
                    type: 'string',
                    typeOptions: { password: true },
                    default: '',
                    description: 'Password for encrypted/protected PDFs (leave empty if not encrypted)',
                },
                {
                    displayName: 'Target Pages',
                    name: 'targetPages',
                    type: 'string',
                    default: '',
                    placeholder: '1-5,10,15-20',
                    description: 'Specific pages to parse (leave empty for all pages)',
                },
                {
                    displayName: 'DPI',
                    name: 'dpi',
                    type: 'number',
                    default: 150,
                    typeOptions: {
                        minValue: 72,
                        maxValue: 600,
                    },
                    description: 'Rendering DPI for OCR and screenshots (higher = better quality, slower)',
                },
                {
                    displayName: 'Options',
                    name: 'options',
                    type: 'collection',
                    placeholder: 'Add Option',
                    default: {},
                    options: [
                        {
                            displayName: 'Image Mode',
                            name: 'imageMode',
                            type: 'options',
                            options: [
                                {
                                    name: 'Placeholder',
                                    value: 'placeholder',
                                    description: 'Reference images with placeholder links',
                                },
                                {
                                    name: 'Off',
                                    value: 'off',
                                    description: 'Strip all images from output',
                                },
                                {
                                    name: 'Embed',
                                    value: 'embed',
                                    description: 'Write image PNGs to disk and reference them',
                                },
                            ],
                            default: 'placeholder',
                            description: 'How to handle images in Markdown output',
                            displayOptions: {
                                show: {
                                    '/outputFormat': ['markdown'],
                                },
                            },
                        },
                        {
                            displayName: 'Extract Links',
                            name: 'extractLinks',
                            type: 'boolean',
                            default: true,
                            description: 'Whether to render hyperlinks as [text](url) in Markdown output',
                            displayOptions: {
                                show: {
                                    '/outputFormat': ['markdown'],
                                },
                            },
                        },
                        {
                            displayName: 'Max Pages',
                            name: 'maxPages',
                            type: 'number',
                            default: 1000,
                            description: 'Maximum number of pages to parse',
                        },
                        {
                            displayName: 'OCR Workers',
                            name: 'numWorkers',
                            type: 'number',
                            default: 4,
                            description: 'Number of concurrent OCR workers',
                        },
                        {
                            displayName: 'Quiet Mode',
                            name: 'quiet',
                            type: 'boolean',
                            default: true,
                            description: 'Suppress progress output from LiteParse',
                        },
                    ],
                },
            ],
        };
    }
    /**
     * Parses the binary document attached to each input item.
     *
     * Must be called with an execution context as `this` that provides
     * `getInputData`, `getNodeParameter`, `helpers.getBinaryDataBuffer` and
     * `continueOnFail`.
     *
     * @returns {Promise<Array<Array<object>>>} A one-element array holding the
     *   output items. A successful item carries `json.text`, `json.pages`
     *   (page count), `json.format`, `json.pageData` (JSON format only), the
     *   input item's `binary`, and `pairedItem`. When `continueOnFail()` is
     *   true a failed item becomes `{ json: { error }, pairedItem }` instead.
     * @throws Re-throws the first per-item failure when `continueOnFail()` is
     *   false, e.g. the `Error` raised for a missing binary property.
     */
    async execute() {
        // Use eval to preserve native import() — @llamaindex/liteparse is ESM-only,
        // TypeScript's CommonJS output would convert import() to require() which breaks.
        // NOTE(review): the evaluated string is a constant; never build it from input.
        const { LiteParse } = await eval("import('@llamaindex/liteparse')");
        const items = this.getInputData();
        const returnData = [];
        for (let i = 0; i < items.length; i++) {
            try {
                const binaryPropertyName = this.getNodeParameter('binaryPropertyName', i);
                const outputFormat = this.getNodeParameter('outputFormat', i);
                const ocrEnabled = this.getNodeParameter('ocrEnabled', i);
                const ocrLanguage = this.getNodeParameter('ocrLanguage', i);
                const password = this.getNodeParameter('password', i);
                const targetPages = this.getNodeParameter('targetPages', i);
                const dpi = this.getNodeParameter('dpi', i);
                const options = this.getNodeParameter('options', i);
                const isMarkdown = outputFormat === 'markdown';
                // Fail early, with the property name in the message, when the item
                // does not carry the binary field the user pointed at.
                if (!items[i].binary?.[binaryPropertyName]) {
                    throw new Error(`Binary property "${binaryPropertyName}" does not exist on item ${i}`);
                }
                const buffer = await this.helpers.getBinaryDataBuffer(i, binaryPropertyName);
                // Always-present settings first; optional ones are only forwarded
                // when set so the parser's own defaults apply otherwise.
                const parserConfig = { outputFormat, ocrEnabled, dpi };
                if (ocrEnabled && ocrLanguage) {
                    parserConfig.ocrLanguage = ocrLanguage;
                }
                if (password) {
                    parserConfig.password = password;
                }
                if (targetPages) {
                    parserConfig.targetPages = targetPages;
                }
                // Image and link handling only apply to Markdown output.
                if (isMarkdown && options.imageMode) {
                    parserConfig.imageMode = options.imageMode;
                }
                if (isMarkdown && options.extractLinks !== undefined) {
                    parserConfig.extractLinks = options.extractLinks;
                }
                if (options.maxPages) {
                    parserConfig.maxPages = options.maxPages;
                }
                if (options.numWorkers) {
                    parserConfig.numWorkers = options.numWorkers;
                }
                if (options.quiet !== undefined) {
                    parserConfig.quiet = options.quiet;
                }
                const result = await new LiteParse(parserConfig).parse(buffer);
                const outputJson = {
                    text: result.text,
                    pages: result.pages?.length || 0,
                    format: outputFormat,
                };
                // Structured per-page data is only attached for the JSON format.
                if (outputFormat === 'json' && result.pages) {
                    outputJson.pageData = result.pages;
                }
                returnData.push({
                    json: outputJson,
                    binary: items[i].binary,
                    pairedItem: { item: i },
                });
            }
            catch (error) {
                if (!this.continueOnFail()) {
                    throw error;
                }
                // A thrown value is not guaranteed to be an Error: fall back to its
                // string form so the output item never carries `error: undefined`
                // (and so a thrown null/undefined cannot crash this handler).
                const message = error?.message;
                returnData.push({
                    json: { error: message == null ? String(error) : String(message) },
                    pairedItem: { item: i },
                });
            }
        }
        return [returnData];
    }
}
259
+ exports.LiteParseNode = LiteParseNode;
260
+ //# sourceMappingURL=LiteParse.node.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LiteParse.node.js","sourceRoot":"","sources":["../../../src/nodes/LiteParse/LiteParse.node.ts"],"names":[],"mappings":";;;AAAA,+CAMsB;AAEtB,MAAa,aAAa;IAA1B;QACE,gBAAW,GAAyB;YAClC,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,oBAAoB;YAC1B,KAAK,EAAE,CAAC,WAAW,CAAC;YACpB,OAAO,EAAE,CAAC;YACV,QAAQ,EAAE,mCAAmC;YAC7C,WAAW,EAAE,yFAAyF;YACtG,QAAQ,EAAE;gBACR,IAAI,EAAE,WAAW;aAClB;YACD,MAAM,EAAE,CAAC,kCAAmB,CAAC,IAAI,CAAC;YAClC,OAAO,EAAE,CAAC,kCAAmB,CAAC,IAAI,CAAC;YACnC,UAAU,EAAE;gBACV;oBACE,WAAW,EAAE,oBAAoB;oBACjC,IAAI,EAAE,oBAAoB;oBAC1B,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,MAAM;oBACf,QAAQ,EAAE,IAAI;oBACd,WAAW,EAAE,8DAA8D;iBAC5E;gBACD;oBACE,WAAW,EAAE,eAAe;oBAC5B,IAAI,EAAE,cAAc;oBACpB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,UAAU;4BAChB,KAAK,EAAE,UAAU;4BACjB,WAAW,EAAE,6DAA6D;yBAC3E;wBACD;4BACE,IAAI,EAAE,MAAM;4BACZ,KAAK,EAAE,MAAM;4BACb,WAAW,EAAE,kCAAkC;yBAChD;wBACD;4BACE,IAAI,EAAE,MAAM;4BACZ,KAAK,EAAE,MAAM;4BACb,WAAW,EAAE,sDAAsD;yBACpE;qBACF;oBACD,OAAO,EAAE,UAAU;oBACnB,WAAW,EAAE,6BAA6B;iBAC3C;gBACD;oBACE,WAAW,EAAE,aAAa;oBAC1B,IAAI,EAAE,YAAY;oBAClB,IAAI,EAAE,SAAS;oBACf,OAAO,EAAE,IAAI;oBACb,WAAW,EAAE,oDAAoD;iBAClE;gBACD;oBACE,WAAW,EAAE,cAAc;oBAC3B,IAAI,EAAE,aAAa;oBACnB,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE;wBACd,IAAI,EAAE;4BACJ,UAAU,EAAE,CAAC,IAAI,CAAC;yBACnB;qBACF;oBACD,WAAW,EAAE,4DAA4D;iBAC1E;gBACD;oBACE,WAAW,EAAE,UAAU;oBACvB,IAAI,EAAE,UAAU;oBAChB,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;oBAC/B,OAAO,EAAE,EAAE;oBACX,WAAW,EAAE,sEAAsE;iBACpF;gBACD;oBACE,WAAW,EAAE,cAAc;oBAC3B,IAAI,EAAE,aAAa;oBACnB,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,EAAE;oBACX,WAAW,EAAE,cAAc;oBAC3B,WAAW,EAAE,qDAAqD;iBACnE;gBACD;oBACE,WAAW,EAAE,KAAK;oBAClB,IAAI,EAAE,KAAK;oBACX,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,GAAG;oBACZ,WAAW,EAAE;wBACX,QAAQ,EAAE,EAAE;wBACZ,QAAQ,EAAE,GAAG;qBACd;oBACD,WAAW,EAAE,yEAAyE;iBACvF;gBACD;oBACE,WAAW,EAAE,SAAS;oBACtB,IAAI,EAAE,SAAS;oBACf,IAAI,EAAE,YAAY;oBAClB,WAAW,EAAE,YAAY;oBACzB,OAAO,EAAE,EAAE;oBACX,OAAO,EAAE;wBACP;4BACE,WAAW,EAAE,YAAY;
4BACzB,IAAI,EAAE,WAAW;4BACjB,IAAI,EAAE,SAAS;4BACf,OAAO,EAAE;gCACP;oCACE,IAAI,EAAE,aAAa;oCACnB,KAAK,EAAE,aAAa;oCACpB,WAAW,EAAE,yCAAyC;iCACvD;gCACD;oCACE,IAAI,EAAE,KAAK;oCACX,KAAK,EAAE,KAAK;oCACZ,WAAW,EAAE,8BAA8B;iCAC5C;gCACD;oCACE,IAAI,EAAE,OAAO;oCACb,KAAK,EAAE,OAAO;oCACd,WAAW,EAAE,6CAA6C;iCAC3D;6BACF;4BACD,OAAO,EAAE,aAAa;4BACtB,WAAW,EAAE,yCAAyC;4BACtD,cAAc,EAAE;gCACd,IAAI,EAAE;oCACJ,eAAe,EAAE,CAAC,UAAU,CAAC;iCAC9B;6BACF;yBACF;wBACD;4BACE,WAAW,EAAE,eAAe;4BAC5B,IAAI,EAAE,cAAc;4BACpB,IAAI,EAAE,SAAS;4BACf,OAAO,EAAE,IAAI;4BACb,WAAW,EAAE,gEAAgE;4BAC7E,cAAc,EAAE;gCACd,IAAI,EAAE;oCACJ,eAAe,EAAE,CAAC,UAAU,CAAC;iCAC9B;6BACF;yBACF;wBACD;4BACE,WAAW,EAAE,WAAW;4BACxB,IAAI,EAAE,UAAU;4BAChB,IAAI,EAAE,QAAQ;4BACd,OAAO,EAAE,IAAI;4BACb,WAAW,EAAE,kCAAkC;yBAChD;wBACD;4BACE,WAAW,EAAE,aAAa;4BAC1B,IAAI,EAAE,YAAY;4BAClB,IAAI,EAAE,QAAQ;4BACd,OAAO,EAAE,CAAC;4BACV,WAAW,EAAE,kCAAkC;yBAChD;wBACD;4BACE,WAAW,EAAE,YAAY;4BACzB,IAAI,EAAE,OAAO;4BACb,IAAI,EAAE,SAAS;4BACf,OAAO,EAAE,IAAI;4BACb,WAAW,EAAE,yCAAyC;yBACvD;qBACF;iBACF;aACF;SACF,CAAC;IAuGJ,CAAC;IArGC,KAAK,CAAC,OAAO;QACX,4EAA4E;QAC5E,iFAAiF;QACjF,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,IAAI,CAAC,iCAAiC,CAAC,CAAC;QAEpE,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,MAAM,UAAU,GAAyB,EAAE,CAAC;QAE5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,IAAI,CAAC;gBACH,MAAM,kBAAkB,GAAG,IAAI,CAAC,gBAAgB,CAAC,oBAAoB,EAAE,CAAC,CAAW,CAAC;gBACpF,MAAM,YAAY,GAAG,IAAI,CAAC,gBAAgB,CAAC,cAAc,EAAE,CAAC,CAAiC,CAAC;gBAC9F,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,YAAY,EAAE,CAAC,CAAY,CAAC;gBACrE,MAAM,WAAW,GAAG,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,CAAC,CAAW,CAAC;gBACtE,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC,CAAW,CAAC;gBAChE,MAAM,WAAW,GAAG,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,CAAC,CAAW,CAAC;gBACtE,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC,CAAW,CAAC;gBACtD,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,CAAC,CAMjD,CAAC;gBAEF,8BAA8B;gBAC9B,MAAM,cAAc,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,kBAAkB,CAAC,CAAC;gBAC7D,IAAI,CAAC,cAAc,EAAE,CAAC
;oBACpB,MAAM,IAAI,KAAK,CAAC,oBAAoB,kBAAkB,4BAA4B,CAAC,EAAE,CAAC,CAAC;gBACzF,CAAC;gBAED,sBAAsB;gBACtB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,mBAAmB,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC;gBAE7E,sBAAsB;gBACtB,MAAM,YAAY,GAAwB;oBACxC,YAAY;oBACZ,UAAU;oBACV,GAAG;iBACJ,CAAC;gBAEF,IAAI,UAAU,IAAI,WAAW,EAAE,CAAC;oBAC9B,YAAY,CAAC,WAAW,GAAG,WAAW,CAAC;gBACzC,CAAC;gBACD,IAAI,QAAQ,EAAE,CAAC;oBACb,YAAY,CAAC,QAAQ,GAAG,QAAQ,CAAC;gBACnC,CAAC;gBACD,IAAI,WAAW,EAAE,CAAC;oBAChB,YAAY,CAAC,WAAW,GAAG,WAAW,CAAC;gBACzC,CAAC;gBACD,IAAI,OAAO,CAAC,SAAS,IAAI,YAAY,KAAK,UAAU,EAAE,CAAC;oBACrD,YAAY,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;gBAC7C,CAAC;gBACD,IAAI,OAAO,CAAC,YAAY,KAAK,SAAS,IAAI,YAAY,KAAK,UAAU,EAAE,CAAC;oBACtE,YAAY,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;gBACnD,CAAC;gBACD,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;oBACrB,YAAY,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;gBAC3C,CAAC;gBACD,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;oBACvB,YAAY,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;gBAC/C,CAAC;gBACD,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;oBAChC,YAAY,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;gBACrC,CAAC;gBAED,qBAAqB;gBACrB,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,YAAY,CAAC,CAAC;gBAC3C,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;gBAE1C,eAAe;gBACf,MAAM,UAAU,GAAwB;oBACtC,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,IAAI,CAAC;oBAChC,MAAM,EAAE,YAAY;iBACrB,CAAC;gBAEF,+CAA+C;gBAC/C,IAAI,YAAY,KAAK,MAAM,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAC5C,UAAU,CAAC,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC;gBACrC,CAAC;gBAED,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,UAAU;oBAChB,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;oBACvB,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;iBACxB,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;oBAC1B,UAAU,CAAC,IAAI,CAAC;wBACd,IAAI,EAAE,EAAE,KAAK,EAAG,KAAe,CAAC,OAAO,EAAE;wBACzC,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;qBACxB,CAAC,CAAC;oBACH,SAAS;gBACX,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO,CAAC,UAAU,CAAC,CAAC;IACtB,CAAC;CACF;AA5QD,sCA4QC"}
package/package.json CHANGED
@@ -1,8 +1,12 @@
1
1
  {
2
2
  "name": "n8n-nodes-lite-parser",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "n8n community node for LiteParse — fast, local document parsing (PDF, Office, images) with Markdown/JSON output",
5
5
  "main": "dist/index.js",
6
+ "files": [
7
+ "dist",
8
+ "README.md"
9
+ ],
6
10
  "scripts": {
7
11
  "build": "tsc",
8
12
  "dev": "tsc --watch",
@@ -1,275 +0,0 @@
1
- import {
2
- IExecuteFunctions,
3
- INodeExecutionData,
4
- INodeType,
5
- INodeTypeDescription,
6
- NodeConnectionTypes,
7
- } from 'n8n-workflow';
8
-
9
/**
 * n8n node that hands a binary document to the LiteParse parser and returns
 * the parsed text, adding per-page data when the JSON output format is chosen.
 *
 * The parser itself comes from the `@llamaindex/liteparse` package, which is
 * loaded at run time inside `execute`.
 */
export class LiteParseNode implements INodeType {
  /** Node definition: editor metadata plus every parameter `execute` reads. */
  description: INodeTypeDescription = {
    displayName: 'LiteParse',
    name: 'liteParse',
    icon: 'file:liteparse.svg',
    group: ['transform'],
    version: 1,
    subtitle: '={{ $parameter["outputFormat"] }}',
    description: 'Parse PDFs, Office docs, and images into Markdown, JSON, or text — locally, no API keys',
    defaults: {
      name: 'LiteParse',
    },
    inputs: [NodeConnectionTypes.Main],
    outputs: [NodeConnectionTypes.Main],
    properties: [
      {
        displayName: 'Input Binary Field',
        name: 'binaryPropertyName',
        type: 'string',
        default: 'data',
        required: true,
        description: 'Name of the binary property containing the document to parse',
      },
      {
        displayName: 'Output Format',
        name: 'outputFormat',
        type: 'options',
        options: [
          {
            name: 'Markdown',
            value: 'markdown',
            description: 'Structured Markdown with headings, tables, lists, and links',
          },
          {
            name: 'Text',
            value: 'text',
            description: 'Plain text with layout preserved',
          },
          {
            name: 'JSON',
            value: 'json',
            description: 'Structured JSON with bounding boxes and spatial data',
          },
        ],
        default: 'markdown',
        description: 'Format of the parsed output',
      },
      {
        displayName: 'OCR Enabled',
        name: 'ocrEnabled',
        type: 'boolean',
        default: true,
        description: 'Whether to run OCR on scanned documents and images',
      },
      {
        displayName: 'OCR Language',
        name: 'ocrLanguage',
        type: 'string',
        default: 'eng',
        // Only shown in the editor while OCR is switched on.
        displayOptions: {
          show: {
            ocrEnabled: [true],
          },
        },
        description: 'Tesseract language code (e.g. eng, fra, deu, ara, ara+eng)',
      },
      {
        displayName: 'Password',
        name: 'password',
        type: 'string',
        typeOptions: { password: true },
        default: '',
        description: 'Password for encrypted/protected PDFs (leave empty if not encrypted)',
      },
      {
        displayName: 'Target Pages',
        name: 'targetPages',
        type: 'string',
        default: '',
        placeholder: '1-5,10,15-20',
        description: 'Specific pages to parse (leave empty for all pages)',
      },
      {
        displayName: 'DPI',
        name: 'dpi',
        type: 'number',
        default: 150,
        typeOptions: {
          minValue: 72,
          maxValue: 600,
        },
        description: 'Rendering DPI for OCR and screenshots (higher = better quality, slower)',
      },
      {
        displayName: 'Options',
        name: 'options',
        type: 'collection',
        placeholder: 'Add Option',
        default: {},
        options: [
          {
            displayName: 'Image Mode',
            name: 'imageMode',
            type: 'options',
            options: [
              {
                name: 'Placeholder',
                value: 'placeholder',
                description: 'Reference images with placeholder links',
              },
              {
                name: 'Off',
                value: 'off',
                description: 'Strip all images from output',
              },
              {
                name: 'Embed',
                value: 'embed',
                description: 'Write image PNGs to disk and reference them',
              },
            ],
            default: 'placeholder',
            description: 'How to handle images in Markdown output',
            displayOptions: {
              show: {
                '/outputFormat': ['markdown'],
              },
            },
          },
          {
            displayName: 'Extract Links',
            name: 'extractLinks',
            type: 'boolean',
            default: true,
            description: 'Whether to render hyperlinks as [text](url) in Markdown output',
            displayOptions: {
              show: {
                '/outputFormat': ['markdown'],
              },
            },
          },
          {
            displayName: 'Max Pages',
            name: 'maxPages',
            type: 'number',
            default: 1000,
            description: 'Maximum number of pages to parse',
          },
          {
            displayName: 'OCR Workers',
            name: 'numWorkers',
            type: 'number',
            default: 4,
            description: 'Number of concurrent OCR workers',
          },
          {
            displayName: 'Quiet Mode',
            name: 'quiet',
            type: 'boolean',
            default: true,
            description: 'Suppress progress output from LiteParse',
          },
        ],
      },
    ],
  };

  /**
   * Parses the binary document attached to each input item.
   *
   * @returns A one-element array holding the output items. A successful item
   *   carries `json.text`, `json.pages` (page count), `json.format`,
   *   `json.pageData` (JSON format only), the input item's `binary`, and
   *   `pairedItem`. When `continueOnFail()` is true a failed item becomes
   *   `{ json: { error }, pairedItem }` instead.
   * @throws Re-throws the first per-item failure when `continueOnFail()` is
   *   false, e.g. the `Error` raised for a missing binary property.
   */
  async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
    // `@llamaindex/liteparse` is ESM-only. With `"module": "commonjs"` tsc turns
    // a plain `import()` into `require()`, which cannot load it, so the call is
    // wrapped in `eval` to keep it a native dynamic import in the emitted JS.
    // NOTE(review): the evaluated string is a constant; never build it from input.
    const { LiteParse } = await eval("import('@llamaindex/liteparse')");

    const items = this.getInputData();
    const returnData: INodeExecutionData[] = [];

    for (let i = 0; i < items.length; i++) {
      try {
        const binaryPropertyName = this.getNodeParameter('binaryPropertyName', i) as string;
        const outputFormat = this.getNodeParameter('outputFormat', i) as 'markdown' | 'text' | 'json';
        const ocrEnabled = this.getNodeParameter('ocrEnabled', i) as boolean;
        const ocrLanguage = this.getNodeParameter('ocrLanguage', i) as string;
        const password = this.getNodeParameter('password', i) as string;
        const targetPages = this.getNodeParameter('targetPages', i) as string;
        const dpi = this.getNodeParameter('dpi', i) as number;
        const options = this.getNodeParameter('options', i) as {
          imageMode?: string;
          extractLinks?: boolean;
          maxPages?: number;
          numWorkers?: number;
          quiet?: boolean;
        };
        const isMarkdown = outputFormat === 'markdown';

        // Fail early, with the property name in the message, when the item
        // does not carry the binary field the user pointed at.
        if (!items[i].binary?.[binaryPropertyName]) {
          throw new Error(`Binary property "${binaryPropertyName}" does not exist on item ${i}`);
        }

        const buffer = await this.helpers.getBinaryDataBuffer(i, binaryPropertyName);

        // Always-present settings first; optional ones are only forwarded
        // when set so the parser's own defaults apply otherwise.
        const parserConfig: Record<string, unknown> = { outputFormat, ocrEnabled, dpi };
        if (ocrEnabled && ocrLanguage) {
          parserConfig.ocrLanguage = ocrLanguage;
        }
        if (password) {
          parserConfig.password = password;
        }
        if (targetPages) {
          parserConfig.targetPages = targetPages;
        }
        // Image and link handling only apply to Markdown output.
        if (isMarkdown && options.imageMode) {
          parserConfig.imageMode = options.imageMode;
        }
        if (isMarkdown && options.extractLinks !== undefined) {
          parserConfig.extractLinks = options.extractLinks;
        }
        if (options.maxPages) {
          parserConfig.maxPages = options.maxPages;
        }
        if (options.numWorkers) {
          parserConfig.numWorkers = options.numWorkers;
        }
        if (options.quiet !== undefined) {
          parserConfig.quiet = options.quiet;
        }

        const result = await new LiteParse(parserConfig).parse(buffer);
        const pages = result.pages;

        returnData.push({
          json: {
            text: result.text,
            pages: pages?.length || 0,
            format: outputFormat,
            // Structured per-page data is only attached for the JSON format.
            ...(outputFormat === 'json' && pages ? { pageData: pages } : {}),
          },
          binary: items[i].binary,
          pairedItem: { item: i },
        });
      } catch (error: unknown) {
        if (!this.continueOnFail()) {
          throw error;
        }
        // A thrown value is not guaranteed to be an Error: fall back to its
        // string form so the output item never carries `error: undefined`
        // (and so a thrown null/undefined cannot crash this handler).
        const message = (error as { message?: unknown } | null | undefined)?.message;
        returnData.push({
          json: { error: message == null ? String(error) : String(message) },
          pairedItem: { item: i },
        });
      }
    }

    return [returnData];
  }
}
@@ -1,7 +0,0 @@
1
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
2
- <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
3
- <polyline points="14 2 14 8 20 8"/>
4
- <line x1="16" y1="13" x2="8" y2="13"/>
5
- <line x1="16" y1="17" x2="8" y2="17"/>
6
- <polyline points="10 9 9 9 8 9"/>
7
- </svg>
package/tsconfig.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "strict": true,
4
- "module": "commonjs",
5
- "target": "es2020",
6
- "lib": ["es2020"],
7
- "moduleResolution": "node",
8
- "esModuleInterop": true,
9
- "skipLibCheck": true,
10
- "forceConsistentCasingInFileNames": true,
11
- "resolveJsonModule": true,
12
- "declaration": true,
13
- "declarationMap": true,
14
- "sourceMap": true,
15
- "outDir": "./dist",
16
- "rootDir": "./src"
17
- },
18
- "include": ["src/**/*.ts"],
19
- "exclude": ["node_modules", "dist"]
20
- }