n8n-nodes-ocrbro 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +5 -0
- package/dist/nodes/OcrBro/OcrBro.node.js +90 -0
- package/dist/nodes/OcrBro/ocrbro.png +0 -0
- package/dist/test/test.js +85 -0
- package/eng.traineddata +0 -0
- package/package.json +47 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Flag this CommonJS module as transpiled ES-module output so interop
// helpers that check `__esModule` pass it through unchanged.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export slot before the dependency is loaded.
exports.OcrBro = void 0;
const ocrBroNodeModule = require("./nodes/OcrBro/OcrBro.node");
// Live re-export: every read of `exports.OcrBro` is forwarded to the node module.
Object.defineProperty(exports, "OcrBro", {
    enumerable: true,
    get: function () {
        return ocrBroNodeModule.OcrBro;
    },
});
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.OcrBro = void 0;
|
|
4
|
+
const tesseract_js_1 = require("tesseract.js");
|
|
5
|
+
/**
 * n8n node that runs Tesseract.js OCR over an image stored in a binary
 * property of each input item and emits the recognised text.
 */
class OcrBro {
    constructor() {
        // Node metadata plus the two user-editable parameters that execute() reads back.
        this.description = {
            displayName: 'OCR Bro',
            name: 'ocrBro',
            icon: 'file:ocrbro.png',
            group: ['transform'],
            version: 1,
            description: 'OCR Images using Tesseract.js - extracts text from images',
            defaults: {
                name: 'OCR Bro',
            },
            inputs: ['main'],
            outputs: ['main'],
            properties: [
                {
                    displayName: 'Input Binary Field',
                    name: 'binaryPropertyName',
                    type: 'string',
                    default: 'data',
                    required: true,
                    description: 'The name of the binary property containing the image file to OCR',
                },
                {
                    displayName: 'Language',
                    name: 'language',
                    type: 'string',
                    default: 'eng',
                    description: 'Tesseract language code (e.g., eng, deu, spa). Multiple languages can be specified separated by "+".',
                },
            ],
        };
    }
    /**
     * Runs OCR on every input item.
     *
     * Must be invoked with an execution context as `this` that provides
     * `getInputData()`, `getNodeParameter()`, `helpers.getBinaryDataBuffer()`
     * and `continueOnFail()`. Both parameters are read once, using item index 0.
     *
     * @returns {Promise<Array<Array<object>>>} A single output branch. Each
     *   successful item yields `{ json: { text, confidence, words }, binary }`
     *   where `words` is the number of recognised words; a failed item yields
     *   `{ json: { error } }` when `continueOnFail()` returns true.
     * @throws {Error} The first per-item error when `continueOnFail()` is false,
     *   or any error raised while loading/initialising the language.
     */
    async execute() {
        const items = this.getInputData();
        const returnData = [];
        const binaryPropertyName = this.getNodeParameter('binaryPropertyName', 0);
        const language = this.getNodeParameter('language', 0);
        // `(0, fn)()` calls createWorker without binding `this` to the module object.
        const worker = await (0, tesseract_js_1.createWorker)();
        try {
            await worker.loadLanguage(language);
            await worker.initialize(language);
            for (let i = 0; i < items.length; i++) {
                try {
                    const item = items[i];
                    // Validate the metadata first so that missing or non-image inputs
                    // are rejected without fetching their binary buffer.
                    const binaryMetadata = item.binary ? item.binary[binaryPropertyName] : undefined;
                    if (!binaryMetadata) {
                        throw new Error(`Binary property "${binaryPropertyName}" does not exist on item ${i}`);
                    }
                    const mimeType = binaryMetadata.mimeType;
                    // A missing/non-string MIME type is reported as unsupported instead of
                    // crashing on `.startsWith`.
                    if (typeof mimeType !== 'string' || !mimeType.startsWith('image/')) {
                        throw new Error(`Unsupported file type: ${mimeType}. OCR Bro only supports image files (PNG, JPG, TIFF, BMP, etc.)`);
                    }
                    const binaryData = await this.helpers.getBinaryDataBuffer(i, binaryPropertyName);
                    const { data } = await worker.recognize(binaryData);
                    returnData.push({
                        json: {
                            text: data.text,
                            confidence: data.confidence,
                            words: (data.words && data.words.length) || 0,
                        },
                        // Pass the original binary payload through untouched.
                        binary: item.binary,
                    });
                }
                catch (error) {
                    if (this.continueOnFail()) {
                        returnData.push({
                            json: {
                                error: error.message,
                            },
                        });
                        continue;
                    }
                    throw error;
                }
            }
        }
        finally {
            // Always release the worker, including when an item error is rethrown.
            await worker.terminate();
        }
        return [returnData];
    }
}
|
|
90
|
+
exports.OcrBro = OcrBro;
|
|
Binary file
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Module-interop helper: exposes `source[key]` on `target` under `alias`
 * (which defaults to `key`). Reuses an already-defined helper from `this`
 * when one is present.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportName = alias === undefined ? key : alias;
        const existing = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether to forward reads through a getter or to copy the
        // source's own descriptor as-is.
        let needsGetter;
        if (!existing) {
            needsGetter = true;
        }
        else if ("get" in existing) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = existing.writable || existing.configurable;
        }
        const descriptor = needsGetter
            ? { enumerable: true, get: function () { return source[key]; } }
            : existing;
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback when Object.create is unavailable: plain value copy.
        target[alias === undefined ? key : alias] = source[key];
    });
|
|
13
|
+
/**
 * Module-interop helper: stores `moduleValue` as the `default` member of
 * `namespace`. Reuses an already-defined helper from `this` when present.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (!Object.create) {
        // Fallback when Object.create is unavailable: plain assignment.
        return function (namespace, moduleValue) {
            namespace["default"] = moduleValue;
        };
    }
    return function (namespace, moduleValue) {
        Object.defineProperty(namespace, "default", { enumerable: true, value: moduleValue });
    };
})();
|
|
18
|
+
/**
 * Module-interop helper: returns modules flagged `__esModule` unchanged;
 * otherwise builds a namespace object that re-exposes every own property
 * except "default" via __createBinding and sets `default` to the module itself.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved lazily on first use, then reused for every later call.
    let listOwnKeys = null;
    function legacyOwnKeys(obj) {
        const names = [];
        for (const name in obj) {
            if (Object.prototype.hasOwnProperty.call(obj, name)) {
                names[names.length] = name;
            }
        }
        return names;
    }
    function ownKeys(obj) {
        if (listOwnKeys === null) {
            listOwnKeys = Object.getOwnPropertyNames || legacyOwnKeys;
        }
        return listOwnKeys(obj);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const key of ownKeys(mod)) {
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
const OcrBro_node_1 = require("../nodes/OcrBro/OcrBro.node");
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
// Mock helpers
|
|
40
|
+
/**
 * Stand-in for the `helpers` object of an execution context. The `index` and
 * `propertyName` arguments are accepted but ignored by every helper.
 */
const mockHelpers = {
    // Always returns the bytes of `sample.png` located next to this script.
    getBinaryDataBuffer(index, propertyName) {
        const samplePath = path.join(__dirname, 'sample.png');
        console.log(`Reading file from ${samplePath}`);
        return fs.readFileSync(samplePath);
    },
    // Always reports a PNG image.
    getBinaryMetadata(index, propertyName) {
        const metadata = { mimeType: 'image/png' };
        return metadata;
    },
    // Identity pass-through.
    returnJsonArray(items) {
        return items;
    },
};
|
|
51
|
+
// Mock Node
|
|
52
|
+
// Bare node instance; its execute() is invoked further down with a hand-built
// `this` (see mockExecute) rather than being run by a workflow engine.
const node = new OcrBro_node_1.OcrBro();
|
|
53
|
+
// Mock Execution Context
|
|
54
|
+
/**
 * Decorates the caller-supplied `this` with the members that the node's
 * execute() reads, then runs execute() against it.
 *
 * @returns {Promise<*>} Whatever `node.execute` resolves to.
 */
const mockExecute = async function () {
    const context = this;
    context.helpers = mockHelpers;
    // Fixed parameter values; any unknown parameter name yields an empty string.
    context.getNodeParameter = (param) => {
        switch (param) {
            case 'binaryPropertyName':
                return 'data';
            case 'language':
                return 'eng';
            default:
                return '';
        }
    };
    // One input item carrying PNG metadata under the `data` binary property.
    context.getInputData = () => [
        {
            json: {},
            binary: {
                data: {
                    mimeType: 'image/png'
                }
            }
        }
    ];
    context.continueOnFail = () => false;
    console.log("Starting execution...");
    const output = await node.execute.call(context);
    return output;
};
|
|
77
|
+
// Run
|
|
78
|
+
// Drive the mocked execution once against a fresh context object and print
// the JSON payload of the first item on the first output branch.
mockExecute.call({})
    .then((result) => {
        console.log("Execution successful!");
        console.log("Result JSON:", JSON.stringify(result[0][0].json, null, 2));
    })
    .catch((err) => {
        console.error("Execution failed:", err);
        // Report the failure through the exit status; without this the script
        // exits with code 0 and a failing run looks like a pass to npm/CI.
        process.exitCode = 1;
    });
|
package/eng.traineddata
ADDED
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "n8n-nodes-ocrbro",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Native n8n node for OCR using Tesseract.js",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"n8n-community-node-package"
|
|
7
|
+
],
|
|
8
|
+
"license": "MIT",
|
|
9
|
+
"homepage": "",
|
|
10
|
+
"author": {
|
|
11
|
+
"name": "User",
|
|
12
|
+
"email": "user@example.com"
|
|
13
|
+
},
|
|
14
|
+
"community": {
|
|
15
|
+
"author": {
|
|
16
|
+
"name": "User",
|
|
17
|
+
"email": "user@example.com"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"main": "dist/index.js",
|
|
21
|
+
"n8n": {
|
|
22
|
+
"nodes": [
|
|
23
|
+
"dist/nodes/OcrBro/OcrBro.node.js"
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
"scripts": {
|
|
27
|
+
"build": "tsc",
|
|
28
|
+
"dev": "tsc --watch",
|
|
29
|
+
"test": "ts-node test/test.ts"
|
|
30
|
+
},
|
|
31
|
+
"files": [
|
|
32
|
+
"dist",
|
|
33
|
+
"eng.traineddata"
|
|
34
|
+
],
|
|
35
|
+
"peerDependencies": {
|
|
36
|
+
"n8n-workflow": "*"
|
|
37
|
+
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"@types/node": "^18.0.0",
|
|
40
|
+
"n8n-workflow": "*",
|
|
41
|
+
"ts-node": "^10.9.1",
|
|
42
|
+
"typescript": "^5.0.0"
|
|
43
|
+
},
|
|
44
|
+
"dependencies": {
|
|
45
|
+
"tesseract.js": "^4.0.0"
|
|
46
|
+
}
|
|
47
|
+
}
|