n8n-nodes-berget-mk 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -5
- package/dist/nodes/BergetAi/BergetAi.node.js +36 -10
- package/dist/nodes/BergetAi/ocr.js +87 -47
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ n8n community nodes for [Berget AI](https://berget.ai), packaged as a single ins
|
|
|
4
4
|
|
|
5
5
|
Three nodes:
|
|
6
6
|
|
|
7
|
-
- **Berget AI** — multi-resource action node for one-shot calls: **Chat** (completions, classification), **
|
|
7
|
+
- **Berget AI** — multi-resource action node for one-shot calls: **Chat** (completions, classification), **Rerank** (document reranking), and **Speech to Text** (Swedish-tuned KB-Whisper). Can also be exposed as a tool to an AI Agent. (OCR is temporarily hidden — see the `0.4.4` entry in [CHANGELOG.md](CHANGELOG.md) for details.)
|
|
8
8
|
- **Berget AI Chat Model** — sub-node that plugs into n8n's built-in **AI Agent**, **Basic LLM Chain**, and other LangChain-based nodes. Exposes `reasoning_effort` and the full standard LLM parameter set.
|
|
9
9
|
- **Berget AI Embeddings Model** — sub-node that plugs into n8n's **Vector Store** nodes (Supabase, Qdrant, Pinecone, PGVector, etc.) and **Question and Answer Chain**.
|
|
10
10
|
|
|
@@ -38,10 +38,6 @@ Then add a **Berget AI API** credential with your API key from [berget.ai](https
|
|
|
38
38
|
2. Add **Berget AI Embeddings Model** and connect it to the Embedding socket.
|
|
39
39
|
3. Index documents or query as usual.
|
|
40
40
|
|
|
41
|
-
### Document extraction
|
|
42
|
-
|
|
43
|
-
1. Drop **Berget AI** onto the canvas, pick Resource = **OCR**, provide a document URL or base64 data, and pick your output format (Markdown or JSON).
|
|
44
|
-
|
|
45
41
|
### Swedish speech transcription
|
|
46
42
|
|
|
47
43
|
1. Drop **Berget AI** onto the canvas, pick Resource = **Speech to Text**, pick a model (defaults to `KB-Whisper-Large`), and point at an audio file.
|
|
@@ -4,10 +4,33 @@ exports.BergetAi = void 0;
|
|
|
4
4
|
const n8n_workflow_1 = require("n8n-workflow");
|
|
5
5
|
const n8n_workflow_2 = require("n8n-workflow");
|
|
6
6
|
const chat_1 = require("./chat");
|
|
7
|
-
|
|
7
|
+
// OCR temporarily disabled — see the block comment above the BergetAi class
|
|
8
|
+
// header for the re-enable procedure.
|
|
9
|
+
// import { executeOcr, ocrProperties } from './ocr';
|
|
8
10
|
const rerank_1 = require("./rerank");
|
|
9
11
|
const speech_1 = require("./speech");
|
|
10
12
|
const shared_1 = require("./shared");
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// OCR is temporarily hidden from the UI (since v0.4.4, 2026-04-10).
|
|
15
|
+
//
|
|
16
|
+
// Why: Berget AI removed OCR from their public pricing/models page, and the
|
|
17
|
+
// sync /v1/ocr endpoint returns HTTP 500 OCR_SERVICE_ERROR on every request.
|
|
18
|
+
// Async submissions are accepted but jobs sit in 'processing' indefinitely.
|
|
19
|
+
// The endpoint looks like an orphaned API surface whose backend has been
|
|
20
|
+
// retired. Rather than confuse users with a broken option, we hide it.
|
|
21
|
+
//
|
|
22
|
+
// The full implementation is intact at nodes/BergetAi/ocr.ts and will
|
|
23
|
+
// continue to compile and ship in the tarball (as dead code). If Berget
|
|
24
|
+
// brings OCR back, or someone confirms it works again, re-enabling is four
|
|
25
|
+
// uncomments in this file:
|
|
26
|
+
//
|
|
27
|
+
// 1. The `import { executeOcr, ocrProperties } from './ocr';` line above.
|
|
28
|
+
// 2. The OCR entry in the `resource` dropdown options array.
|
|
29
|
+
// 3. The `...ocrProperties` spread in the properties array.
|
|
30
|
+
// 4. The `case 'ocr':` branch in the execute() switch.
|
|
31
|
+
//
|
|
32
|
+
// Items 2–4 are marked with "OCR:" comments below; item 1 is the commented-out import above. No code needs to change.
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
11
34
|
class BergetAi {
|
|
12
35
|
constructor() {
|
|
13
36
|
this.description = {
|
|
@@ -48,11 +71,12 @@ class BergetAi {
|
|
|
48
71
|
value: 'chat',
|
|
49
72
|
description: 'Create a chat completion',
|
|
50
73
|
},
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
74
|
+
// OCR: uncomment this block to re-enable the OCR resource.
|
|
75
|
+
// {
|
|
76
|
+
// name: 'OCR',
|
|
77
|
+
// value: 'ocr',
|
|
78
|
+
// description: 'Extract text from a document (PDF, DOCX, images)',
|
|
79
|
+
// },
|
|
56
80
|
{
|
|
57
81
|
name: 'Rerank',
|
|
58
82
|
value: 'rerank',
|
|
@@ -66,7 +90,8 @@ class BergetAi {
|
|
|
66
90
|
],
|
|
67
91
|
},
|
|
68
92
|
...chat_1.chatProperties,
|
|
69
|
-
|
|
93
|
+
// OCR: uncomment to re-enable the OCR resource properties.
|
|
94
|
+
// ...ocrProperties,
|
|
70
95
|
...rerank_1.rerankProperties,
|
|
71
96
|
...speech_1.speechProperties,
|
|
72
97
|
],
|
|
@@ -96,9 +121,10 @@ class BergetAi {
|
|
|
96
121
|
case 'chat':
|
|
97
122
|
result = await (0, chat_1.executeChat)(this, i);
|
|
98
123
|
break;
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
124
|
+
// OCR: uncomment to re-enable the OCR execute branch.
|
|
125
|
+
// case 'ocr':
|
|
126
|
+
// result = await executeOcr(this, i);
|
|
127
|
+
// break;
|
|
102
128
|
case 'rerank':
|
|
103
129
|
result = await (0, rerank_1.executeRerank)(this, i);
|
|
104
130
|
break;
|
|
@@ -11,6 +11,8 @@ const showForOcr = {
|
|
|
11
11
|
},
|
|
12
12
|
},
|
|
13
13
|
};
|
|
14
|
+
const DEFAULT_POLLING_TIMEOUT_SECONDS = 360;
|
|
15
|
+
const DEFAULT_POLLING_INTERVAL_SECONDS = 3;
|
|
14
16
|
exports.ocrProperties = [
|
|
15
17
|
{
|
|
16
18
|
displayName: 'Document Type',
|
|
@@ -54,11 +56,11 @@ exports.ocrProperties = [
|
|
|
54
56
|
},
|
|
55
57
|
},
|
|
56
58
|
{
|
|
57
|
-
displayName: '
|
|
58
|
-
name: '
|
|
59
|
+
displayName: 'Return Task ID Immediately',
|
|
60
|
+
name: 'ocrReturnTaskIdImmediately',
|
|
59
61
|
type: 'boolean',
|
|
60
62
|
default: false,
|
|
61
|
-
description: 'Whether to
|
|
63
|
+
description: 'Whether to submit the document and return immediately with a taskId instead of waiting for the result. When off (default), the node submits the job and polls internally until the OCR is done, returning the extracted content. When on, the node returns { taskId, resultUrl, status } right away so you can poll the result yourself with an HTTP Request node in a separate step — useful for very slow documents or when you want to decouple submission from retrieval.',
|
|
62
64
|
...showForOcr,
|
|
63
65
|
},
|
|
64
66
|
{
|
|
@@ -102,51 +104,44 @@ exports.ocrProperties = [
|
|
|
102
104
|
{ name: 'TesserOCR', value: 'tesserocr' },
|
|
103
105
|
],
|
|
104
106
|
default: 'easyocr',
|
|
105
|
-
description: 'OCR engine to use',
|
|
106
|
-
},
|
|
107
|
-
{
|
|
108
|
-
displayName: 'Perform OCR',
|
|
109
|
-
name: 'doOcr',
|
|
110
|
-
type: 'boolean',
|
|
111
|
-
default: true,
|
|
112
|
-
description: 'Whether to perform OCR on the document',
|
|
113
|
-
},
|
|
114
|
-
{
|
|
115
|
-
displayName: 'Extract Table Structure',
|
|
116
|
-
name: 'doTableStructure',
|
|
117
|
-
type: 'boolean',
|
|
118
|
-
default: true,
|
|
119
|
-
description: 'Whether to extract table structure',
|
|
107
|
+
description: 'OCR engine to use. Not all engines are guaranteed to be available on Berget\'s infrastructure — "easyocr" is the default and most reliable. Try another engine only if easyocr fails for a specific document.',
|
|
120
108
|
},
|
|
121
109
|
{
|
|
122
110
|
displayName: 'Include Images',
|
|
123
111
|
name: 'includeImages',
|
|
124
112
|
type: 'boolean',
|
|
125
113
|
default: false,
|
|
126
|
-
description: 'Whether to include base64-encoded images in the output',
|
|
114
|
+
description: 'Whether to include base64-encoded images in the extracted output',
|
|
127
115
|
},
|
|
128
116
|
{
|
|
129
|
-
displayName: '
|
|
130
|
-
name: '
|
|
131
|
-
type: '
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
117
|
+
displayName: 'Polling Timeout (Seconds)',
|
|
118
|
+
name: 'pollingTimeoutSeconds',
|
|
119
|
+
type: 'number',
|
|
120
|
+
typeOptions: { minValue: 10 },
|
|
121
|
+
default: DEFAULT_POLLING_TIMEOUT_SECONDS,
|
|
122
|
+
description: "Maximum number of seconds to wait for OCR to complete when Return Task ID Immediately is off. If the job hasn't finished by then, the node throws a timeout error that still includes the taskId so you can retrieve the result later with a separate HTTP Request.",
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
displayName: 'Polling Interval (Seconds)',
|
|
126
|
+
name: 'pollingIntervalSeconds',
|
|
127
|
+
type: 'number',
|
|
128
|
+
typeOptions: { minValue: 1 },
|
|
129
|
+
default: DEFAULT_POLLING_INTERVAL_SECONDS,
|
|
130
|
+
description: 'How many seconds to wait between polls when checking the OCR task status. Berget suggests ~2s, so values of 2–5 are reasonable. The server may override this with a Retry-After header.',
|
|
140
131
|
},
|
|
141
132
|
],
|
|
142
133
|
...showForOcr,
|
|
143
134
|
},
|
|
144
135
|
];
|
|
136
|
+
function sleep(ms) {
|
|
137
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
138
|
+
}
|
|
145
139
|
async function executeOcr(context, itemIndex) {
|
|
146
|
-
var _a, _b, _c, _d, _e;
|
|
140
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
|
|
147
141
|
const credentials = await context.getCredentials('bergetAiApi');
|
|
142
|
+
const apiKey = credentials.apiKey;
|
|
148
143
|
const documentType = context.getNodeParameter('ocrDocumentType', itemIndex);
|
|
149
|
-
const
|
|
144
|
+
const returnImmediately = context.getNodeParameter('ocrReturnTaskIdImmediately', itemIndex, false);
|
|
150
145
|
const options = context.getNodeParameter('ocrOptions', itemIndex, {});
|
|
151
146
|
let documentUrl;
|
|
152
147
|
if (documentType === 'url') {
|
|
@@ -156,22 +151,28 @@ async function executeOcr(context, itemIndex) {
|
|
|
156
151
|
const documentData = context.getNodeParameter('ocrDocumentData', itemIndex);
|
|
157
152
|
documentUrl = `data:application/pdf;base64,${documentData}`;
|
|
158
153
|
}
|
|
159
|
-
|
|
154
|
+
// Always submit async. Berget's sync /ocr endpoint returns HTTP 500
|
|
155
|
+
// OCR_SERVICE_ERROR on every request as of 2026-04; the async path is
|
|
156
|
+
// the only one that actually works. We wrap polling so the user sees a
|
|
157
|
+
// synchronous result by default.
|
|
158
|
+
const requestBody = {
|
|
160
159
|
document: { url: documentUrl, type: 'document' },
|
|
161
|
-
async:
|
|
160
|
+
async: true,
|
|
162
161
|
options: {
|
|
163
162
|
outputFormat: (_a = options.outputFormat) !== null && _a !== void 0 ? _a : 'md',
|
|
164
163
|
tableMode: (_b = options.tableMode) !== null && _b !== void 0 ? _b : 'accurate',
|
|
165
164
|
ocrMethod: (_c = options.ocrMethod) !== null && _c !== void 0 ? _c : 'easyocr',
|
|
166
|
-
doOcr: options.doOcr !== false,
|
|
167
|
-
doTableStructure: options.doTableStructure !== false,
|
|
168
165
|
includeImages: (_d = options.includeImages) !== null && _d !== void 0 ? _d : false,
|
|
169
|
-
inputFormat: (_e = options.inputFormat) !== null && _e !== void 0 ? _e : ['pdf'],
|
|
170
166
|
},
|
|
171
167
|
};
|
|
172
|
-
const
|
|
173
|
-
if (status
|
|
174
|
-
|
|
168
|
+
const submission = await (0, shared_1.bergetRequest)(apiKey, 'POST', '/ocr', requestBody);
|
|
169
|
+
if (submission.status !== 202 && submission.status !== 200) {
|
|
170
|
+
throw new n8n_workflow_1.NodeOperationError(context.getNode(), (0, shared_1.formatBergetError)('OCR submission', submission.status, submission.data), { itemIndex });
|
|
171
|
+
}
|
|
172
|
+
// If Berget ever starts honoring sync again, it'll return a full result at 200.
|
|
173
|
+
// Pass that through directly.
|
|
174
|
+
if (submission.status === 200) {
|
|
175
|
+
const d = submission.data;
|
|
175
176
|
return {
|
|
176
177
|
content: d.content,
|
|
177
178
|
usage: d.usage,
|
|
@@ -179,15 +180,54 @@ async function executeOcr(context, itemIndex) {
|
|
|
179
180
|
processing_mode: 'synchronous',
|
|
180
181
|
};
|
|
181
182
|
}
|
|
182
|
-
|
|
183
|
-
|
|
183
|
+
const submissionData = submission.data;
|
|
184
|
+
const taskId = submissionData.taskId;
|
|
185
|
+
const resultUrl = submissionData.resultUrl;
|
|
186
|
+
if (!taskId) {
|
|
187
|
+
throw new n8n_workflow_1.NodeOperationError(context.getNode(), 'Berget AI OCR submission accepted but returned no taskId', { itemIndex });
|
|
188
|
+
}
|
|
189
|
+
if (returnImmediately) {
|
|
184
190
|
return {
|
|
185
|
-
taskId
|
|
186
|
-
|
|
187
|
-
|
|
191
|
+
taskId,
|
|
192
|
+
resultUrl,
|
|
193
|
+
status: (_e = submissionData.status) !== null && _e !== void 0 ? _e : 'pending',
|
|
188
194
|
processing_mode: 'asynchronous',
|
|
189
|
-
message: 'Document processing started. Use the taskId to
|
|
195
|
+
message: 'Document processing started. Use the taskId with an HTTP Request node against resultUrl to retrieve the extracted content later.',
|
|
190
196
|
};
|
|
191
197
|
}
|
|
192
|
-
|
|
198
|
+
// Poll loop.
|
|
199
|
+
const timeoutSeconds = (_f = options.pollingTimeoutSeconds) !== null && _f !== void 0 ? _f : DEFAULT_POLLING_TIMEOUT_SECONDS;
|
|
200
|
+
const intervalSeconds = Math.max((_g = options.pollingIntervalSeconds) !== null && _g !== void 0 ? _g : DEFAULT_POLLING_INTERVAL_SECONDS, 1);
|
|
201
|
+
const deadline = Date.now() + timeoutSeconds * 1000;
|
|
202
|
+
while (Date.now() < deadline) {
|
|
203
|
+
const poll = await (0, shared_1.bergetRequest)(apiKey, 'GET', `/ocr/result/${encodeURIComponent(taskId)}`);
|
|
204
|
+
if (poll.status === 200) {
|
|
205
|
+
const d = poll.data;
|
|
206
|
+
return {
|
|
207
|
+
content: d.content,
|
|
208
|
+
usage: d.usage,
|
|
209
|
+
metadata: d.metadata,
|
|
210
|
+
taskId,
|
|
211
|
+
processing_mode: 'asynchronous',
|
|
212
|
+
};
|
|
213
|
+
}
|
|
214
|
+
if (poll.status === 202) {
|
|
215
|
+
// Berget has returned multiple response shapes on 202:
|
|
216
|
+
// { id, status: 'processing', retryAfter: 2000 }
|
|
217
|
+
// { error: { message: 'OCR job is still processing', type: 'OCR_JOB_PROCESSING', param: { status, retryAfter } } }
|
|
218
|
+
// If status is 'failed', surface that as an error instead of looping.
|
|
219
|
+
const d = poll.data;
|
|
220
|
+
const observedStatus = (_h = d.status) !== null && _h !== void 0 ? _h : (_k = (_j = d.error) === null || _j === void 0 ? void 0 : _j.param) === null || _k === void 0 ? void 0 : _k.status;
|
|
221
|
+
if (observedStatus === 'failed') {
|
|
222
|
+
throw new n8n_workflow_1.NodeOperationError(context.getNode(), (0, shared_1.formatBergetError)('OCR', 202, poll.data) + ` — taskId: ${taskId}`, { itemIndex });
|
|
223
|
+
}
|
|
224
|
+
await sleep(intervalSeconds * 1000);
|
|
225
|
+
continue;
|
|
226
|
+
}
|
|
227
|
+
if (poll.status === 404) {
|
|
228
|
+
throw new n8n_workflow_1.NodeOperationError(context.getNode(), `Berget AI OCR error: task ${taskId} not found (HTTP 404). The task may have been deleted or never existed.`, { itemIndex });
|
|
229
|
+
}
|
|
230
|
+
throw new n8n_workflow_1.NodeOperationError(context.getNode(), (0, shared_1.formatBergetError)('OCR polling', poll.status, poll.data) + ` — taskId: ${taskId}`, { itemIndex });
|
|
231
|
+
}
|
|
232
|
+
throw new n8n_workflow_1.NodeOperationError(context.getNode(), `Berget AI OCR polling timed out after ${timeoutSeconds}s. The job may still be running on Berget's side. You can retrieve the result later by doing GET /v1/ocr/result/${taskId} with your API key. To avoid this, increase the Polling Timeout option or enable 'Return Task ID Immediately'.`, { itemIndex });
|
|
193
233
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "n8n-nodes-berget-mk",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.4",
|
|
4
4
|
"description": "n8n community node for Berget AI. Multi-resource action node (chat, OCR, rerank, speech-to-text) plus Chat Model and Embeddings Model sub-nodes that plug into n8n's built-in AI Agent and Vector Store nodes.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"n8n-community-node-package",
|