extractia-sdk 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -6
- package/dist/extractia-sdk.browser.js +538 -82
- package/dist/extractia-sdk.cjs.js +522 -87
- package/dist/extractia-sdk.esm.js +522 -87
- package/dist/index.d.ts +279 -22
- package/package.json +23 -4
- package/src/apiClient.js +192 -23
- package/src/browser-entry.js +35 -0
- package/src/errors.js +281 -40
- package/src/index.d.ts +279 -22
- package/src/index.js +16 -0
- package/src/ocrTools.js +48 -0
- package/src/subusers.js +1 -1
- package/src/utils.js +223 -0
- package/vitest.integration.config.js +27 -0
- package/dist/extractia-sdk.browser.js.map +0 -7
- package/dist/extractia-sdk.cjs.js.map +0 -7
- package/dist/extractia-sdk.esm.js.map +0 -7
- package/dist/extractia-sdk.js.map +0 -7
package/src/utils.js
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ExtractIA SDK — utility helpers
|
|
3
|
+
*
|
|
4
|
+
* All helpers are pure / side-effect-free and safe to import in both
|
|
5
|
+
* browser and Node.js environments (FileReader is guarded).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ─── Base64 helpers ───────────────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
/**
 * Reads a browser `File` or `Blob` and resolves with its contents encoded
 * as a base64 data-URL, MIME prefix included, e.g.:
 *   `data:image/jpeg;base64,/9j/4AAQ…`
 *
 * @param {File|Blob} file - Object handed to `FileReader.readAsDataURL`.
 * @returns {Promise<string>} Resolves with the data-URL produced by the reader.
 * @throws {Error} (as a rejection) when `FileReader` does not exist in the
 *   current environment, or when the reader reports an error.
 */
export function fileToBase64(file) {
  const hasFileReader = typeof FileReader !== "undefined";

  if (!hasFileReader) {
    // Reject instead of throwing synchronously so callers only ever have to
    // handle one failure channel (the returned promise).
    const message =
      "fileToBase64: FileReader is not available in this environment. " +
      "In Node.js, read the file manually and encode it with Buffer.from(data).toString('base64').";
    return Promise.reject(new Error(message));
  }

  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onerror = () => {
      // Blobs have no `name`, hence the "unknown" fallback.
      reject(
        new Error(
          `fileToBase64: failed to read file "${file.name ?? "unknown"}".`,
        ),
      );
    };

    reader.onload = () => {
      resolve(/** @type {string} */ (reader.result));
    };

    reader.readAsDataURL(file);
  });
}
|
|
41
|
+
|
|
42
|
+
/**
 * Removes everything up to and including the first `;base64,` marker.
 * Strings that do not contain the marker (i.e. raw base64) pass through
 * untouched.
 *
 * @param {string} base64OrDataUrl — `"data:image/jpeg;base64,ABC…"` or `"ABC…"`.
 * @returns {string} The characters that follow `;base64,`, or the input itself
 *   when no marker is present.
 * @throws {TypeError} If the argument is not a string.
 */
export function stripDataUrlPrefix(base64OrDataUrl) {
  if (typeof base64OrDataUrl !== "string") {
    throw new TypeError("stripDataUrlPrefix: argument must be a string.");
  }

  const marker = ";base64,";
  const markerStart = base64OrDataUrl.indexOf(marker);
  if (markerStart === -1) {
    return base64OrDataUrl;
  }
  return base64OrDataUrl.slice(markerStart + marker.length);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Returns the MIME type (`type/subtype`) embedded in a base64 data-URL
 * prefix, or `null` when the input is not a base64 data-URL.
 *
 * Media-type parameters are tolerated and ignored, so
 * `"data:text/plain;charset=utf-8;base64,…"` yields `"text/plain"`.
 * Data-URLs that omit the media type (`"data:;base64,…"`) yield `null`.
 *
 * @param {string} dataUrl — e.g. `"data:image/png;base64,…"`.
 * @returns {string|null} The bare MIME type without parameters, or `null`.
 */
export function getMimeType(dataUrl) {
  if (typeof dataUrl !== "string") return null;
  // Group 1 is the bare type/subtype. The optional `;param` segments that may
  // sit between it and `;base64,` are skipped. Commas are excluded from the
  // header so that a `;base64,` sequence occurring inside the *payload* of a
  // non-base64 data-URL is never mistaken for the prefix.
  const match = dataUrl.match(/^data:([^;,]+)(?:;[^;,]*)*;base64,/);
  return match ? match[1] : null;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Tells whether a string looks like a base64-encoded value. A leading
 * data-URL prefix (`data:…;base64,`) is removed before checking.
 *
 * The payload passes when it is non-empty, its length is a multiple of 4,
 * and it consists only of `A–Z a–z 0–9 + /` followed by at most two `=`
 * padding characters.
 *
 * @param {string} str
 * @returns {boolean} `false` for non-strings and blank strings.
 */
export function isBase64(str) {
  if (typeof str !== "string") return false;
  if (str.trim() === "") return false;

  const payload = stripDataUrlPrefix(str);
  if (payload.length === 0) return false;
  // Base64 encodes in 4-character groups, so any other length is invalid.
  if (payload.length % 4 !== 0) return false;

  return /^[A-Za-z0-9+/]*={0,2}$/.test(payload);
}
|
|
85
|
+
|
|
86
|
+
/**
 * Normalises an input that may be either a browser `File` / `Blob` or an
 * already-encoded string into a string.
 *
 * - Strings are returned exactly as given (no validation is performed).
 * - `File` / `Blob` instances are passed to `fileToBase64`, so the result
 *   is a data-URL.
 *
 * Handy for SDK functions that accept `File` inputs in the browser and
 * pre-encoded base64 strings on the server.
 *
 * @param {File|Blob|string} fileOrBase64
 * @returns {Promise<string>}
 * @throws {TypeError} (as a rejection) if the argument is neither a string
 *   nor a File/Blob.
 */
export async function ensureBase64(fileOrBase64) {
  if (typeof fileOrBase64 === "string") {
    return fileOrBase64;
  }

  // `typeof` guards keep this safe in runtimes where File/Blob are not defined.
  let isBinary = typeof File !== "undefined" && fileOrBase64 instanceof File;
  if (!isBinary) {
    isBinary = typeof Blob !== "undefined" && fileOrBase64 instanceof Blob;
  }

  if (!isBinary) {
    throw new TypeError(
      "ensureBase64: argument must be a File, Blob, or a base64 string.",
    );
  }

  return fileToBase64(fileOrBase64);
}
|
|
109
|
+
|
|
110
|
+
// ─── Pagination helpers ───────────────────────────────────────────────────────
|
|
111
|
+
|
|
112
|
+
/**
 * Async generator that walks a paginated SDK function page by page and
 * yields each item individually. The wrapped function is expected to
 * resolve to a Spring-style `Page` object (`{ content: T[], totalPages: number }`).
 *
 * Iteration ends when any of the following happens:
 *   - a page has no items (or `content` is not an array),
 *   - the page just read is the last one according to `totalPages`
 *     (a missing `totalPages` is treated as `1`),
 *   - `maxPages` pages have been requested.
 *
 * @template T
 * @param {(opts: { page: number; size: number }) => Promise<{ content: T[]; totalPages: number }>} fn
 *   A paginated SDK function (e.g. `getCreditsHistory`, `getDocumentHistory`).
 * @param {object} [opts]
 * @param {number} [opts.size=50]       — Items per request (max depends on endpoint).
 * @param {number} [opts.startPage=0]   — Zero-based page index to start from.
 * @param {number} [opts.maxPages=1000] — Safety ceiling: stop after this many pages.
 * @yields {T}
 *
 * @example
 * for await (const entry of paginate(getCreditsHistory, { size: 100 })) {
 *   console.log(entry.creditsConsumed);
 * }
 */
export async function* paginate(
  fn,
  { size = 50, startPage = 0, maxPages = 1_000 } = {},
) {
  for (
    let fetched = 0, page = startPage;
    fetched < maxPages;
    fetched++, page++
  ) {
    const response = await fn({ page, size });

    // Anything other than an array is treated as an empty page.
    const batch = Array.isArray(response?.content) ? response.content : [];
    for (const entry of batch) {
      yield entry;
    }

    const pageCount = response?.totalPages ?? 1;
    const isEmptyPage = batch.length === 0;
    const isLastPage = page + 1 >= pageCount;
    if (isEmptyPage || isLastPage) {
      return;
    }
  }
}
|
|
151
|
+
|
|
152
|
+
/**
 * Drains `paginate(fn, opts)` and returns every yielded item in a single
 * flat array, in the order the items were produced.
 *
 * @template T
 * @param {(opts: { page: number; size: number }) => Promise<{ content: T[]; totalPages: number }>} fn
 * @param {Parameters<typeof paginate>[1]} [opts] — Forwarded to `paginate` unchanged.
 * @returns {Promise<T[]>}
 */
export async function paginateAll(fn, opts) {
  const collected = [];
  const stream = paginate(fn, opts);
  for await (const entry of stream) {
    collected.push(entry);
  }
  return collected;
}
|
|
165
|
+
|
|
166
|
+
// ─── Async helpers ────────────────────────────────────────────────────────────
|
|
167
|
+
|
|
168
|
+
/**
 * Promise-based sleep: the returned promise is fulfilled (with no value)
 * once a `setTimeout` of `ms` milliseconds fires.
 *
 * @param {number} ms — Timeout passed straight to `setTimeout`.
 * @returns {Promise<void>}
 */
export function delay(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
177
|
+
|
|
178
|
+
/**
 * Calls an async function and retries it with exponential back-off when it
 * throws a retryable error. By default an error is retryable when it exposes
 * an `isRetryable()` method that returns `true` (as `ExtractiaError` does).
 *
 * This is useful when you have disabled the SDK's built-in automatic retry
 * (`configure({ retries: 0 })`) and want finer control per call.
 *
 * Behavioural guarantees:
 *   - `fn` is always invoked at least once. A negative or non-numeric
 *     `retries` is treated as `0`; fractional values are rounded down.
 *   - The value thrown by the final attempt is re-thrown untouched, even when
 *     it is not an object (e.g. `null`).
 *   - If the error carries a numeric, non-negative `retryAfter` (seconds), that
 *     wait is used; otherwise the wait is `initialDelay * 2 ** attempt`.
 *
 * @template T
 * @param {() => Promise<T>} fn — Async function to call.
 * @param {object} [opts]
 * @param {number} [opts.retries=3]        — Max additional attempts after the first failure.
 * @param {number} [opts.initialDelay=500] — Initial back-off delay (ms); doubles each attempt.
 * @param {(err: Error) => boolean} [opts.shouldRetry] — Custom predicate; defaults to `err.isRetryable()`.
 * @returns {Promise<T>}
 *
 * @example
 * const doc = await withRetry(() => processImage(templateId, base64), { retries: 3 });
 */
export async function withRetry(
  fn,
  { retries = 3, initialDelay = 500, shouldRetry } = {},
) {
  // `err?.` keeps the default predicate from throwing a TypeError (and thereby
  // masking the real failure) when something like `null` was thrown.
  const isRetryable =
    shouldRetry ??
    ((err) => typeof err?.isRetryable === "function" && err.isRetryable());

  // Clamp so that a negative / NaN value can never skip the first call.
  // `Infinity` is preserved (Math.floor(Infinity) === Infinity).
  const requestedRetries = Number(retries);
  const maxRetries = Number.isNaN(requestedRetries)
    ? 0
    : Math.max(0, Math.floor(requestedRetries));

  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      // The predicate is deliberately not consulted once the budget is spent.
      if (attempt >= maxRetries || !isRetryable(err)) {
        throw err;
      }

      // Honour Retry-After for rate-limit errors, but only when it is a usable
      // number of seconds; anything else falls back to exponential back-off
      // rather than producing a NaN / negative timeout that fires immediately.
      const retryAfterMs =
        err?.retryAfter != null ? Number(err.retryAfter) * 1_000 : Number.NaN;
      const wait =
        Number.isFinite(retryAfterMs) && retryAfterMs >= 0
          ? retryAfterMs
          : initialDelay * 2 ** attempt;

      await delay(wait);
    }
  }
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { defineConfig } from "vitest/config";
|
|
2
|
+
|
|
3
|
+
/**
 * Vitest configuration used only for the integration suite, which talks to
 * the real ExtractIA API.
 *
 * Environment variables read by the tests:
 *   EXTRACTIA_API_TOKEN      — valid bearer token (all integration tests)
 *   EXTRACTIA_ALLOW_WRITE    — "true" to enable create/update/delete tests
 *   EXTRACTIA_ALLOW_CREDITS  — "true" to enable tests that consume doc/AI credits
 *
 * Run:
 *   EXTRACTIA_API_TOKEN=sk_xxx EXTRACTIA_ALLOW_WRITE=true EXTRACTIA_ALLOW_CREDITS=true \
 *     npm run test:integration
 */

// Real HTTP calls — 30 s budget, applied to each test and to each
// setup/teardown hook.
const REAL_HTTP_TIMEOUT_MS = 30_000;

export default defineConfig({
  test: {
    environment: "node",
    globals: true,
    include: ["tests/integration/**/*.integration.test.js"],
    reporters: ["verbose"],
    // Sequential: avoid hammering the API and triggering rate-limits
    sequence: { concurrent: false },
    testTimeout: REAL_HTTP_TIMEOUT_MS,
    hookTimeout: REAL_HTTP_TIMEOUT_MS,
  },
});
|