@arkyc/ocr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/drivers/ai.d.mts +120 -0
  2. package/dist/drivers/ai.d.mts.map +1 -0
  3. package/dist/drivers/ai.mjs +454 -0
  4. package/dist/drivers/ai.mjs.map +1 -0
  5. package/dist/drivers/external.d.mts +17 -0
  6. package/dist/drivers/external.d.mts.map +1 -0
  7. package/dist/drivers/external.mjs +34 -0
  8. package/dist/drivers/external.mjs.map +1 -0
  9. package/dist/drivers/mock.d.mts +16 -0
  10. package/dist/drivers/mock.d.mts.map +1 -0
  11. package/dist/drivers/mock.mjs +34 -0
  12. package/dist/drivers/mock.mjs.map +1 -0
  13. package/dist/drivers/preprocess.d.mts +51 -0
  14. package/dist/drivers/preprocess.d.mts.map +1 -0
  15. package/dist/drivers/preprocess.mjs +50 -0
  16. package/dist/drivers/preprocess.mjs.map +1 -0
  17. package/dist/drivers/tesseract.d.mts +75 -0
  18. package/dist/drivers/tesseract.d.mts.map +1 -0
  19. package/dist/drivers/tesseract.mjs +175 -0
  20. package/dist/drivers/tesseract.mjs.map +1 -0
  21. package/dist/index.d.mts +12 -0
  22. package/dist/index.mjs +10 -0
  23. package/dist/parsers/generic.d.mts +8 -0
  24. package/dist/parsers/generic.d.mts.map +1 -0
  25. package/dist/parsers/generic.mjs +84 -0
  26. package/dist/parsers/generic.mjs.map +1 -0
  27. package/dist/parsers/mrz.d.mts +8 -0
  28. package/dist/parsers/mrz.d.mts.map +1 -0
  29. package/dist/parsers/mrz.mjs +149 -0
  30. package/dist/parsers/mrz.mjs.map +1 -0
  31. package/dist/parsers/registry.d.mts +49 -0
  32. package/dist/parsers/registry.d.mts.map +1 -0
  33. package/dist/parsers/registry.mjs +100 -0
  34. package/dist/parsers/registry.mjs.map +1 -0
  35. package/dist/parsers/types.d.mts +43 -0
  36. package/dist/parsers/types.d.mts.map +1 -0
  37. package/dist/registry.d.mts +20 -0
  38. package/dist/registry.d.mts.map +1 -0
  39. package/dist/registry.mjs +36 -0
  40. package/dist/registry.mjs.map +1 -0
  41. package/dist/types.d.mts +48 -0
  42. package/dist/types.d.mts.map +1 -0
  43. package/package.json +32 -0
@@ -0,0 +1,120 @@
1
+ import { OcrDriver, OcrRequest } from "../types.mjs";
2
+ import { DocumentType, OcrAuthenticity, OcrFields, OcrResultData } from "@arkyc/types";
3
+
4
+ //#region src/drivers/ai.d.ts
5
/**
 * Model id the driver uses when the caller does not pick one. A small, cheap
 * vision model is enough for reading printed and MRZ document text.
 */
declare const DEFAULT_AI_MODEL = "claude-haiku-4-5-20251001";
9
/**
 * One image in the form the messages API accepts: the base64 payload together
 * with the media type declared for it.
 */
interface PreparedImage {
	/** Media type declared for the payload. */
	mediaType: 'image/jpeg' | 'image/png' | 'image/webp';
	/** Base64-encoded image bytes. */
	data: string;
}
16
/**
 * What the vision model returned, before any validation or scoring is applied.
 */
interface AiExtraction {
	/** Identity fields as read by the model. */
	fields: OcrFields;
	/** The model's own legibility self-assessment in [0, 1], if it gave one. */
	legibility?: number;
	/** Document type reported by the model; `null` when it gave none. */
	documentType?: DocumentType | null;
	/** Best-effort tamper/replay read; omitted when the model reported nothing. */
	authenticity?: OcrAuthenticity;
}
27
/**
 * Signature of the vision call: takes the OCR request plus the prepared images
 * and resolves to the raw extraction. It is injectable so the driver's scoring
 * can be unit-tested without a network call; the default implementation talks
 * to the Anthropic messages API.
 */
type AiVisionExtract = (request: OcrRequest, images: PreparedImage[]) => Promise<AiExtraction>;
33
/**
 * Construction options for {@link AnthropicOcrDriver}. Every field is optional,
 * but either `apiKey` or `extract` must be supplied.
 */
interface AnthropicOcrOptions {
	/** API key sent as `x-api-key`. Required unless `extract` is provided. */
	apiKey?: string;
	/** Model id; defaults to {@link DEFAULT_AI_MODEL}. */
	model?: string;
	/** API base URL; defaults to `https://api.anthropic.com`. One trailing `/` is stripped. */
	baseUrl?: string;
	/** Longest image edge (px) to upload; defaults to 1568. */
	maxEdge?: number;
	/** Per-attempt request timeout (ms); defaults to 30000. */
	timeoutMs?: number;
	/** Bounded retries on 429/529; defaults to 2. */
	maxRetries?: number;
	/** Override the vision call (tests). When set, `apiKey` is not required. */
	extract?: AiVisionExtract;
}
48
/**
 * OCR driver that sends the document image(s) to a vision LLM (Claude by
 * default) and maps the structured reply onto {@link OcrResultData}.
 *
 * The reported confidence is deliberately **not** the model's self-report:
 * LLMs are poorly calibrated and will claim high confidence on hallucinated
 * fields. It is instead computed deterministically from which fields came back
 * and whether they are structurally valid (see {@link scoreConfidence}), so the
 * value compared against the decision engine's `ocrConfidenceThreshold` means
 * something. The model's legibility read contributes only a small soft penalty.
 *
 * The model is also asked for a best-effort, image-only authenticity read
 * (screen replay, photocopy, digital/physical tampering). That signal is
 * advisory: a fired flag only *caps* the confidence (see
 * {@link applyAuthenticity}) so the document is routed to manual review. An
 * LLM's authenticity guess never auto-rejects a user on its own.
 */
declare class AnthropicOcrDriver implements OcrDriver {
	readonly name = "ai";
	private readonly model;
	private readonly baseUrl;
	private readonly maxEdge;
	private readonly vision;
	/**
	 * @param options Driver configuration; `apiKey` or `extract` must be set.
	 */
	constructor(options?: AnthropicOcrOptions);
	/**
	 * Read the document in `request` and return the normalized fields, the
	 * derived confidence, and the raw provider details.
	 *
	 * @param request The OCR request carrying the image bytes and hints.
	 * @returns The extraction result.
	 */
	extract(request: OcrRequest): Promise<OcrResultData>;
	/**
	 * Resize (best-effort) and base64-encode the front + optional back images.
	 */
	private prepareImages;
}
81
/**
 * Deterministic confidence in [0, 1] built from field completeness and
 * structural validity. The weights add up to 1.0 when every field is present
 * and well-formed. The model's self-reported legibility can only lower the
 * score, by at most 15%, so trustworthy structure outweighs an unreliable
 * self-assessment.
 *
 * @param fields The normalized fields to score.
 * @param legibility The model's legibility self-assessment in [0, 1], if any.
 * @returns The confidence score in [0, 1].
 */
declare function scoreConfidence(fields: OcrFields, legibility?: number): number;
92
/**
 * Combine the authenticity read with the field-derived confidence. The score
 * is only ever lowered, and only when a concrete flag fired: a genuine (or
 * unassessed) document comes back untouched. A flagged document is capped at
 * the lower of the model's own authenticity confidence and a fixed ceiling of
 * 0.5, which sends it to manual review instead of auto-approval. This is
 * deliberately conservative: the LLM's guess can demote, never reject outright.
 *
 * @param fieldScore Confidence derived from the extracted fields.
 * @param authenticity The model's authenticity read, if it produced one.
 * @returns The possibly lowered confidence.
 */
declare function applyAuthenticity(fieldScore: number, authenticity?: OcrAuthenticity): number;
105
/**
 * Build the default vision call, which POSTs to the Anthropic messages API at
 * `${baseUrl}/v1/messages`.
 *
 * @param opts Connection settings: API key, model id, base URL, and the
 *   optional per-attempt timeout (ms) and retry bound.
 * @returns A function that performs the vision call for one request.
 */
declare function anthropicVision(opts: {
	apiKey: string;
	model: string;
	baseUrl: string;
	timeoutMs?: number;
	maxRetries?: number;
}): AiVisionExtract;
118
+ //#endregion
119
+ export { AiExtraction, AiVisionExtract, AnthropicOcrDriver, AnthropicOcrOptions, DEFAULT_AI_MODEL, anthropicVision, applyAuthenticity, scoreConfidence };
120
+ //# sourceMappingURL=ai.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai.d.mts","names":[],"sources":["../../src/drivers/ai.ts"],"mappings":";;;;;;AAaA;cAAa,gBAAA;;;AAAgB;UAsBnB,aAAA;EACR,SAAA;EACA,IAAI;AAAA;AAAA;AAMN;;AANM,UAMW,YAAA;EACf,MAAA,EAAQ,SAAA;EAGO;EADf,UAAA;EACA,YAAA,GAAe,YAAA;EAEe;EAA9B,YAAA,GAAe,eAAA;AAAA;;;;;;KAQL,eAAA,IAAmB,OAAA,EAAS,UAAA,EAAY,MAAA,EAAQ,aAAA,OAAoB,OAAA,CAAQ,YAAA;AAAA,UAEvE,mBAAA;EACf,MAAA;EAHyB;EAKzB,KAAA;EALsC;EAOtC,OAAA;EAPsF;EAStF,OAAA;EATqF;EAWrF,SAAA;EAXsC;EAatC,UAAA;EAb0D;EAe1D,OAAA,GAAU,eAAe;AAAA;;;AAfyE;AAEpG;;;;;;;;;;;;;AAa2B;cAoBd,kBAAA,YAA8B,SAAA;EAAA,SAChC,IAAA;EAAA,iBAEQ,KAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EAAA,iBACA,MAAA;cAEL,OAAA,GAAS,mBAAA;EAkBf,OAAA,CAAQ,OAAA,EAAS,UAAA,GAAa,OAAA,CAAQ,aAAA;EA1BM;;;;;;EAAA,QAqDpC,aAAA;AAAA;;;;;;;;;;;iBAwCA,eAAA,CAAgB,MAAA,EAAQ,SAAS,EAAE,UAAA;AAAnD;;;;;;;;AAAsE;AAiCtE;;;AAjCA,iBAiCgB,iBAAA,CAAkB,UAAA,UAAoB,YAAA,GAAe,eAAe;;;;;AAAA;AAkKpF;iBAAgB,eAAA,CAAgB,IAAA;EAC9B,MAAA;EACA,KAAA;EACA,OAAA;EACA,SAAA;EACA,UAAA;AAAA,IACE,eAAe"}
@@ -0,0 +1,454 @@
1
+ //#region src/drivers/ai.ts
2
/** Clamp `n` into the closed interval [0, 1]; NaN passes through as NaN. */
const clamp01 = (n) => Math.max(0, Math.min(1, n));
3
/** Default Anthropic API origin; point it at a gateway or proxy to override. */
const DEFAULT_BASE_URL = "https://api.anthropic.com";
/** Default model: a small, cheap vision model suffices for printed/MRZ text. */
const DEFAULT_AI_MODEL = "claude-haiku-4-5-20251001";
/**
 * Longest edge (px) uploaded. The API downsamples anything larger anyway and
 * bills by the resampled size, so this bounds cost and bandwidth rather than
 * quality: small enough to be cheap, large enough to keep the MRZ legible.
 */
const DEFAULT_MAX_EDGE = 1568;
/** Timeout per attempt (ms), so a hung connection cannot stall a worker. */
const DEFAULT_TIMEOUT_MS = 30000;
/** In-driver retries on rate-limit/overload; the job queue retries beyond that. */
const DEFAULT_MAX_RETRIES = 2;
25
/**
 * OCR driver that sends the document image(s) to a vision LLM (Claude by
 * default) and maps the structured reply onto {@link OcrResultData}.
 *
 * The reported confidence is deliberately **not** the model's self-report:
 * LLMs are poorly calibrated and will claim high confidence on hallucinated
 * fields. It is computed deterministically from which fields came back and
 * whether they are structurally valid (see {@link scoreConfidence}), so the
 * value compared against the decision engine's `ocrConfidenceThreshold` means
 * something. The model's legibility read contributes only a small soft penalty.
 *
 * The model is also asked for a best-effort, image-only authenticity read
 * (screen replay, photocopy, digital/physical tampering). That signal is
 * advisory: a fired flag only *caps* the confidence (see
 * {@link applyAuthenticity}) so the document is routed to manual review. An
 * LLM's authenticity guess never auto-rejects a user on its own.
 */
var AnthropicOcrDriver = class {
	name = "ai";
	model;
	baseUrl;
	maxEdge;
	vision;
	/**
	 * @param options Driver configuration; either `apiKey` or `extract` must be
	 *   set, otherwise construction throws.
	 */
	constructor({ apiKey, model, baseUrl, maxEdge, timeoutMs, maxRetries, extract } = {}) {
		if (!apiKey && !extract) throw new Error("AnthropicOcrDriver requires config.apiKey");
		this.model = model ?? DEFAULT_AI_MODEL;
		// Drop a single trailing slash so the endpoint path can be appended directly.
		this.baseUrl = (baseUrl ?? DEFAULT_BASE_URL).replace(/\/$/, "");
		this.maxEdge = maxEdge ?? DEFAULT_MAX_EDGE;
		this.vision = extract ?? anthropicVision({
			apiKey,
			model: this.model,
			baseUrl: this.baseUrl,
			timeoutMs: timeoutMs ?? DEFAULT_TIMEOUT_MS,
			maxRetries: maxRetries ?? DEFAULT_MAX_RETRIES
		});
	}
	/**
	 * Read the document in `request`: prepare the images, run the vision call,
	 * normalize the fields and derive the confidence.
	 *
	 * @param request The OCR request carrying the image bytes and hints.
	 * @returns Fields, confidence, the authenticity read (when present) and raw provider details.
	 */
	async extract(request) {
		const prepared = await this.prepareImages(request);
		const { fields: rawFields, legibility, documentType, authenticity } = await this.vision(request, prepared);
		const fields = normalizeFields(rawFields);
		const confidence = applyAuthenticity(scoreConfidence(fields, legibility), authenticity);
		// `authenticity` is only surfaced at the top level when the model reported one.
		const authenticityPart = authenticity ? { authenticity } : {};
		return {
			fields,
			confidence,
			...authenticityPart,
			raw: {
				provider: "ai",
				model: this.model,
				legibility: legibility ?? null,
				documentType: documentType ?? request.documentType ?? null,
				authenticity: authenticity ?? null
			}
		};
	}
	/**
	 * Resize (best-effort) and base64-encode the front + optional back images.
	 * Missing or empty images are skipped.
	 *
	 * @param request The OCR request; `image` and `backImage` are read.
	 * @returns The prepared images, front first.
	 */
	async prepareImages(request) {
		const pending = [];
		for (const bytes of [request.image, request.backImage]) {
			if (bytes && bytes.length > 0) pending.push(prepareImage(bytes, this.maxEdge));
		}
		return Promise.all(pending);
	}
};
90
/** True when `v` is a string containing at least one non-whitespace character. */
const present = (v) => typeof v === "string" && v.trim().length > 0;
/**
 * True when `v` is a `YYYY-MM-DD` string that names a real calendar day.
 *
 * The day is validated by a round trip through a UTC date rather than with
 * `Date.parse`: some engines (V8 among them) accept an impossible day such as
 * `2023-02-30` and silently roll it over into the next month, which would let
 * a misread date count as structurally valid.
 */
const isIsoDate = (v) => {
	if (!present(v)) return false;
	const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(v);
	if (!match) return false;
	const year = Number(match[1]);
	const month = Number(match[2]);
	const day = Number(match[3]);
	// setUTCFullYear avoids the 0-99 => 19xx remapping that Date.UTC applies.
	const probe = new Date(0);
	probe.setUTCFullYear(year, month - 1, day);
	return probe.getUTCFullYear() === year && probe.getUTCMonth() === month - 1 && probe.getUTCDate() === day;
};
92
/**
 * Tidy the model's fields: each known field is kept only when it is a
 * non-blank string, and is stored trimmed. Anything empty or non-string is
 * left out, so callers see `undefined` for it.
 *
 * @param fields Raw fields as returned by the vision call.
 * @returns A new object holding only the usable, trimmed fields.
 */
function normalizeFields(fields) {
	const knownKeys = [
		"firstName",
		"lastName",
		"fullName",
		"dateOfBirth",
		"documentNumber",
		"expiryDate",
		"nationality"
	];
	const kept = knownKeys.flatMap((key) => {
		const value = fields[key];
		const usable = typeof value === "string" && value.trim().length > 0;
		return usable ? [[key, value.trim()]] : [];
	});
	return Object.fromEntries(kept);
}
112
/**
 * Deterministic confidence in [0, 1] built from field completeness and
 * structural validity. The weights add up to 1.0 when every field is present
 * and well-formed. The model's self-reported legibility can only lower the
 * score, by at most 15%, so trustworthy structure outweighs an unreliable
 * self-assessment.
 *
 * A legibility that is missing or not a finite number is treated as
 * unreported (no penalty), so a NaN cannot turn the whole score into NaN.
 *
 * @param fields The normalized fields to score.
 * @param legibility The model's legibility self-assessment in [0, 1], if any.
 * @returns The confidence score in [0, 1].
 */
function scoreConfidence(fields, legibility) {
	// Each entry is [check passed, weight contributed when it did].
	const checks = [
		[present(fields.firstName) && present(fields.lastName) || present(fields.fullName), .28],
		[isIsoDate(fields.dateOfBirth), .22],
		[present(fields.documentNumber) && fields.documentNumber.trim().length >= 4, .22],
		[isIsoDate(fields.expiryDate), .16],
		[present(fields.nationality), .12]
	];
	let completeness = 0;
	for (const [ok, weight] of checks) {
		if (ok) completeness += weight;
	}
	const legible = Number.isFinite(legibility) ? clamp01(legibility) : 1;
	// Legibility scales the score between 85% (illegible) and 100% (crisp).
	return clamp01(completeness * (.85 + .15 * legible));
}
131
/**
 * Hard ceiling on OCR confidence once any authenticity flag fires. It sits
 * below the default `ocrConfidenceThreshold` (0.8), so a flagged document is
 * routed to manual review no matter how cleanly its fields read.
 */
const SUSPECT_CONFIDENCE_CEILING = .5;
/**
 * Combine the authenticity read with the field-derived confidence. The score
 * is only ever lowered, and only when a concrete flag fired: a genuine (or
 * unassessed) document comes back untouched. A flagged document is capped at
 * the lower of the model's own authenticity confidence and
 * {@link SUSPECT_CONFIDENCE_CEILING}, which sends it to manual review instead
 * of auto-approval. This is deliberately conservative: the LLM's guess can
 * demote, never reject outright.
 *
 * When a flagged read carries no usable confidence (missing or not a finite
 * number) only the ceiling applies, so the result is never NaN.
 *
 * @param fieldScore Confidence derived from the extracted fields.
 * @param authenticity The model's authenticity read, if it produced one.
 * @returns The possibly lowered confidence.
 */
function applyAuthenticity(fieldScore, authenticity) {
	if (!authenticity || authenticity.genuine) return fieldScore;
	const reported = authenticity.confidence;
	const usable = typeof reported === "number" && Number.isFinite(reported);
	const modelCap = usable ? Math.min(1, Math.max(0, reported)) : SUSPECT_CONFIDENCE_CEILING;
	return Math.min(fieldScore, modelCap, SUSPECT_CONFIDENCE_CEILING);
}
153
+ const SYSTEM_PROMPT = "You are an OCR engine for identity documents. Read the visible printed text and the machine-readable zone (MRZ) if present, and return the fields exactly as printed. Do not guess, infer, or correct values you cannot clearly read — omit a field rather than fabricate it. Dates must be ISO 8601 (YYYY-MM-DD).\n\nIdentity documents print BOTH a date of issue and a date of expiry — never confuse them. `expiryDate` is only the expiry/expiration/\"valid until\"/\"date of expiry\" value (in an MRZ, the second date field, not the first). `issueDate` is the \"date of issue\"/\"issued\"/\"valid from\" value. The expiry is later than the issue date. If a document shows only an issue date and no expiry, return the issue date in `issueDate` and leave `expiryDate` empty — do not put the issue date in `expiryDate`.\n\nALSO assess document authenticity, to the best of your ability, from the image alone — and report it. Be conservative: raise a flag only on a clear, visible sign. A genuine document that was merely photographed imperfectly (blur, glare on a glossy original, a shadow, a crop) must NOT be flagged. Set `screenReplay` when the image looks like a photo of a screen — moiré or a visible pixel grid, backlit glow, refresh banding, or a device bezel (a replay attack). Set `photocopy` when it looks like a photo or scan of a printout/photocopy rather than the physical document — flat matte paper, halftone dot printing, no security features or hologram. Set `digitalTampering` for signs of digital editing — mismatched fonts or kerning, misaligned or recoloured text, smudged or cloned regions, a pasted-in portrait, or altered dates. Set `physicalTampering` for a substituted photo, peeled or lifted laminate, or scratched-out / overwritten fields. Give `authenticityConfidence` from 0 (clearly fake or replayed) to 1 (clearly an authentic original), and list brief supporting notes in `authenticityObservations`. 
You read and assess the document; you do not make the final accept/reject decision.";
154
/**
 * Tool definition the model is forced to call: its input schema lists the
 * identity fields, the legibility score and the authenticity signals the
 * driver reads back. No property is marked required, so the model can omit
 * anything it cannot read.
 */
const READ_TOOL = (() => {
	// Small builders keep each property to one line; key order is type, description.
	const text = (description) => ({
		type: "string",
		description
	});
	const score = (description) => ({
		type: "number",
		description
	});
	const flag = (description) => ({
		type: "boolean",
		description
	});
	return {
		name: "read_document",
		description: "Return the identity fields read from the document image(s).",
		input_schema: {
			type: "object",
			properties: {
				firstName: text("Given name(s) as printed."),
				lastName: text("Surname / family name as printed."),
				fullName: text("Full name if given/surname cannot be separated."),
				dateOfBirth: text("Date of birth, ISO 8601 YYYY-MM-DD."),
				documentNumber: text("Document/serial number as printed."),
				issueDate: text("Date of issue (\"issued\"/\"valid from\"/\"issue date\"), ISO 8601 YYYY-MM-DD. NOT the expiry."),
				expiryDate: text("Expiry date (\"date of expiry\"/\"valid until\"/\"expiry date\"), ISO 8601 YYYY-MM-DD. Must be the expiry, never the issue date; omit if the document shows no expiry."),
				nationality: text("ISO 3166-1 alpha-3 country code if determinable."),
				documentType: {
					type: "string",
					enum: [
						"passport",
						"id_card",
						"drivers_license",
						"residence_permit"
					]
				},
				legibility: score("How clearly the document text could be read, 0 (illegible) to 1 (crisp)."),
				screenReplay: flag("Looks like a photo of a screen (moiré, pixel grid, backlit glow, refresh banding, bezel)."),
				photocopy: flag("Looks like a photo/scan of a printout or photocopy rather than the physical document."),
				digitalTampering: flag("Signs of digital editing: mismatched fonts, misaligned/recoloured text, cloned regions, edited photo/dates."),
				physicalTampering: flag("Signs of physical tampering: substituted photo, peeled laminate, scratched-out/overwritten fields."),
				authenticityConfidence: score("Authenticity confidence, 0 (clearly fake/replayed) to 1 (clearly an authentic original)."),
				authenticityObservations: {
					type: "array",
					items: { type: "string" },
					description: "Brief notes supporting any authenticity flag raised."
				}
			}
		}
	};
})();
233
/** Document types accepted from the model; any other value is discarded. */
const DOCUMENT_TYPES = ["passport", "id_card", "drivers_license", "residence_permit"];
239
/** Response statuses that earn a bounded retry: rate-limited / overloaded. */
const RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 529]);
/** Resolve after `ms` milliseconds. */
const sleep = (ms) => new Promise((done) => {
	setTimeout(done, ms);
});
/** Exponential backoff (ms): 500 for attempt 0, doubling each attempt, capped at 8s. */
const backoffMs = (attempt) => Math.min(8000, 500 * Math.pow(2, attempt));
248
+ /**
249
+ * Parse a `retry-after` header (seconds) into ms; null when absent/invalid.
250
+ */
251
+ function retryAfterMs(res) {
252
+ const header = res.headers.get("retry-after");
253
+ if (!header) return null;
254
+ const seconds = Number(header);
255
+ return Number.isFinite(seconds) && seconds >= 0 ? seconds * 1e3 : null;
256
+ }
257
/**
 * Issue a request with a per-attempt timeout and a bounded number of retries.
 *
 * A 429/529 response is retried after the delay named by `retry-after`, or
 * after exponential backoff when the header is absent. A rejected `fetch`
 * (timeout or network error) is retried after backoff as well. Every other
 * response, and the last retryable one once the budget is spent, is returned
 * as-is for the caller to handle. The durable job queue is the longer-term
 * retry beyond this bound.
 *
 * Any `signal` in `init` is replaced by the per-attempt timeout signal.
 *
 * @param url Request URL.
 * @param init `fetch` options (method, headers, body).
 * @param timeoutMs Timeout applied to each attempt, in milliseconds.
 * @param maxRetries Number of retries allowed after the first attempt.
 * @returns The first response that is not retried.
 * @throws The last `fetch` error once the retry budget is exhausted.
 */
async function postWithRetry(url, init, timeoutMs, maxRetries) {
	for (let attempt = 0;; attempt++) {
		let res;
		try {
			res = await fetch(url, {
				...init,
				signal: AbortSignal.timeout(timeoutMs)
			});
		} catch (error) {
			if (attempt >= maxRetries) throw error;
			await sleep(backoffMs(attempt));
			continue;
		}
		if (!RETRYABLE_STATUS.has(res.status) || attempt >= maxRetries) return res;
		const delay = retryAfterMs(res) ?? backoffMs(attempt);
		// The retried response is discarded unread; cancel its body so the
		// connection is released instead of waiting on garbage collection.
		try {
			await res.body?.cancel();
		} catch {
			// Best-effort cleanup: a failed cancel must not abort the retry.
		}
		await sleep(delay);
	}
}
289
/**
 * Build the default vision call against the Anthropic messages API.
 *
 * The returned function sends every prepared image followed by a short text
 * instruction (with the document-type and country hints when the request has
 * them), forces the `read_document` tool, and maps that tool's input onto an
 * extraction.
 *
 * @param opts Connection settings: `apiKey`, `model`, `baseUrl`, and the
 *   optional `timeoutMs` / `maxRetries`.
 * @returns The vision call; it throws when the response is not OK or carries
 *   no `read_document` tool input.
 */
function anthropicVision(opts) {
	return async (request, images) => {
		let prompt = "Read this identity document and return its fields.";
		if (request.documentType) prompt += ` The document is a ${request.documentType}.`;
		if (request.country) prompt += ` Issuing country hint: ${request.country}.`;
		const imageBlocks = images.map(({ mediaType, data }) => ({
			type: "image",
			source: {
				type: "base64",
				media_type: mediaType,
				data
			}
		}));
		const payload = {
			model: opts.model,
			max_tokens: 1024,
			system: SYSTEM_PROMPT,
			tools: [READ_TOOL],
			// Forcing the tool guarantees a structured reply instead of free text.
			tool_choice: {
				type: "tool",
				name: "read_document"
			},
			messages: [{
				role: "user",
				content: [...imageBlocks, {
					type: "text",
					text: prompt
				}]
			}]
		};
		const requestInit = {
			method: "POST",
			headers: {
				"x-api-key": opts.apiKey,
				"anthropic-version": "2023-06-01",
				"content-type": "application/json"
			},
			body: JSON.stringify(payload)
		};
		const res = await postWithRetry(`${opts.baseUrl}/v1/messages`, requestInit, opts.timeoutMs ?? DEFAULT_TIMEOUT_MS, opts.maxRetries ?? DEFAULT_MAX_RETRIES);
		if (!res.ok) throw new Error(`AnthropicOcrDriver request failed with status ${res.status}`);
		const reply = await res.json();
		const tool = reply.content?.find((block) => block.type === "tool_use" && block.name === "read_document");
		if (!tool?.input) throw new Error("AnthropicOcrDriver: model returned no structured fields");
		return mapToolInput(tool.input);
	};
}
337
/**
 * Coerce the model's tool input into an {@link AiExtraction}. Field values are
 * kept only when they are non-blank strings (trimmed); the legibility only
 * when it is a number; the document type only when it is one of
 * `DOCUMENT_TYPES`, otherwise `null`.
 *
 * @param input The `read_document` tool input returned by the model.
 * @returns The extraction built from that input.
 */
function mapToolInput(input) {
	const text = (value) => {
		if (typeof value !== "string") return void 0;
		const trimmed = value.trim();
		return trimmed ? trimmed : void 0;
	};
	const fieldKeys = [
		"firstName",
		"lastName",
		"fullName",
		"dateOfBirth",
		"documentNumber",
		"expiryDate",
		"nationality"
	];
	// Every key is present on the result; unreadable ones carry `undefined`.
	const fields = Object.fromEntries(fieldKeys.map((key) => [key, text(input[key])]));
	const declaredType = text(input.documentType);
	const knownType = declaredType !== void 0 && DOCUMENT_TYPES.includes(declaredType);
	return {
		fields,
		legibility: typeof input.legibility === "number" ? input.legibility : void 0,
		documentType: knownType ? declaredType : null,
		authenticity: mapAuthenticity(input)
	};
}
361
/**
 * Build the {@link OcrAuthenticity} read from the model's tool input.
 *
 * A flag counts only when it is exactly `true`. Observations are the first six
 * non-blank strings, trimmed. When the model gave no confidence, a flagged read
 * defaults to 0.2 and an unflagged one to 1. Returns `undefined` when nothing
 * assessable was reported (no flags, no confidence, no notes), so a driver that
 * cannot see authenticity stays silent.
 *
 * @param input The `read_document` tool input returned by the model.
 * @returns The authenticity read, or `undefined` when there is none.
 */
function mapAuthenticity(input) {
	const flags = {
		screenReplay: input.screenReplay === true,
		photocopy: input.photocopy === true,
		digitalTampering: input.digitalTampering === true,
		physicalTampering: input.physicalTampering === true
	};
	const flagged = flags.screenReplay || flags.photocopy || flags.digitalTampering || flags.physicalTampering;
	const observations = [];
	if (Array.isArray(input.authenticityObservations)) {
		for (const note of input.authenticityObservations) {
			if (observations.length === 6) break;
			if (typeof note !== "string") continue;
			const trimmed = note.trim();
			if (trimmed.length > 0) observations.push(trimmed);
		}
	}
	const reported = typeof input.authenticityConfidence === "number";
	if (!flagged && !reported && observations.length === 0) return void 0;
	let confidence;
	if (reported) confidence = input.authenticityConfidence;
	else confidence = flagged ? .2 : 1;
	return {
		genuine: !flagged,
		confidence: clamp01(confidence),
		...flags,
		observations
	};
}
390
/**
 * Detect the media type from the leading magic bytes; default to JPEG.
 *
 * Each signature is compared in full — `FF D8` for JPEG, `89 50 4E 47` for
 * PNG, and `RIFF` at offset 0 plus `WEBP` at offset 8 for WebP — so another
 * RIFF container (a WAV file also has `W` at offset 8) is not mistaken for
 * WebP. Input too short to hold a signature falls through to the default.
 *
 * @param bytes The image bytes (any indexable sequence of byte values).
 * @returns `"image/jpeg"`, `"image/png"` or `"image/webp"`.
 */
function detectMediaType(bytes) {
	const hasSignature = (signature, offset = 0) => signature.every((byte, i) => bytes[offset + i] === byte);
	if (hasSignature([255, 216])) return "image/jpeg";
	if (hasSignature([137, 80, 78, 71])) return "image/png";
	if (hasSignature([82, 73, 70, 70]) && hasSignature([87, 69, 66, 80], 8)) return "image/webp";
	return "image/jpeg";
}
402
/** Cached `sharp` factory: `undefined` until first resolved, then the factory or `null`. */
let sharpFactory;
/**
 * Lazily resolve the optional `sharp` factory once.
 *
 * The result is cached, including the negative one: a failed import and a
 * module without a default export are both stored as `null`, so the import is
 * never attempted again.
 *
 * @returns The `sharp` factory, or `null` when it is not available.
 */
async function loadSharp() {
	if (sharpFactory === void 0) {
		// Importing through a variable keeps the optional dependency out of
		// bundlers' static resolution.
		const moduleName = "sharp";
		try {
			const loaded = await import(
				/* @vite-ignore */
				moduleName
			);
			sharpFactory = loaded.default ?? null;
		} catch {
			sharpFactory = null;
		}
	}
	return sharpFactory;
}
417
/**
 * Downscale an image to `maxEdge` and re-encode it as JPEG to bound the upload
 * (and therefore the model's image-token cost). Falls back to the original
 * bytes, with a sniffed media type, when `sharp` isn't installed or fails —
 * the API caps oversized images itself.
 *
 * The EXIF orientation is applied before encoding: re-encoding drops the
 * orientation tag, so without this a photo taken with a rotated phone would
 * reach the model sideways.
 *
 * @param bytes The raw image bytes.
 * @param maxEdge Longest allowed edge, in pixels.
 * @returns The base64 payload and its media type.
 */
async function prepareImage(bytes, maxEdge) {
	const sharp = await loadSharp();
	if (sharp) try {
		const input = Buffer.from(bytes);
		const { width, height } = await sharp(input).metadata();
		const longest = Math.max(width ?? 0, height ?? 0);
		// rotate() with no angle auto-orients from EXIF. The longest edge is the
		// same before and after, so the size check above still holds.
		let pipe = sharp(input).rotate();
		if (longest > maxEdge) pipe = pipe.resize({
			width: maxEdge,
			height: maxEdge,
			fit: "inside",
			withoutEnlargement: true
		});
		return {
			mediaType: "image/jpeg",
			data: (await pipe.jpeg({ quality: 85 }).toBuffer()).toString("base64")
		};
	} catch {
		// Deliberate best-effort: any sharp failure falls through to the raw bytes.
	}
	return {
		mediaType: detectMediaType(bytes),
		data: Buffer.from(bytes).toString("base64")
	};
}
451
+ //#endregion
452
+ export { AnthropicOcrDriver, DEFAULT_AI_MODEL, anthropicVision, applyAuthenticity, scoreConfidence };
453
+
454
+ //# sourceMappingURL=ai.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai.mjs","names":[],"sources":["../../src/drivers/ai.ts"],"sourcesContent":["import type { DocumentType, OcrAuthenticity, OcrFields, OcrResultData } from '@arkyc/types'\nimport type { OcrDriver, OcrRequest } from '../types'\n\nconst clamp01 = (n: number): number => Math.min(1, Math.max(0, n))\n\n/**\n * Anthropic's default API base; override for a gateway/proxy.\n */\nconst DEFAULT_BASE_URL = 'https://api.anthropic.com'\n\n/**\n * A small, cheap vision model is plenty for reading printed/MRZ document text.\n */\nexport const DEFAULT_AI_MODEL = 'claude-haiku-4-5-20251001'\n\n/**\n * Longest edge (px) we upload. The API downsamples anything larger anyway and\n * bills by the resampled size, so this is a cost/bandwidth bound, not a quality\n * one — small enough to be cheap, large enough to keep the MRZ legible.\n */\nconst DEFAULT_MAX_EDGE = 1568\n\n/**\n * Per-attempt request timeout (ms) — a hung connection must not stall a worker.\n */\nconst DEFAULT_TIMEOUT_MS = 30_000\n\n/**\n * Bounded in-driver retries on rate-limit/overload; the job queue retries beyond.\n */\nconst DEFAULT_MAX_RETRIES = 2\n\n/**\n * A base64 image ready for the messages API, with its detected media type.\n */\ninterface PreparedImage {\n mediaType: 'image/jpeg' | 'image/png' | 'image/webp'\n data: string\n}\n\n/**\n * Raw fields the vision model returned, before validation/scoring.\n */\nexport interface AiExtraction {\n fields: OcrFields\n /** The model's own legibility self-assessment in [0, 1], if it gave one. */\n legibility?: number\n documentType?: DocumentType | null\n /** Best-effort tamper/replay read; omitted when the model reported nothing. */\n authenticity?: OcrAuthenticity\n}\n\n/**\n * Calls a vision model and returns the raw extracted fields. 
Injected so the\n * driver's scoring can be unit-tested without a network call; the default\n * implementation talks to the Anthropic messages API.\n */\nexport type AiVisionExtract = (request: OcrRequest, images: PreparedImage[]) => Promise<AiExtraction>\n\nexport interface AnthropicOcrOptions {\n apiKey?: string\n /** Model id; defaults to {@link DEFAULT_AI_MODEL}. */\n model?: string\n /** API base URL; defaults to {@link DEFAULT_BASE_URL}. */\n baseUrl?: string\n /** Longest image edge (px) to upload; defaults to {@link DEFAULT_MAX_EDGE}. */\n maxEdge?: number\n /** Per-attempt request timeout (ms); defaults to {@link DEFAULT_TIMEOUT_MS}. */\n timeoutMs?: number\n /** Bounded retries on 429/529; defaults to {@link DEFAULT_MAX_RETRIES}. */\n maxRetries?: number\n /** Override the vision call (tests). When set, `apiKey` is not required. */\n extract?: AiVisionExtract\n}\n\n/**\n * AI OCR driver: hands the document image(s) to a vision LLM (Claude by default)\n * and maps the structured response onto {@link OcrResultData}.\n *\n * Confidence is **not** taken from the model's self-report — LLMs are poorly\n * calibrated and will state high confidence on hallucinated fields. Instead it's\n * derived deterministically from which fields came back and whether they're\n * structurally valid (see {@link scoreConfidence}), so the value feeding the\n * decision engine's `ocrConfidenceThreshold` is meaningful. The model's own\n * legibility read is folded in only as a small soft penalty.\n *\n * It also asks the model for a best-effort, image-only authenticity read\n * (screen-replay, photocopy, digital/physical tampering). 
That signal is\n * advisory: a fired flag only *caps* the OCR confidence (see\n * {@link applyAuthenticity}) so a suspicious document routes to manual review —\n * an LLM's authenticity guess never auto-rejects a user on its own.\n */\nexport class AnthropicOcrDriver implements OcrDriver {\n readonly name = 'ai'\n\n private readonly model: string\n private readonly baseUrl: string\n private readonly maxEdge: number\n private readonly vision: AiVisionExtract\n\n constructor(options: AnthropicOcrOptions = {}) {\n if (!options.apiKey && !options.extract) {\n throw new Error('AnthropicOcrDriver requires config.apiKey')\n }\n this.model = options.model ?? DEFAULT_AI_MODEL\n this.baseUrl = (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\\/$/, '')\n this.maxEdge = options.maxEdge ?? DEFAULT_MAX_EDGE\n this.vision =\n options.extract ??\n anthropicVision({\n apiKey: options.apiKey as string,\n model: this.model,\n baseUrl: this.baseUrl,\n timeoutMs: options.timeoutMs ?? DEFAULT_TIMEOUT_MS,\n maxRetries: options.maxRetries ?? DEFAULT_MAX_RETRIES,\n })\n }\n\n async extract(request: OcrRequest): Promise<OcrResultData> {\n const images = await this.prepareImages(request)\n const raw = await this.vision(request, images)\n const fields = normalizeFields(raw.fields)\n const authenticity = raw.authenticity\n const confidence = applyAuthenticity(scoreConfidence(fields, raw.legibility), authenticity)\n\n return {\n fields,\n confidence,\n ...(authenticity ? { authenticity } : {}),\n raw: {\n provider: 'ai',\n model: this.model,\n legibility: raw.legibility ?? null,\n documentType: raw.documentType ?? request.documentType ?? null,\n authenticity: authenticity ?? 
null,\n },\n }\n }\n\n /**\n * Resize (best-effort) and base64-encode the front + optional back images.\n *\n * @param input\n * @returns\n */\n private async prepareImages(request: OcrRequest): Promise<PreparedImage[]> {\n const sources = [request.image, request.backImage].filter((b): b is Uint8Array => !!b && b.length > 0)\n return Promise.all(sources.map((bytes) => prepareImage(bytes, this.maxEdge)))\n }\n}\n\nconst present = (v: unknown): v is string => typeof v === 'string' && v.trim().length > 0\nconst isIsoDate = (v: unknown): boolean => present(v) && /^\\d{4}-\\d{2}-\\d{2}$/.test(v) && !Number.isNaN(Date.parse(v))\n\n/**\n * Trim/blank-out the model's fields; drop anything empty so callers see `undefined`.\n *\n * @param input\n * @returns\n */\nfunction normalizeFields(fields: OcrFields): OcrFields {\n const out: OcrFields = {}\n const set = (key: keyof OcrFields, value: string | undefined) => {\n if (present(value)) out[key] = value.trim()\n }\n set('firstName', fields.firstName)\n set('lastName', fields.lastName)\n set('fullName', fields.fullName)\n set('dateOfBirth', fields.dateOfBirth)\n set('documentNumber', fields.documentNumber)\n set('expiryDate', fields.expiryDate)\n set('nationality', fields.nationality)\n return out\n}\n\n/**\n * Deterministic confidence in [0, 1] from field completeness + structural\n * validity. 
Weights sum to 1.0 when every field is present and well-formed; the\n * model's self-reported legibility can only pull the score down by up to 15%, so\n * trustworthy structure dominates an unreliable self-assessment.\n *\n * @param fields\n * @param legibility\n * @returns\n */\nexport function scoreConfidence(fields: OcrFields, legibility?: number): number {\n const hasName = (present(fields.firstName) && present(fields.lastName)) || present(fields.fullName)\n const checks: Array<[boolean, number]> = [\n [hasName, 0.28],\n [isIsoDate(fields.dateOfBirth), 0.22],\n [present(fields.documentNumber) && fields.documentNumber!.trim().length >= 4, 0.22],\n [isIsoDate(fields.expiryDate), 0.16],\n [present(fields.nationality), 0.12],\n ]\n const base = checks.reduce((sum, [ok, weight]) => (ok ? sum + weight : sum), 0)\n const legible = clamp01(legibility ?? 1)\n return clamp01(base * (0.85 + 0.15 * legible))\n}\n\n/**\n * Hard ceiling on OCR confidence once any authenticity flag fires. Below the\n * default `ocrConfidenceThreshold` (0.8), so a flagged document routes to manual\n * review regardless of how cleanly its fields read.\n */\nconst SUSPECT_CONFIDENCE_CEILING = 0.5\n\n/**\n * Fold the authenticity read into the field-derived confidence. Only ever lowers\n * the score, and only when a concrete flag fired — a genuine (or unassessed)\n * document is returned untouched. A flagged document is capped at both the\n * model's own authenticity confidence and {@link SUSPECT_CONFIDENCE_CEILING},\n * which lands it in manual review rather than auto-approval. 
This is deliberately\n * conservative: the LLM's authenticity guess can demote, never reject outright.\n *\n * @param fieldScore\n * @param authenticity\n * @returns\n */\nexport function applyAuthenticity(fieldScore: number, authenticity?: OcrAuthenticity): number {\n if (!authenticity || authenticity.genuine) return fieldScore\n return Math.min(fieldScore, clamp01(authenticity.confidence), SUSPECT_CONFIDENCE_CEILING)\n}\n\nconst SYSTEM_PROMPT =\n 'You are an OCR engine for identity documents. Read the visible printed text and ' +\n 'the machine-readable zone (MRZ) if present, and return the fields exactly as ' +\n 'printed. Do not guess, infer, or correct values you cannot clearly read — omit a ' +\n 'field rather than fabricate it. Dates must be ISO 8601 (YYYY-MM-DD).\\n\\n' +\n 'Identity documents print BOTH a date of issue and a date of expiry — never ' +\n 'confuse them. `expiryDate` is only the expiry/expiration/\"valid until\"/\"date of ' +\n 'expiry\" value (in an MRZ, the second date field, not the first). `issueDate` is ' +\n 'the \"date of issue\"/\"issued\"/\"valid from\" value. The expiry is later than the ' +\n 'issue date. If a document shows only an issue date and no expiry, return the ' +\n 'issue date in `issueDate` and leave `expiryDate` empty — do not put the issue ' +\n 'date in `expiryDate`.\\n\\n' +\n 'ALSO assess document authenticity, to the best of your ability, from the image ' +\n 'alone — and report it. Be conservative: raise a flag only on a clear, visible ' +\n 'sign. A genuine document that was merely photographed imperfectly (blur, glare ' +\n 'on a glossy original, a shadow, a crop) must NOT be flagged. Set `screenReplay` ' +\n 'when the image looks like a photo of a screen — moiré or a visible pixel grid, ' +\n 'backlit glow, refresh banding, or a device bezel (a replay attack). 
Set ' +\n '`photocopy` when it looks like a photo or scan of a printout/photocopy rather ' +\n 'than the physical document — flat matte paper, halftone dot printing, no ' +\n 'security features or hologram. Set `digitalTampering` for signs of digital ' +\n 'editing — mismatched fonts or kerning, misaligned or recoloured text, smudged ' +\n 'or cloned regions, a pasted-in portrait, or altered dates. Set ' +\n '`physicalTampering` for a substituted photo, peeled or lifted laminate, or ' +\n 'scratched-out / overwritten fields. Give `authenticityConfidence` from 0 ' +\n '(clearly fake or replayed) to 1 (clearly an authentic original), and list brief ' +\n 'supporting notes in `authenticityObservations`. You read and assess the ' +\n 'document; you do not make the final accept/reject decision.'\n\nconst READ_TOOL = {\n name: 'read_document',\n description: 'Return the identity fields read from the document image(s).',\n input_schema: {\n type: 'object',\n properties: {\n firstName: { type: 'string', description: 'Given name(s) as printed.' },\n lastName: { type: 'string', description: 'Surname / family name as printed.' },\n fullName: { type: 'string', description: 'Full name if given/surname cannot be separated.' },\n dateOfBirth: { type: 'string', description: 'Date of birth, ISO 8601 YYYY-MM-DD.' },\n documentNumber: { type: 'string', description: 'Document/serial number as printed.' },\n issueDate: {\n type: 'string',\n description: 'Date of issue (\"issued\"/\"valid from\"/\"issue date\"), ISO 8601 YYYY-MM-DD. NOT the expiry.',\n },\n expiryDate: {\n type: 'string',\n description:\n 'Expiry date (\"date of expiry\"/\"valid until\"/\"expiry date\"), ISO 8601 YYYY-MM-DD. Must be the expiry, never the issue date; omit if the document shows no expiry.',\n },\n nationality: { type: 'string', description: 'ISO 3166-1 alpha-3 country code if determinable.' 
},\n documentType: {\n type: 'string',\n enum: ['passport', 'id_card', 'drivers_license', 'residence_permit'],\n },\n legibility: {\n type: 'number',\n description: 'How clearly the document text could be read, 0 (illegible) to 1 (crisp).',\n },\n screenReplay: {\n type: 'boolean',\n description: 'Looks like a photo of a screen (moiré, pixel grid, backlit glow, refresh banding, bezel).',\n },\n photocopy: {\n type: 'boolean',\n description: 'Looks like a photo/scan of a printout or photocopy rather than the physical document.',\n },\n digitalTampering: {\n type: 'boolean',\n description:\n 'Signs of digital editing: mismatched fonts, misaligned/recoloured text, cloned regions, edited photo/dates.',\n },\n physicalTampering: {\n type: 'boolean',\n description:\n 'Signs of physical tampering: substituted photo, peeled laminate, scratched-out/overwritten fields.',\n },\n authenticityConfidence: {\n type: 'number',\n description: 'Authenticity confidence, 0 (clearly fake/replayed) to 1 (clearly an authentic original).',\n },\n authenticityObservations: {\n type: 'array',\n items: { type: 'string' },\n description: 'Brief notes supporting any authenticity flag raised.',\n },\n },\n },\n} as const\n\n/**\n * Minimal shape of the messages-API response we read.\n *\n */\ninterface AnthropicResponse {\n content?: Array<{ type: string; name?: string; input?: Record<string, unknown> }>\n}\n\nconst DOCUMENT_TYPES: readonly DocumentType[] = ['passport', 'id_card', 'drivers_license', 'residence_permit']\n\n/**\n * HTTP statuses worth a bounded retry: rate-limited / overloaded.\n */\nconst RETRYABLE_STATUS = new Set([429, 529])\n\nconst sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms))\n\n/**\n * Exponential backoff (ms) for attempt 0, 1, 2…, capped at 8s.\n */\nconst backoffMs = (attempt: number): number => Math.min(8_000, 500 * 2 ** attempt)\n\n/**\n * Parse a `retry-after` header (seconds) into ms; null when absent/invalid.\n */\nfunction 
retryAfterMs(res: Response): number | null {\n const header = res.headers.get('retry-after')\n if (!header) return null\n const seconds = Number(header)\n return Number.isFinite(seconds) && seconds >= 0 ? seconds * 1000 : null\n}\n\n/**\n * POST with a per-attempt timeout and a bounded retry on 429/529 — honouring\n * `retry-after`, else exponential backoff. Timeouts/network errors retry too;\n * any other response is returned as-is for the caller to handle. The durable job\n * queue is the longer-term retry beyond {@link DEFAULT_MAX_RETRIES}.\n *\n * @param url\n * @param init\n * @param timeoutMs\n * @param maxRetries\n * @returns\n */\nasync function postWithRetry(url: string, init: RequestInit, timeoutMs: number, maxRetries: number): Promise<Response> {\n for (let attempt = 0; ; attempt++) {\n let res: Response\n try {\n res = await fetch(url, { ...init, signal: AbortSignal.timeout(timeoutMs) })\n } catch (error) {\n if (attempt >= maxRetries) throw error\n await sleep(backoffMs(attempt))\n continue\n }\n if (RETRYABLE_STATUS.has(res.status) && attempt < maxRetries) {\n await sleep(retryAfterMs(res) ?? backoffMs(attempt))\n continue\n }\n return res\n }\n}\n\n/**\n * Build the default vision call against the Anthropic messages API.\n *\n * @param input\n * @returns\n */\nexport function anthropicVision(opts: {\n apiKey: string\n model: string\n baseUrl: string\n timeoutMs?: number\n maxRetries?: number\n}): AiVisionExtract {\n return async (request, images) => {\n const hint = request.documentType ? ` The document is a ${request.documentType}.` : ''\n const country = request.country ? 
` Issuing country hint: ${request.country}.` : ''\n const res = await postWithRetry(\n `${opts.baseUrl}/v1/messages`,\n {\n method: 'POST',\n headers: {\n 'x-api-key': opts.apiKey,\n 'anthropic-version': '2023-06-01',\n 'content-type': 'application/json',\n },\n body: JSON.stringify({\n model: opts.model,\n max_tokens: 1024,\n system: SYSTEM_PROMPT,\n tools: [READ_TOOL],\n tool_choice: { type: 'tool', name: 'read_document' },\n messages: [\n {\n role: 'user',\n content: [\n ...images.map((img) => ({\n type: 'image',\n source: { type: 'base64', media_type: img.mediaType, data: img.data },\n })),\n { type: 'text', text: `Read this identity document and return its fields.${hint}${country}` },\n ],\n },\n ],\n }),\n },\n opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,\n opts.maxRetries ?? DEFAULT_MAX_RETRIES,\n )\n\n if (!res.ok) {\n throw new Error(`AnthropicOcrDriver request failed with status ${res.status}`)\n }\n\n const json = (await res.json()) as AnthropicResponse\n const tool = json.content?.find((block) => block.type === 'tool_use' && block.name === 'read_document')\n if (!tool?.input) {\n throw new Error('AnthropicOcrDriver: model returned no structured fields')\n }\n\n return mapToolInput(tool.input)\n }\n}\n\n/**\n * Coerce the model's tool input into an {@link AiExtraction} (string-typed fields).\n *\n * @param input\n * @returns\n */\nfunction mapToolInput(input: Record<string, unknown>): AiExtraction {\n const str = (v: unknown): string | undefined => (typeof v === 'string' && v.trim() ? v.trim() : undefined)\n const docType = str(input.documentType)\n return {\n fields: {\n firstName: str(input.firstName),\n lastName: str(input.lastName),\n fullName: str(input.fullName),\n dateOfBirth: str(input.dateOfBirth),\n documentNumber: str(input.documentNumber),\n expiryDate: str(input.expiryDate),\n nationality: str(input.nationality),\n },\n legibility: typeof input.legibility === 'number' ? 
input.legibility : undefined,\n documentType: docType && (DOCUMENT_TYPES as readonly string[]).includes(docType) ? (docType as DocumentType) : null,\n authenticity: mapAuthenticity(input),\n }\n}\n\n/**\n * Build the {@link OcrAuthenticity} read from the model's tool input. Returns\n * `undefined` when the model reported nothing assessable (no flags, no\n * confidence, no notes) so a driver that can't see authenticity stays silent.\n *\n * @param input\n * @returns\n */\nfunction mapAuthenticity(input: Record<string, unknown>): OcrAuthenticity | undefined {\n const bool = (v: unknown): boolean => v === true\n const screenReplay = bool(input.screenReplay)\n const photocopy = bool(input.photocopy)\n const digitalTampering = bool(input.digitalTampering)\n const physicalTampering = bool(input.physicalTampering)\n const flagged = screenReplay || photocopy || digitalTampering || physicalTampering\n\n const observations = Array.isArray(input.authenticityObservations)\n ? input.authenticityObservations\n .filter((o): o is string => typeof o === 'string' && o.trim().length > 0)\n .map((o) => o.trim())\n .slice(0, 6)\n : []\n\n const reported = typeof input.authenticityConfidence === 'number'\n // The model said nothing about authenticity at all — don't fabricate a verdict.\n if (!flagged && !reported && observations.length === 0) return undefined\n\n // A flagged document with no explicit score defaults to clearly-suspect (0.2);\n // an unflagged read defaults to clearly-genuine (1).\n const confidence = clamp01(reported ? (input.authenticityConfidence as number) : flagged ? 
0.2 : 1)\n\n return { genuine: !flagged, confidence, screenReplay, photocopy, digitalTampering, physicalTampering, observations }\n}\n\n/**\n * Detect the media type from magic bytes; default to JPEG.\n *\n * @param bytes\n * @returns\n */\nfunction detectMediaType(bytes: Uint8Array): PreparedImage['mediaType'] {\n if (bytes[0] === 0xff && bytes[1] === 0xd8) return 'image/jpeg'\n if (bytes[0] === 0x89 && bytes[1] === 0x50) return 'image/png'\n if (bytes[0] === 0x52 && bytes[1] === 0x49 && bytes[8] === 0x57) return 'image/webp'\n return 'image/jpeg'\n}\n\ninterface SharpInstance {\n metadata(): Promise<{ width?: number; height?: number }>\n resize(options: { width?: number; height?: number; fit: string; withoutEnlargement: boolean }): SharpInstance\n jpeg(options: { quality: number }): SharpInstance\n toBuffer(): Promise<Buffer>\n}\ntype SharpFactory = (input: Buffer) => SharpInstance\n\nlet sharpFactory: SharpFactory | null | undefined\n\n/** Lazily resolve the optional `sharp` factory once; null when not installed. */\nasync function loadSharp(): Promise<SharpFactory | null> {\n if (sharpFactory !== undefined) return sharpFactory\n const moduleName = 'sharp'\n try {\n const mod = (await import(/* @vite-ignore */ moduleName)) as unknown as { default: SharpFactory }\n sharpFactory = mod.default\n } catch {\n sharpFactory = null\n }\n return sharpFactory\n}\n\n/**\n * Downscale an image to `maxEdge` and re-encode as JPEG to bound the upload (and\n * therefore the model's image-token cost). 
Falls back to the original bytes when\n * `sharp` isn't installed or fails — the API caps oversized images itself.\n *\n * @param url\n * @param init\n * @param timeoutMs\n * @param maxRetries\n * @returns\n */\nasync function prepareImage(bytes: Uint8Array, maxEdge: number): Promise<PreparedImage> {\n const sharp = await loadSharp()\n if (sharp) {\n try {\n const input = Buffer.from(bytes)\n const { width, height } = await sharp(input).metadata()\n const longest = Math.max(width ?? 0, height ?? 0)\n let pipe = sharp(input)\n if (longest > maxEdge) {\n pipe = pipe.resize({ width: maxEdge, height: maxEdge, fit: 'inside', withoutEnlargement: true })\n }\n const out = await pipe.jpeg({ quality: 85 }).toBuffer()\n return { mediaType: 'image/jpeg', data: out.toString('base64') }\n } catch {\n // fall through to raw bytes\n }\n }\n return { mediaType: detectMediaType(bytes), data: Buffer.from(bytes).toString('base64') }\n}\n"],"mappings":";AAGA,MAAM,WAAW,MAAsB,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,CAAC,CAAC;;;;AAKjE,MAAM,mBAAmB;;;;AAKzB,MAAa,mBAAmB;;;;;;AAOhC,MAAM,mBAAmB;;;;AAKzB,MAAM,qBAAqB;;;;AAK3B,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;AA8D5B,IAAa,qBAAb,MAAqD;CACnD,OAAgB;CAEhB;CACA;CACA;CACA;CAEA,YAAY,UAA+B,CAAC,GAAG;EAC7C,IAAI,CAAC,QAAQ,UAAU,CAAC,QAAQ,SAC9B,MAAM,IAAI,MAAM,2CAA2C;EAE7D,KAAK,QAAQ,QAAQ,SAAA;EACrB,KAAK,WAAW,QAAQ,WAAW,iBAAA,CAAkB,QAAQ,OAAO,EAAE;EACtE,KAAK,UAAU,QAAQ,WAAW;EAClC,KAAK,SACH,QAAQ,WACR,gBAAgB;GACd,QAAQ,QAAQ;GAChB,OAAO,KAAK;GACZ,SAAS,KAAK;GACd,WAAW,QAAQ,aAAa;GAChC,YAAY,QAAQ,cAAc;EACpC,CAAC;CACL;CAEA,MAAM,QAAQ,SAA6C;EACzD,MAAM,SAAS,MAAM,KAAK,cAAc,OAAO;EAC/C,MAAM,MAAM,MAAM,KAAK,OAAO,SAAS,MAAM;EAC7C,MAAM,SAAS,gBAAgB,IAAI,MAAM;EACzC,MAAM,eAAe,IAAI;EAGzB,OAAO;GACL;GACA,YAJiB,kBAAkB,gBAAgB,QAAQ,IAAI,UAAU,GAAG,YAInE;GACT,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC;GACvC,KAAK;IACH,UAAU;IACV,OAAO,KAAK;IACZ,YAAY,IAAI,cAAc;IAC9B,cAAc,IAAI,gBAAgB,QAAQ,gBAAgB;IAC1D,cAAc,gBAAgB;GAChC;EACF;CACF;;;;;;;CAQA,MAAc,cAAc,SAA+C;EACzE,MAAM,UAAU,CAAC,QAAQ,OAAO,QAAQ,SAAS,CAAC,CAAC,QAAQ,MAAuB,CAAC,CAAC,K
AAK,EAAE,SAAS,CAAC;EACrG,OAAO,QAAQ,IAAI,QAAQ,KAAK,UAAU,aAAa,OAAO,KAAK,OAAO,CAAC,CAAC;CAC9E;AACF;AAEA,MAAM,WAAW,MAA4B,OAAO,MAAM,YAAY,EAAE,KAAK,CAAC,CAAC,SAAS;AACxF,MAAM,aAAa,MAAwB,QAAQ,CAAC,KAAK,sBAAsB,KAAK,CAAC,KAAK,CAAC,OAAO,MAAM,KAAK,MAAM,CAAC,CAAC;;;;;;;AAQrH,SAAS,gBAAgB,QAA8B;CACrD,MAAM,MAAiB,CAAC;CACxB,MAAM,OAAO,KAAsB,UAA8B;EAC/D,IAAI,QAAQ,KAAK,GAAG,IAAI,OAAO,MAAM,KAAK;CAC5C;CACA,IAAI,aAAa,OAAO,SAAS;CACjC,IAAI,YAAY,OAAO,QAAQ;CAC/B,IAAI,YAAY,OAAO,QAAQ;CAC/B,IAAI,eAAe,OAAO,WAAW;CACrC,IAAI,kBAAkB,OAAO,cAAc;CAC3C,IAAI,cAAc,OAAO,UAAU;CACnC,IAAI,eAAe,OAAO,WAAW;CACrC,OAAO;AACT;;;;;;;;;;;AAYA,SAAgB,gBAAgB,QAAmB,YAA6B;CAW9E,OAAO,QAFM;EANX,CAFe,QAAQ,OAAO,SAAS,KAAK,QAAQ,OAAO,QAAQ,KAAM,QAAQ,OAAO,QAAQ,GAEtF,GAAI;EACd,CAAC,UAAU,OAAO,WAAW,GAAG,GAAI;EACpC,CAAC,QAAQ,OAAO,cAAc,KAAK,OAAO,eAAgB,KAAK,CAAC,CAAC,UAAU,GAAG,GAAI;EAClF,CAAC,UAAU,OAAO,UAAU,GAAG,GAAI;EACnC,CAAC,QAAQ,OAAO,WAAW,GAAG,GAAI;CAElB,CAAC,CAAC,QAAQ,KAAK,CAAC,IAAI,YAAa,KAAK,MAAM,SAAS,KAAM,CAE3D,KAAK,MAAO,MADd,QAAQ,cAAc,CACK,EAAE;AAC/C;;;;;;AAOA,MAAM,6BAA6B;;;;;;;;;;;;;AAcnC,SAAgB,kBAAkB,YAAoB,cAAwC;CAC5F,IAAI,CAAC,gBAAgB,aAAa,SAAS,OAAO;CAClD,OAAO,KAAK,IAAI,YAAY,QAAQ,aAAa,UAAU,GAAG,0BAA0B;AAC1F;AAEA,MAAM,gBACJ;AA4BF,MAAM,YAAY;CAChB,MAAM;CACN,aAAa;CACb,cAAc;EACZ,MAAM;EACN,YAAY;GACV,WAAW;IAAE,MAAM;IAAU,aAAa;GAA4B;GACtE,UAAU;IAAE,MAAM;IAAU,aAAa;GAAoC;GAC7E,UAAU;IAAE,MAAM;IAAU,aAAa;GAAkD;GAC3F,aAAa;IAAE,MAAM;IAAU,aAAa;GAAsC;GAClF,gBAAgB;IAAE,MAAM;IAAU,aAAa;GAAqC;GACpF,WAAW;IACT,MAAM;IACN,aAAa;GACf;GACA,YAAY;IACV,MAAM;IACN,aACE;GACJ;GACA,aAAa;IAAE,MAAM;IAAU,aAAa;GAAmD;GAC/F,cAAc;IACZ,MAAM;IACN,MAAM;KAAC;KAAY;KAAW;KAAmB;IAAkB;GACrE;GACA,YAAY;IACV,MAAM;IACN,aAAa;GACf;GACA,cAAc;IACZ,MAAM;IACN,aAAa;GACf;GACA,WAAW;IACT,MAAM;IACN,aAAa;GACf;GACA,kBAAkB;IAChB,MAAM;IACN,aACE;GACJ;GACA,mBAAmB;IACjB,MAAM;IACN,aACE;GACJ;GACA,wBAAwB;IACtB,MAAM;IACN,aAAa;GACf;GACA,0BAA0B;IACxB,MAAM;IACN,OAAO,EAAE,MAAM,SAAS;IACxB,aAAa;GACf;EACF;CACF;AACF;AAUA,MAAM,iBAA0C;CAAC;CAAY;CAAW;CAAmB;AAAkB;;;;AAK7G,MAAM,mCAAmB,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC;AAE3
C,MAAM,SAAS,OAA8B,IAAI,SAAS,YAAY,WAAW,SAAS,EAAE,CAAC;;;;AAK7F,MAAM,aAAa,YAA4B,KAAK,IAAI,KAAO,MAAM,KAAK,OAAO;;;;AAKjF,SAAS,aAAa,KAA8B;CAClD,MAAM,SAAS,IAAI,QAAQ,IAAI,aAAa;CAC5C,IAAI,CAAC,QAAQ,OAAO;CACpB,MAAM,UAAU,OAAO,MAAM;CAC7B,OAAO,OAAO,SAAS,OAAO,KAAK,WAAW,IAAI,UAAU,MAAO;AACrE;;;;;;;;;;;;;AAcA,eAAe,cAAc,KAAa,MAAmB,WAAmB,YAAuC;CACrH,KAAK,IAAI,UAAU,IAAK,WAAW;EACjC,IAAI;EACJ,IAAI;GACF,MAAM,MAAM,MAAM,KAAK;IAAE,GAAG;IAAM,QAAQ,YAAY,QAAQ,SAAS;GAAE,CAAC;EAC5E,SAAS,OAAO;GACd,IAAI,WAAW,YAAY,MAAM;GACjC,MAAM,MAAM,UAAU,OAAO,CAAC;GAC9B;EACF;EACA,IAAI,iBAAiB,IAAI,IAAI,MAAM,KAAK,UAAU,YAAY;GAC5D,MAAM,MAAM,aAAa,GAAG,KAAK,UAAU,OAAO,CAAC;GACnD;EACF;EACA,OAAO;CACT;AACF;;;;;;;AAQA,SAAgB,gBAAgB,MAMZ;CAClB,OAAO,OAAO,SAAS,WAAW;EAChC,MAAM,OAAO,QAAQ,eAAe,sBAAsB,QAAQ,aAAa,KAAK;EACpF,MAAM,UAAU,QAAQ,UAAU,0BAA0B,QAAQ,QAAQ,KAAK;EACjF,MAAM,MAAM,MAAM,cAChB,GAAG,KAAK,QAAQ,eAChB;GACE,QAAQ;GACR,SAAS;IACP,aAAa,KAAK;IAClB,qBAAqB;IACrB,gBAAgB;GAClB;GACA,MAAM,KAAK,UAAU;IACnB,OAAO,KAAK;IACZ,YAAY;IACZ,QAAQ;IACR,OAAO,CAAC,SAAS;IACjB,aAAa;KAAE,MAAM;KAAQ,MAAM;IAAgB;IACnD,UAAU,CACR;KACE,MAAM;KACN,SAAS,CACP,GAAG,OAAO,KAAK,SAAS;MACtB,MAAM;MACN,QAAQ;OAAE,MAAM;OAAU,YAAY,IAAI;OAAW,MAAM,IAAI;MAAK;KACtE,EAAE,GACF;MAAE,MAAM;MAAQ,MAAM,qDAAqD,OAAO;KAAU,CAC9F;IACF,CACF;GACF,CAAC;EACH,GACA,KAAK,aAAa,oBAClB,KAAK,cAAc,mBACrB;EAEA,IAAI,CAAC,IAAI,IACP,MAAM,IAAI,MAAM,iDAAiD,IAAI,QAAQ;EAI/E,MAAM,QAAO,MADO,IAAI,KAAK,EAAA,CACX,SAAS,MAAM,UAAU,MAAM,SAAS,cAAc,MAAM,SAAS,eAAe;EACtG,IAAI,CAAC,MAAM,OACT,MAAM,IAAI,MAAM,yDAAyD;EAG3E,OAAO,aAAa,KAAK,KAAK;CAChC;AACF;;;;;;;AAQA,SAAS,aAAa,OAA8C;CAClE,MAAM,OAAO,MAAoC,OAAO,MAAM,YAAY,EAAE,KAAK,IAAI,EAAE,KAAK,IAAI,KAAA;CAChG,MAAM,UAAU,IAAI,MAAM,YAAY;CACtC,OAAO;EACL,QAAQ;GACN,WAAW,IAAI,MAAM,SAAS;GAC9B,UAAU,IAAI,MAAM,QAAQ;GAC5B,UAAU,IAAI,MAAM,QAAQ;GAC5B,aAAa,IAAI,MAAM,WAAW;GAClC,gBAAgB,IAAI,MAAM,cAAc;GACxC,YAAY,IAAI,MAAM,UAAU;GAChC,aAAa,IAAI,MAAM,WAAW;EACpC;EACA,YAAY,OAAO,MAAM,eAAe,WAAW,MAAM,aAAa,KAAA;EACtE,cAAc,WAAY,eAAqC,SAAS,OAAO,IAAK,UAA2B;EAC/G,cAAc,gBAAgB,KAAK;CACrC;AACF;;;;;;;;;AAUA,SAAS,gBAAgB
,OAA6D;CACpF,MAAM,QAAQ,MAAwB,MAAM;CAC5C,MAAM,eAAe,KAAK,MAAM,YAAY;CAC5C,MAAM,YAAY,KAAK,MAAM,SAAS;CACtC,MAAM,mBAAmB,KAAK,MAAM,gBAAgB;CACpD,MAAM,oBAAoB,KAAK,MAAM,iBAAiB;CACtD,MAAM,UAAU,gBAAgB,aAAa,oBAAoB;CAEjE,MAAM,eAAe,MAAM,QAAQ,MAAM,wBAAwB,IAC7D,MAAM,yBACH,QAAQ,MAAmB,OAAO,MAAM,YAAY,EAAE,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC,CACxE,KAAK,MAAM,EAAE,KAAK,CAAC,CAAC,CACpB,MAAM,GAAG,CAAC,IACb,CAAC;CAEL,MAAM,WAAW,OAAO,MAAM,2BAA2B;CAEzD,IAAI,CAAC,WAAW,CAAC,YAAY,aAAa,WAAW,GAAG,OAAO,KAAA;CAI/D,MAAM,aAAa,QAAQ,WAAY,MAAM,yBAAoC,UAAU,KAAM,CAAC;CAElG,OAAO;EAAE,SAAS,CAAC;EAAS;EAAY;EAAc;EAAW;EAAkB;EAAmB;CAAa;AACrH;;;;;;;AAQA,SAAS,gBAAgB,OAA+C;CACtE,IAAI,MAAM,OAAO,OAAQ,MAAM,OAAO,KAAM,OAAO;CACnD,IAAI,MAAM,OAAO,OAAQ,MAAM,OAAO,IAAM,OAAO;CACnD,IAAI,MAAM,OAAO,MAAQ,MAAM,OAAO,MAAQ,MAAM,OAAO,IAAM,OAAO;CACxE,OAAO;AACT;AAUA,IAAI;;AAGJ,eAAe,YAA0C;CACvD,IAAI,iBAAiB,KAAA,GAAW,OAAO;CACvC,MAAM,aAAa;CACnB,IAAI;EAEF,gBAAe,MADI;;GAA0B;GAC1B;CACrB,QAAQ;EACN,eAAe;CACjB;CACA,OAAO;AACT;;;;;;;;;;;;AAaA,eAAe,aAAa,OAAmB,SAAyC;CACtF,MAAM,QAAQ,MAAM,UAAU;CAC9B,IAAI,OACF,IAAI;EACF,MAAM,QAAQ,OAAO,KAAK,KAAK;EAC/B,MAAM,EAAE,OAAO,WAAW,MAAM,MAAM,KAAK,CAAC,CAAC,SAAS;EACtD,MAAM,UAAU,KAAK,IAAI,SAAS,GAAG,UAAU,CAAC;EAChD,IAAI,OAAO,MAAM,KAAK;EACtB,IAAI,UAAU,SACZ,OAAO,KAAK,OAAO;GAAE,OAAO;GAAS,QAAQ;GAAS,KAAK;GAAU,oBAAoB;EAAK,CAAC;EAGjG,OAAO;GAAE,WAAW;GAAc,OAAM,MADtB,KAAK,KAAK,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,SAAS,EAAA,CACV,SAAS,QAAQ;EAAE;CACjE,QAAQ,CAER;CAEF,OAAO;EAAE,WAAW,gBAAgB,KAAK;EAAG,MAAM,OAAO,KAAK,KAAK,CAAC,CAAC,SAAS,QAAQ;CAAE;AAC1F"}
@@ -0,0 +1,17 @@
1
+ import { OcrConfig, OcrDriver, OcrRequest } from "../types.mjs";
2
+ import { OcrResultData } from "@arkyc/types";
3
+
4
+ //#region src/drivers/external.d.ts
5
+ /**
6
+ * Generic HTTP OCR driver (driver name `"external"`): POSTs the base64-encoded image to a configured endpoint and
7
+ * expects a JSON response shaped like {@link OcrResultData}. Built from an {@link OcrConfig}; `extract` resolves with the result for one {@link OcrRequest}.
8
+ */
9
+ declare class ExternalOcrDriver implements OcrDriver {
10
+ private readonly config;
11
+ readonly name = "external";
12
+ constructor(config: OcrConfig);
13
+ extract(request: OcrRequest): Promise<OcrResultData>;
14
+ }
15
+ //#endregion
16
+ export { ExternalOcrDriver };
17
+ //# sourceMappingURL=external.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"external.d.mts","names":[],"sources":["../../src/drivers/external.ts"],"mappings":";;;;;;AAOA;;cAAa,iBAAA,YAA6B,SAAA;EAAA,iBAGX,MAAA;EAAA,SAFpB,IAAA;cAEoB,MAAA,EAAQ,SAAA;EAI/B,OAAA,CAAQ,OAAA,EAAS,UAAA,GAAa,OAAA,CAAQ,aAAA;AAAA"}