@yigitahmetsahin/captcha-solver 2.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
6
  var __getProtoOf = Object.getPrototypeOf;
7
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
// Bundler runtime helper: wraps a one-entry object of module initialisers into
// a lazy `__init` function. The first call runs the object's first own-property
// function and caches its return value; later calls return the cached value.
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initialiser = fn[__getOwnPropNames(fn)[0]];
    // Clear `fn` before running so a re-entrant call during initialisation
    // falls through to the cached result instead of running the body again.
    fn = 0;
    res = initialiser(0);
  }
  return res;
};
8
11
  var __export = (target, all) => {
9
12
  for (var name in all)
10
13
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -27,11 +30,214 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
27
30
  ));
28
31
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
32
 
33
+ // src/preprocess.ts
34
/**
 * Preprocess a captcha image and return the result as a base64 string.
 *
 * Thin wrapper over `preprocessCaptchaToBuffer`; both arguments are forwarded
 * unchanged.
 *
 * @param {string|Buffer} input - Image path or image data, as accepted by `preprocessCaptchaToBuffer`.
 * @param {object} [options] - Preprocessing options, forwarded as-is.
 * @returns {Promise<string>} Base64 encoding of the processed image buffer.
 */
async function preprocessCaptcha(input, options) {
  const processed = await preprocessCaptchaToBuffer(input, options);
  const encoded = processed.toString("base64");
  return encoded;
}
38
/**
 * Run the captcha clean-up pipeline and return the result as a PNG buffer.
 *
 * Stages, in order: optional top pre-crop, median/blur/greyscale smoothing,
 * upscale, optional post-blur and normalise, contrast stretch around the mean
 * (+ optional sharpen), optional threshold, optional resize to `targetWidth`,
 * crop, optional negate, optional white padding.
 *
 * @param {string|Buffer} input - File path (resolved with `path.resolve`) or image data.
 * @param {object} [options]
 * @param {number} [options.preCropHeight=1] - Fraction (0..1, exclusive) of the height to keep from the top.
 * @param {number} [options.median=0] - Median filter size; skipped when 0.
 * @param {number} [options.blur=1.5] - Pre-upscale blur sigma; skipped when 0.
 * @param {boolean} [options.greyscale=true]
 * @param {number} [options.scale=4] - Upscale factor applied to both dimensions.
 * @param {string} [options.upscaleKernel="lanczos3"]
 * @param {number} [options.postBlur=0] - Post-upscale blur sigma; skipped when 0.
 * @param {boolean} [options.normalise=false]
 * @param {number} [options.contrast=3] - Linear gain; 1 disables the contrast stage.
 * @param {boolean} [options.sharpen=true]
 * @param {number|false} [options.threshold=false] - Binarisation level; applied only when a number.
 * @param {boolean} [options.negate=false]
 * @param {"auto"|"none"|"legacy"|{left:number,top:number,right:number,bottom:number}} [options.crop="auto"]
 *   Fractional crop boxes are relative to the image as it is at the crop stage.
 * @param {boolean|number} [options.padding=true] - `false` = none, number = same pad on all sides,
 *   otherwise 20px vertical / 30px horizontal.
 * @param {number} [options.targetWidth] - If positive, resize to this width before cropping.
 * @returns {Promise<Buffer>} PNG-encoded result.
 * @throws {Error} If the image dimensions cannot be read, or when sharp rejects an operation.
 */
async function preprocessCaptchaToBuffer(input, options) {
  const {
    preCropHeight = 1,
    median = 0,
    blur = 1.5,
    greyscale = true,
    scale = 4,
    upscaleKernel = "lanczos3",
    postBlur = 0,
    normalise = false,
    contrast = 3,
    sharpen = true,
    threshold = false,
    negate = false,
    crop = "auto",
    padding = true,
    targetWidth
  } = options ?? {};
  const sharp = import_sharp.default;
  const sharpenOptions = { sigma: 1, m1: 2, m2: 1 };

  let source = typeof input === "string" ? import_path.default.resolve(input) : input;
  const metadata = await sharp(source).metadata();
  const origW = metadata.width;
  let origH = metadata.height;
  if (!origW || !origH) {
    // Without dimensions every later size computation would silently become NaN.
    throw new Error("preprocessCaptchaToBuffer: unable to read image dimensions");
  }

  if (preCropHeight < 1 && preCropHeight > 0) {
    // Keep at least one row: a zero-height extract is rejected by sharp.
    const keepH = Math.max(1, Math.floor(origH * preCropHeight));
    source = await sharp(source).extract({ left: 0, top: 0, width: origW, height: keepH }).toBuffer();
    origH = keepH;
  }

  // Each stage is materialised with toBuffer() so the operations run in the
  // order written here rather than in sharp's internal pipeline order.
  let pipeline = sharp(source);
  if (median > 0) pipeline = pipeline.median(median);
  if (blur > 0) pipeline = pipeline.blur(blur);
  if (greyscale) pipeline = pipeline.greyscale();
  const smoothed = await pipeline.toBuffer();

  const upscaled = await sharp(smoothed)
    .resize(origW * scale, origH * scale, { kernel: upscaleKernel })
    .toBuffer();

  let postProcessed = upscaled;
  if (postBlur > 0) {
    postProcessed = await sharp(upscaled).blur(postBlur).toBuffer();
  }
  if (normalise) {
    postProcessed = await sharp(postProcessed).normalise().toBuffer();
  }

  let enhanced;
  if (contrast !== 1) {
    // Stretch contrast around the first channel's mean so the mean stays put.
    const stats = await sharp(postProcessed).stats();
    const mean = stats.channels[0].mean;
    let pipe = sharp(postProcessed).linear(contrast, mean * (1 - contrast));
    if (sharpen) pipe = pipe.sharpen(sharpenOptions);
    enhanced = await pipe.toBuffer();
  } else if (sharpen) {
    enhanced = await sharp(postProcessed).sharpen(sharpenOptions).toBuffer();
  } else {
    enhanced = postProcessed;
  }

  if (typeof threshold === "number") {
    enhanced = await sharp(enhanced).threshold(threshold).toBuffer();
  }
  if (targetWidth && targetWidth > 0) {
    enhanced = await sharp(enhanced).resize(targetWidth, null, { kernel: "lanczos3" }).toBuffer();
  }

  let cropped;
  if (crop === "none") {
    cropped = enhanced;
  } else if (crop === "auto") {
    cropped = await autoCrop(enhanced);
  } else {
    const fractions = crop === "legacy" ? LEGACY_CROP : crop;
    // Measure the image as it is now: a `targetWidth` resize above changes the
    // size, so origW*scale / origH*scale may no longer describe `enhanced`.
    const current = await sharp(enhanced).metadata();
    const fullW = current.width ?? origW * scale;
    const fullH = current.height ?? origH * scale;
    const cropLeft = Math.floor(fullW * fractions.left);
    const cropTop = Math.floor(fullH * fractions.top);
    const cropRight = Math.floor(fullW * fractions.right);
    const cropBottom = Math.floor(fullH * fractions.bottom);
    cropped = await sharp(enhanced)
      .extract({
        left: cropLeft,
        top: cropTop,
        width: cropRight - cropLeft,
        height: cropBottom - cropTop
      })
      .toBuffer();
  }

  const final = negate ? await sharp(cropped).negate().toBuffer() : cropped;
  if (padding === false) {
    return sharp(final).png().toBuffer();
  }
  const pad = typeof padding === "number" ? padding : void 0;
  const vPad = pad ?? 20;
  const hPad = pad ?? 30;
  return sharp(final)
    .extend({
      top: vPad,
      bottom: vPad,
      left: hPad,
      right: hPad,
      background: { r: 255, g: 255, b: 255 }
    })
    .png()
    .toBuffer();
}
126
/**
 * Best-effort border trim of an already enhanced image.
 *
 * Asks sharp to trim with a threshold of 30 and keeps the trimmed image only
 * when it is larger than 2x2 pixels. Any failure, or a too-small result,
 * falls back to the untouched input.
 *
 * @param {Buffer} enhanced - Image data to trim.
 * @returns {Promise<Buffer>} Trimmed image data, or `enhanced` itself.
 */
async function autoCrop(enhanced) {
  try {
    const { data, info } = await (0, import_sharp.default)(enhanced)
      .trim({ threshold: 30 })
      .toBuffer({ resolveWithObject: true });
    const isUsable = info.width > 2 && info.height > 2;
    if (isUsable) {
      return data;
    }
  } catch {
    // Deliberate fallback: a failed trim returns the input unchanged below.
  }
  return enhanced;
}
138
/**
 * Read a file synchronously and return its raw bytes as a base64 string.
 * No image processing is applied.
 *
 * @param {string} imagePath - Path of the file to read.
 * @returns {string} Base64 encoding of the file contents.
 */
function imageToBase64(imagePath) {
  return import_fs.default.readFileSync(imagePath).toString("base64");
}
142
+ var import_fs, import_path, import_sharp, LEGACY_CROP;
143
+ var init_preprocess = __esm({
144
+ "src/preprocess.ts"() {
145
+ "use strict";
146
+ import_fs = __toESM(require("fs"), 1);
147
+ import_path = __toESM(require("path"), 1);
148
+ import_sharp = __toESM(require("sharp"), 1);
149
+ LEGACY_CROP = { left: 0.1, top: 0.02, right: 0.9, bottom: 0.6 };
150
+ }
151
+ });
152
+
153
+ // src/tesseract.ts
154
+ var tesseract_exports = {};
155
+ __export(tesseract_exports, {
156
+ TESSERACT_VARIANTS: () => TESSERACT_VARIANTS,
157
+ createTesseractReader: () => createTesseractReader
158
+ });
159
/**
 * Create an OCR reader backed by a tesseract.js worker.
 *
 * `tesseract.js` is loaded with a dynamic import; when that import fails the
 * function resolves to `null` instead of throwing. The worker is configured
 * for a single text line restricted to A-Z and 0-9.
 *
 * @returns {Promise<null | {
 *   recognize(image: any): Promise<string>,
 *   recognizeMulti(input: any, variants: object[]): Promise<string[]>,
 *   dispose(): Promise<void>
 * }>} Reader object, or `null` when tesseract.js cannot be imported.
 * @throws Whatever `createWorker` or `setParameters` rejects with.
 */
async function createTesseractReader() {
  let createWorker;
  try {
    ({ createWorker } = await import("tesseract.js"));
  } catch {
    // Import failed: report "no reader" rather than an error.
    return null;
  }

  const worker = await createWorker("eng");
  try {
    await worker.setParameters({
      tessedit_char_whitelist: "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
      tessedit_pageseg_mode: "7"
      // PSM.SINGLE_LINE
    });
  } catch (err) {
    // Do not leak the worker when configuration fails.
    await worker.terminate().catch(() => {});
    throw err;
  }

  // Keep only the characters a captcha answer may contain.
  const toCaptchaText = (text) => text.trim().replace(/[^A-Z0-9]/g, "");

  return {
    /** OCR a single image and return its cleaned text. */
    async recognize(image) {
      const { data } = await worker.recognize(image);
      return toCaptchaText(data.text);
    },
    /**
     * Preprocess `input` once per variant, OCR each result, and collect the
     * cleaned reads that are 2-8 characters long. A variant that fails is
     * skipped so the remaining variants still contribute.
     */
    async recognizeMulti(input, variants) {
      const results = [];
      for (const opts of variants) {
        try {
          const buf = await preprocessCaptchaToBuffer(input, opts);
          const { data } = await worker.recognize(buf);
          const clean = toCaptchaText(data.text);
          if (clean.length >= 2 && clean.length <= 8) {
            results.push(clean);
          }
        } catch {
          // Best-effort: ignore this variant and continue with the next one.
        }
      }
      return results;
    },
    /** Terminate the underlying worker. */
    async dispose() {
      await worker.terminate();
    }
  };
}
198
+ var TESSERACT_VARIANTS;
199
+ var init_tesseract = __esm({
200
+ "src/tesseract.ts"() {
201
+ "use strict";
202
+ init_preprocess();
203
+ TESSERACT_VARIANTS = [
204
+ // Variant 1: standard enhanced
205
+ {
206
+ blur: 1.5,
207
+ greyscale: true,
208
+ scale: 4,
209
+ contrast: 3,
210
+ sharpen: true,
211
+ crop: "auto",
212
+ padding: true
213
+ },
214
+ // Variant 2: enhanced + negated
215
+ {
216
+ blur: 1.5,
217
+ greyscale: true,
218
+ scale: 4,
219
+ contrast: 3,
220
+ sharpen: true,
221
+ negate: true,
222
+ crop: "auto",
223
+ padding: true
224
+ }
225
+ ];
226
+ }
227
+ });
228
+
30
229
  // src/index.ts
31
230
  var index_exports = {};
32
231
  __export(index_exports, {
232
+ DITHER_CONFUSION_GROUPS: () => DITHER_CONFUSION_GROUPS,
233
+ LEGACY_CONFUSION_GROUPS: () => LEGACY_CONFUSION_GROUPS,
33
234
  Solver: () => Solver,
235
+ TESSERACT_VARIANTS: () => TESSERACT_VARIANTS,
236
+ createTesseractReader: () => createTesseractReader,
237
+ disambiguateResult: () => disambiguateResult,
34
238
  imageToBase64: () => imageToBase64,
239
+ majorityVote: () => majorityVote,
240
+ majorityVoteDetailed: () => majorityVoteDetailed,
35
241
  preprocessCaptcha: () => preprocessCaptcha,
36
242
  preprocessCaptchaToBuffer: () => preprocessCaptchaToBuffer
37
243
  });
@@ -39,52 +245,317 @@ module.exports = __toCommonJS(index_exports);
39
245
 
40
246
  // src/solver.ts
41
247
  var import_ai = require("ai");
248
+ init_preprocess();
42
249
 
43
- // src/preprocess.ts
44
- var import_fs = __toESM(require("fs"), 1);
45
- var import_path = __toESM(require("path"), 1);
46
- var import_sharp = __toESM(require("sharp"), 1);
47
- async function preprocessCaptcha(input) {
48
- const buf = await preprocessCaptchaToBuffer(input);
49
- return buf.toString("base64");
250
+ // src/disambiguate.ts
251
+ var import_sharp2 = __toESM(require("sharp"), 1);
252
/**
 * Re-examine voted "2"/"Z" characters against the image and overwrite them
 * in place when the glyph shape indicates a different character.
 *
 * A "2"/"Z" position is re-examined when its vote tally contains at least one
 * vote for "6", "L" or "1", or when the whole result holds three or more
 * "2"/"Z" characters. If nothing qualifies the image is never decoded.
 *
 * @param {string[]} result - Voted characters; mutated in place.
 * @param {Array<Map<string, number>>} rankedByPos - Per-position vote tallies.
 *   A missing entry is treated as "no alternative votes".
 * @param {Buffer} binaryImage - Image handed to sharp for shape analysis.
 * @returns {Promise<void>}
 */
async function disambiguateResult(result, rankedByPos, binaryImage) {
  const isTwoOrZ = (c) => c === "2" || c === "Z";
  // Loop-invariant: `result` is not modified while candidates are collected.
  const manyTwoZ = result.filter(isTwoOrZ).length >= 3;

  const ambiguousPositions = [];
  for (let pos = 0; pos < result.length; pos++) {
    if (!isTwoOrZ(result[pos])) continue;
    const ranked = rankedByPos[pos];
    const hasAlt = ["6", "L", "1"].some((alt) => (ranked?.get(alt) ?? 0) >= 1);
    if (hasAlt || manyTwoZ) {
      ambiguousPositions.push(pos);
    }
  }
  if (ambiguousPositions.length === 0) return;

  const sharp = import_sharp2.default;
  const meta = await sharp(binaryImage).metadata();
  const fullW = meta.width;
  const fullH = meta.height;
  // Analyse only the middle band: skip the top 12% and keep 76% of the height.
  const cropTop = Math.floor(fullH * 0.12);
  const cropH = Math.floor(fullH * 0.76);
  // NOTE(review): the indexing below treats the raw output as one byte per
  // pixel — confirm greyscale().raw() yields a single channel for these inputs.
  const { data, info } = await sharp(binaryImage)
    .extract({ left: 0, top: cropTop, width: fullW, height: cropH })
    .greyscale()
    .negate()
    .raw()
    .toBuffer({ resolveWithObject: true });
  const w = info.width;
  const h = info.height;
  const pixels = new Uint8Array(data);

  const regions = segmentCharacters(pixels, w, h, result.length);
  if (!regions || regions.length !== result.length) return;

  for (const pos of ambiguousPositions) {
    const features = analyseCharacter(pixels, w, h, regions[pos]);
    const newChar = classifyFromFeatures(features, result[pos]);
    if (newChar) {
      result[pos] = newChar;
    }
  }
}
51
- async function preprocessCaptchaToBuffer(input) {
52
- const source = typeof input === "string" ? import_path.default.resolve(input) : input;
53
- const metadata = await (0, import_sharp.default)(source).metadata();
54
- const origW = metadata.width;
55
- const origH = metadata.height;
56
- const smoothed = await (0, import_sharp.default)(source).blur(1.5).greyscale().toBuffer();
57
- const upscaled = await (0, import_sharp.default)(smoothed).resize(origW * 4, origH * 4, { kernel: "lanczos3" }).toBuffer();
58
- const stats = await (0, import_sharp.default)(upscaled).stats();
59
- const mean = stats.channels[0].mean;
60
- const enhanced = await (0, import_sharp.default)(upscaled).linear(3, mean * (1 - 3)).sharpen({ sigma: 1, m1: 2, m2: 1 }).toBuffer();
61
- const scaledW = origW * 4;
62
- const scaledH = origH * 4;
63
- const cropLeft = Math.floor(scaledW * 0.1);
64
- const cropTop = Math.floor(scaledH * 0.02);
65
- const cropRight = Math.floor(scaledW * 0.9);
66
- const cropBottom = Math.floor(scaledH * 0.6);
67
- const cropW = cropRight - cropLeft;
68
- const cropH = cropBottom - cropTop;
69
- return (0, import_sharp.default)(enhanced).extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH }).extend({
70
- top: 20,
71
- bottom: 20,
72
- left: 30,
73
- right: 30,
74
- background: { r: 255, g: 255, b: 255 }
75
- }).png().toBuffer();
288
/**
 * Split a single-channel image into `expectedCount` side-by-side character boxes.
 *
 * A pixel counts as ink when its value is >= 128. Column ink densities are
 * smoothed with a +/-15 column window; the deepest local minima that are far
 * enough apart become split points. When fewer than `expectedCount - 1`
 * usable minima exist, the content span is divided evenly instead.
 *
 * @param {ArrayLike<number>} pixels - Row-major pixel values, one per pixel.
 * @param {number} w - Image width in pixels.
 * @param {number} h - Image height in pixels.
 * @param {number} expectedCount - Number of characters to produce.
 * @returns {Array<{left:number,right:number,top:number,bottom:number}>}
 *   One box per character (`right`/`bottom` exclusive). A box whose columns
 *   contain no ink has `top === 0` and `bottom === 0`. Empty array when
 *   `expectedCount` is below 1.
 */
function segmentCharacters(pixels, w, h, expectedCount) {
  if (!(expectedCount >= 1)) return [];
  const isInk = (x, y) => pixels[y * w + x] >= 128;

  // Fraction of ink pixels in every column.
  const colDensity = new Float64Array(w);
  for (let x = 0; x < w; x++) {
    let ink = 0;
    for (let y = 0; y < h; y++) {
      if (isInk(x, y)) ink++;
    }
    colDensity[x] = ink / h;
  }

  // Horizontal extent of the content: first/last column with > 5% ink.
  let contentLeft = 0;
  let contentRight = w;
  for (let x = 0; x < w; x++) {
    if (colDensity[x] > 0.05) {
      contentLeft = x;
      break;
    }
  }
  for (let x = w - 1; x >= 0; x--) {
    if (colDensity[x] > 0.05) {
      contentRight = x + 1;
      break;
    }
  }

  // Box-filter the densities, clamping the window to the content span.
  const smoothW = 15;
  const smoothed = new Float64Array(w);
  for (let x = contentLeft; x < contentRight; x++) {
    const from = Math.max(contentLeft, x - smoothW);
    const to = Math.min(contentRight - 1, x + smoothW);
    let sum = 0;
    for (let nx = from; nx <= to; nx++) sum += colDensity[nx];
    smoothed[x] = sum / (to - from + 1);
  }

  // Largest smoothed value in [from, to); -Infinity for an empty range.
  const maxInRange = (from, to) => {
    let best = -Infinity;
    for (let i = from; i < to; i++) {
      if (smoothed[i] > best) best = smoothed[i];
    }
    return best;
  };

  // Candidate split points: local minima away from the content edges whose
  // depth relative to the surrounding +/-40 columns exceeds 0.01.
  const charWidth = (contentRight - contentLeft) / expectedCount;
  const margin = Math.floor(charWidth * 0.6);
  const searchLeft = contentLeft + margin;
  const searchRight = contentRight - margin;
  const valleys = [];
  for (let x = searchLeft + 1; x < searchRight - 1; x++) {
    if (smoothed[x] > smoothed[x - 1] || smoothed[x] > smoothed[x + 1]) continue;
    const leftMax = maxInRange(Math.max(searchLeft, x - 40), x);
    const rightMax = maxInRange(x + 1, Math.min(searchRight, x + 41));
    const depth = Math.min(leftMax, rightMax) - smoothed[x];
    if (depth > 0.01) {
      valleys.push({ x, depth });
    }
  }

  // Greedily take the deepest valleys that keep a minimum distance from each other.
  valleys.sort((a, b) => b.depth - a.depth);
  const splits = [];
  const minDist = charWidth * 0.6;
  for (const v of valleys) {
    if (splits.length >= expectedCount - 1) break;
    if (splits.every((s) => Math.abs(s - v.x) > minDist)) {
      splits.push(v.x);
    }
  }

  // Not enough valleys: fall back to equal-width slices of the content span.
  if (splits.length < expectedCount - 1) {
    splits.length = 0;
    for (let i = 1; i < expectedCount; i++) {
      splits.push(Math.floor(contentLeft + charWidth * i));
    }
  }
  splits.sort((a, b) => a - b);

  const boundaries = [contentLeft, ...splits, contentRight];
  return boundaries.slice(0, expectedCount).map((start, idx) => {
    const end = boundaries[idx + 1];
    let top = -1;
    let bottom = -1;
    for (let y = 0; y < h; y++) {
      for (let x = start; x < end; x++) {
        if (isInk(x, y)) {
          if (top < 0) top = y;
          bottom = y;
          break; // one ink pixel is enough to mark this row as occupied
        }
      }
    }
    if (top < 0) {
      // No ink in these columns: report an empty box, not an inverted one.
      return { left: start, right: end, top: 0, bottom: 0 };
    }
    return { left: start, right: end, top, bottom: bottom + 1 };
  });
}
77
- function imageToBase64(imagePath) {
78
- const buffer = import_fs.default.readFileSync(imagePath);
79
- return buffer.toString("base64");
375
/**
 * Breadth-first fill over empty (0) cells of `grid`, 4-connected.
 *
 * `queue` must already contain the seed indices, each already marked in
 * `visited`; it is extended in place as cells are discovered.
 *
 * @returns {{area:number, sumY:number}} Number of cells reached (seeds
 *   included) and the sum of their row indices.
 */
function floodFillEmpty(grid, visited, rw, rh, queue) {
  const visit = (ni) => {
    if (!visited[ni] && grid[ni] === 0) {
      visited[ni] = 1;
      queue.push(ni);
    }
  };
  let area = 0;
  let sumY = 0;
  for (let qi = 0; qi < queue.length; qi++) {
    const idx = queue[qi];
    const x = idx % rw;
    const y = (idx - x) / rw;
    area++;
    sumY += y;
    if (y + 1 < rh) visit(idx + rw);
    if (y > 0) visit(idx - rw);
    if (x + 1 < rw) visit(idx + 1);
    if (x > 0) visit(idx - 1);
  }
  return { area, sumY };
}

/**
 * Count enclosed holes inside a character box.
 *
 * Pixels >= 128 are ink. Empty cells reachable from the box border are
 * background; every remaining connected group of empty cells is a hole.
 * Holes covering no more than 0.5% of the box area are ignored. A hole is
 * "bottom" when the mean row of its cells is at or below the vertical
 * midpoint of the box, otherwise "top".
 *
 * @param {ArrayLike<number>} pixels - Row-major pixel values of the full image.
 * @param {number} imgW - Width of the full image (row stride of `pixels`).
 * @param {{left:number,right:number,top:number,bottom:number}} region - Box to inspect (`right`/`bottom` exclusive).
 * @returns {{count:number, hasBottom:boolean, hasTop:boolean}} All-zero/false
 *   when the box is narrower or shorter than 3 pixels.
 */
function detectHoles(pixels, imgW, region) {
  const rw = region.right - region.left;
  const rh = region.bottom - region.top;
  if (rw < 3 || rh < 3) return { count: 0, hasBottom: false, hasTop: false };

  // Binary copy of the box: 1 = ink, 0 = empty.
  const cellCount = rw * rh;
  const grid = new Uint8Array(cellCount);
  for (let ly = 0; ly < rh; ly++) {
    const rowStart = (region.top + ly) * imgW + region.left;
    for (let lx = 0; lx < rw; lx++) {
      grid[ly * rw + lx] = pixels[rowStart + lx] >= 128 ? 1 : 0;
    }
  }

  // Seed the background fill from every empty cell on the box border.
  const visited = new Uint8Array(cellCount);
  const borderSeeds = [];
  const seed = (idx) => {
    if (grid[idx] === 0 && !visited[idx]) {
      visited[idx] = 1;
      borderSeeds.push(idx);
    }
  };
  for (let lx = 0; lx < rw; lx++) {
    seed(lx);
    seed((rh - 1) * rw + lx);
  }
  for (let ly = 0; ly < rh; ly++) {
    seed(ly * rw);
    seed(ly * rw + rw - 1);
  }
  floodFillEmpty(grid, visited, rw, rh, borderSeeds);

  // Whatever is still empty and unvisited is enclosed by ink.
  const minHoleArea = cellCount * 5e-3;
  const midY = rh / 2;
  let holeCount = 0;
  let hasBottom = false;
  let hasTop = false;
  for (let idx = 0; idx < cellCount; idx++) {
    if (grid[idx] !== 0 || visited[idx]) continue;
    visited[idx] = 1;
    const { area, sumY } = floodFillEmpty(grid, visited, rw, rh, [idx]);
    if (area <= minHoleArea) continue;
    holeCount++;
    if (sumY / area >= midY) hasBottom = true;
    else hasTop = true;
  }
  return { count: holeCount, hasBottom, hasTop };
}
479
/**
 * Measure shape features of one character box.
 *
 * Pixels >= 128 are ink. The "top" and "bottom" bands are the first and last
 * quarter of the box height (at least 3 rows), clamped to the box height so
 * that a very short box never samples rows outside itself.
 *
 * @param {ArrayLike<number>} pixels - Row-major pixel values of the full image.
 * @param {number} imgW - Width of the full image (row stride of `pixels`).
 * @param {number} _imgH - Unused.
 * @param {{left:number,right:number,top:number,bottom:number}} region - Box to analyse (`right`/`bottom` exclusive).
 * @returns {{
 *   hasHoleBottom:boolean, hasHoleTop:boolean, holeCount:number,
 *   aspectRatio:number, bottomHorizontalExtent:number,
 *   topHorizontalExtent:number, topCurvature:boolean
 * }} `aspectRatio` is height/width; the extents are the horizontal ink span
 *   within a band as a fraction of the box width; `topCurvature` is true when
 *   more than 15% of the top band's right half is ink.
 */
function analyseCharacter(pixels, imgW, _imgH, region) {
  const rw = region.right - region.left;
  const rh = region.bottom - region.top;
  const holes = detectHoles(pixels, imgW, region);
  const aspectRatio = rh / Math.max(rw, 1);
  const isInk = (lx, ly) => pixels[(region.top + ly) * imgW + (region.left + lx)] >= 128;

  // Band height: a quarter of the box, at least 3 rows, never more than the box.
  const bandH = Math.max(0, Math.min(rh, Math.max(3, Math.floor(rh * 0.25))));

  // Horizontal ink span over rows [fromY, toY), as a fraction of the box width.
  const inkExtent = (fromY, toY) => {
    let minX = rw;
    let maxX = 0;
    for (let ly = fromY; ly < toY; ly++) {
      for (let lx = 0; lx < rw; lx++) {
        if (!isInk(lx, ly)) continue;
        if (lx < minX) minX = lx;
        if (lx > maxX) maxX = lx;
      }
    }
    return maxX > minX ? (maxX - minX) / rw : 0;
  };
  const topHorizontalExtent = inkExtent(0, bandH);
  const bottomHorizontalExtent = inkExtent(rh - bandH, rh);

  // Ink density in the right half of the top band.
  const rightHalf = Math.floor(rw / 2);
  let topRightDark = 0;
  let topRightTotal = 0;
  for (let ly = 0; ly < bandH; ly++) {
    for (let lx = rightHalf; lx < rw; lx++) {
      topRightTotal++;
      if (isInk(lx, ly)) topRightDark++;
    }
  }
  const topCurvature = topRightTotal > 0 && topRightDark / topRightTotal > 0.15;

  return {
    hasHoleBottom: holes.hasBottom,
    hasHoleTop: holes.hasTop,
    holeCount: holes.count,
    aspectRatio,
    bottomHorizontalExtent,
    topHorizontalExtent,
    topCurvature
  };
}
527
/**
 * Map measured glyph features to a replacement character.
 *
 * Rules, first match wins:
 *  - a hole in the bottom half only          -> "6"
 *  - two or more holes                       -> "8"
 *  - any other glyph that has a hole         -> no change
 *  - tall (h/w > 1.8) without top curvature  -> "1"
 *  - wide foot (bottom extent > 0.5 and > 1.15x the top extent, h/w > 0.8) -> "L"
 *
 * @param {object} features - Feature record as produced by `analyseCharacter`.
 * @param {string} _votedChar - Unused.
 * @returns {string|null} Replacement character, or `null` to keep the current one.
 */
function classifyFromFeatures(features, _votedChar) {
  const {
    hasHoleBottom,
    hasHoleTop,
    holeCount,
    aspectRatio,
    bottomHorizontalExtent,
    topHorizontalExtent,
    topCurvature
  } = features;

  if (hasHoleBottom && !hasHoleTop) return "6";
  if (holeCount >= 2) return "8";
  if (hasHoleTop && !hasHoleBottom) return null;
  // The remaining rules only apply to glyphs without any hole.
  if (holeCount !== 0) return null;

  const looksLikeOne = aspectRatio > 1.8 && !topCurvature;
  if (looksLikeOne) return "1";

  const hasWideFoot =
    bottomHorizontalExtent > 0.5 && bottomHorizontalExtent > topHorizontalExtent * 1.15;
  return hasWideFoot && aspectRatio > 0.8 ? "L" : null;
}
81
545
 
82
546
  // src/solver.ts
83
- var PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.
84
- The text contains uppercase letters A-Z and/or digits 0-9.
85
- A thin vertical stroke is the digit 1. Never read it as the letter I or L.
86
- A round closed shape is the letter O, not the letter D.
87
- Output ONLY the exact characters you read, nothing else.`;
547
+ var PROMPT = `Read the 4 distorted characters in these images. Two processed versions shown.
548
+ The text uses UPPERCASE A-Z and digits 0-9 only. No lowercase.
549
+
550
+ WARNING: The dithered rendering makes many characters appear as "2". Before writing "2", check:
551
+ - Could it be "6"? (has closed loop at bottom)
552
+ - Could it be "L"? (has vertical stem + horizontal foot, 90\xB0 corner)
553
+ - Could it be "1"? (thin vertical stroke, no curve)
554
+ - Could it be "Z"? (all straight lines, sharp angles)
555
+
556
+ Also watch for: O/0 have curved sides (not D which has flat left); B has two bumps (not D with one curve); X is two crossing diagonals (not K with vertical bar); G has horizontal bar inside (not C).
557
+
558
+ Output ONLY the 4 characters.`;
88
559
  var DEFAULT_MODELS = {
89
560
  openai: "gpt-4o",
90
561
  anthropic: "claude-sonnet-4-20250514",
@@ -110,7 +581,7 @@ async function resolveModel(apiKey, provider, modelId) {
110
581
  );
111
582
  }
112
583
  }
113
- var CONFUSION_GROUPS = {
584
+ var LEGACY_CONFUSION_GROUPS = {
114
585
  "1": "1",
115
586
  I: "1",
116
587
  L: "1",
@@ -122,7 +593,23 @@ var CONFUSION_GROUPS = {
122
593
  Z: "Z",
123
594
  "2": "Z"
124
595
  };
125
- function majorityVote(attempts, expectedLength) {
596
+ var DITHER_CONFUSION_GROUPS = {
597
+ D: "O",
598
+ O: "O",
599
+ I: "1",
600
+ "1": "1",
601
+ K: "X",
602
+ X: "X",
603
+ A: "X",
604
+ C: "G",
605
+ G: "G",
606
+ "9": "8",
607
+ "8": "8",
608
+ Y: "X",
609
+ E: "5",
610
+ "5": "5"
611
+ };
612
+ function majorityVote(attempts, expectedLength, groups) {
126
613
  let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;
127
614
  if (filtered.length === 0) {
128
615
  filtered = attempts;
@@ -142,30 +629,127 @@ function majorityVote(attempts, expectedLength) {
142
629
  }
143
630
  const sameLenAttempts = filtered.filter((a) => a.length === bestLen);
144
631
  if (sameLenAttempts.length === 0) return filtered[0];
632
+ const useGroups = groups && typeof groups === "object" ? groups : void 0;
145
633
  const result = [];
634
+ const rankedByPos = [];
146
635
  for (let pos = 0; pos < bestLen; pos++) {
147
636
  const charCounts = /* @__PURE__ */ new Map();
148
637
  for (const a of sameLenAttempts) {
149
638
  const ch = a[pos];
150
639
  charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);
151
640
  }
152
- const groupCounts = /* @__PURE__ */ new Map();
153
- for (const [ch, count] of charCounts) {
154
- const canonical = CONFUSION_GROUPS[ch] ?? ch;
155
- groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);
156
- }
157
- let bestGroup = "";
158
- let bestGroupCount = 0;
159
- for (const [canonical, count] of groupCounts) {
160
- if (count > bestGroupCount) {
161
- bestGroup = canonical;
162
- bestGroupCount = count;
641
+ if (useGroups) {
642
+ const groupCounts = /* @__PURE__ */ new Map();
643
+ for (const [ch, count] of charCounts) {
644
+ const canonical = useGroups[ch] ?? ch;
645
+ groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);
646
+ }
647
+ rankedByPos.push(groupCounts);
648
+ let bestGroup = "";
649
+ let bestGroupCount = 0;
650
+ for (const [canonical, count] of groupCounts) {
651
+ if (count > bestGroupCount) {
652
+ bestGroup = canonical;
653
+ bestGroupCount = count;
654
+ }
655
+ }
656
+ result.push(bestGroup);
657
+ } else {
658
+ rankedByPos.push(charCounts);
659
+ let bestChar = "";
660
+ let bestCharCount = 0;
661
+ for (const [ch, count] of charCounts) {
662
+ if (count > bestCharCount) {
663
+ bestChar = ch;
664
+ bestCharCount = count;
665
+ }
666
+ }
667
+ result.push(bestChar);
668
+ }
669
+ }
670
+ if (bestLen >= 4) {
671
+ const charFreq = /* @__PURE__ */ new Map();
672
+ for (const ch of result) {
673
+ charFreq.set(ch, (charFreq.get(ch) ?? 0) + 1);
674
+ }
675
+ for (const [ch, freq] of charFreq) {
676
+ if (freq < 3) continue;
677
+ let strongestPos = -1;
678
+ let strongestCount = 0;
679
+ for (let pos = 0; pos < bestLen; pos++) {
680
+ if (result[pos] !== ch) continue;
681
+ const count = rankedByPos[pos].get(ch) ?? 0;
682
+ if (count > strongestCount) {
683
+ strongestCount = count;
684
+ strongestPos = pos;
685
+ }
686
+ }
687
+ for (let pos = 0; pos < bestLen; pos++) {
688
+ if (result[pos] !== ch || pos === strongestPos) continue;
689
+ const ranked = rankedByPos[pos];
690
+ const usedChars = new Set(result);
691
+ let bestUnique = "";
692
+ let bestUniqueCount = 0;
693
+ let bestAny = "";
694
+ let bestAnyCount = 0;
695
+ for (const [c, count] of ranked) {
696
+ if (c === ch) continue;
697
+ if (count > bestAnyCount) {
698
+ bestAny = c;
699
+ bestAnyCount = count;
700
+ }
701
+ if (!usedChars.has(c) && count > bestUniqueCount) {
702
+ bestUnique = c;
703
+ bestUniqueCount = count;
704
+ }
705
+ }
706
+ const sub = bestUniqueCount >= 2 ? bestUnique : bestAnyCount >= 2 ? bestAny : "";
707
+ if (sub) {
708
+ result[pos] = sub;
709
+ }
163
710
  }
164
711
  }
165
- result.push(bestGroup);
166
712
  }
167
713
  return result.join("");
168
714
  }
715
/**
 * Character-by-character majority vote that also returns the vote tallies.
 *
 * Attempts are first restricted to `expectedLength` (when given and when at
 * least one attempt has that length), then to the most common length among
 * what remains. For every position the most frequent character wins; ties go
 * to the character seen first.
 *
 * @param {string[]} attempts - Candidate readings.
 * @param {number} [expectedLength] - Preferred reading length; falsy = no preference.
 * @param {Record<string, string>|false} [groups] - Optional map from a
 *   character to its canonical form; votes are counted per canonical form.
 *   Any non-object value disables grouping.
 * @returns {{result: string[], rankedByPos: Array<Map<string, number>>}}
 *   Winning character and full tally for each position; both empty when
 *   `attempts` is empty.
 */
function majorityVoteDetailed(attempts, expectedLength, groups) {
  let candidates = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;
  if (candidates.length === 0) candidates = attempts;
  if (candidates.length === 0) return { result: [], rankedByPos: [] };

  // Key with the highest count; the first key to reach that count wins ties.
  const pickTop = (counts, fallback) => {
    let best = fallback;
    let bestCount = 0;
    for (const [key, count] of counts) {
      if (count > bestCount) {
        best = key;
        bestCount = count;
      }
    }
    return best;
  };
  const bump = (counts, key) => counts.set(key, (counts.get(key) ?? 0) + 1);

  const lenCounts = /* @__PURE__ */ new Map();
  for (const a of candidates) bump(lenCounts, a.length);
  const bestLen = pickTop(lenCounts, 0);
  // `bestLen` is the length of at least one candidate, so this is never empty.
  const sameLenAttempts = candidates.filter((a) => a.length === bestLen);

  const useGroups = groups && typeof groups === "object" ? groups : void 0;
  const result = [];
  const rankedByPos = [];
  for (let pos = 0; pos < bestLen; pos++) {
    const counts = /* @__PURE__ */ new Map();
    for (const a of sameLenAttempts) {
      const raw = a[pos];
      bump(counts, useGroups ? useGroups[raw] ?? raw : raw);
    }
    rankedByPos.push(counts);
    result.push(pickTop(counts, ""));
  }
  return { result, rankedByPos };
}
169
753
  function sumOptional(a, b) {
170
754
  if (a === void 0 && b === void 0) return void 0;
171
755
  return (a ?? 0) + (b ?? 0);
@@ -258,30 +842,212 @@ var Solver = class {
258
842
  * @returns Solved text, per-attempt answers, and token usage
259
843
  */
260
844
  async solve(input, options = {}) {
261
- const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;
845
+ const {
846
+ numAttempts = 9,
847
+ expectedLength,
848
+ maxRetries = 2,
849
+ verbose = true,
850
+ confusionGroups = false,
851
+ preprocess,
852
+ useTesseract = true,
853
+ useDisambiguation = true
854
+ } = options;
262
855
  const model = await this.getModel();
263
- const imageBuffer = await preprocessCaptchaToBuffer(input);
264
- const results = await Promise.all(
265
- Array.from({ length: numAttempts }, () => this.singleAttempt(model, imageBuffer, maxRetries))
266
- );
267
- const valid = results.filter((r) => r !== null);
856
+ const [enhancedBuffer, heavyCleanBuffer, mediumCleanBuffer] = await Promise.all([
857
+ preprocessCaptchaToBuffer(input, preprocess),
858
+ preprocessCaptchaToBuffer(input, {
859
+ blur: 0,
860
+ greyscale: true,
861
+ scale: 8,
862
+ upscaleKernel: "nearest",
863
+ postBlur: 15,
864
+ normalise: true,
865
+ contrast: 1,
866
+ sharpen: false,
867
+ threshold: 140,
868
+ negate: true,
869
+ crop: "none",
870
+ targetWidth: 800,
871
+ padding: 20
872
+ }),
873
+ preprocessCaptchaToBuffer(input, {
874
+ blur: 0,
875
+ greyscale: true,
876
+ scale: 8,
877
+ upscaleKernel: "nearest",
878
+ postBlur: 8,
879
+ normalise: true,
880
+ contrast: 1,
881
+ sharpen: false,
882
+ threshold: 120,
883
+ negate: true,
884
+ crop: "none",
885
+ targetWidth: 800,
886
+ padding: 20
887
+ })
888
+ ]);
889
+ const halfN = Math.ceil(numAttempts / 2);
890
+ const visionResults = await Promise.all([
891
+ ...Array.from(
892
+ { length: halfN },
893
+ () => this.singleAttempt(model, enhancedBuffer, heavyCleanBuffer, maxRetries)
894
+ ),
895
+ ...Array.from(
896
+ { length: numAttempts - halfN },
897
+ () => this.singleAttempt(model, enhancedBuffer, mediumCleanBuffer, maxRetries)
898
+ )
899
+ ]);
900
+ const valid = visionResults.filter((r) => r !== null);
268
901
  if (verbose) {
269
902
  valid.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r.text}`));
270
903
  }
271
904
  const attempts = valid.map((r) => r.text);
272
905
  const attemptUsages = valid.map((r) => r.usage);
906
+ if (useTesseract) {
907
+ try {
908
+ const reader = await this.getTesseractReader();
909
+ if (reader) {
910
+ const { TESSERACT_VARIANTS: TESSERACT_VARIANTS2 } = await Promise.resolve().then(() => (init_tesseract(), tesseract_exports));
911
+ const tessReads = await reader.recognizeMulti(input, TESSERACT_VARIANTS2);
912
+ for (const read of tessReads) {
913
+ attempts.push(read);
914
+ if (verbose) console.log(` Tesseract: ${read}`);
915
+ }
916
+ }
917
+ } catch {
918
+ }
919
+ }
920
+ const correctionAttempts = Math.min(3, Math.floor(numAttempts / 3));
921
+ if (correctionAttempts > 0 && attempts.length > 0) {
922
+ const initialVote = majorityVote(attempts, expectedLength, confusionGroups);
923
+ const suspiciousCount = [...initialVote].filter((c) => c === "2" || c === "Z").length;
924
+ if (suspiciousCount >= 2 && initialVote.length === (expectedLength ?? initialVote.length)) {
925
+ const corrPrompt = this.buildCorrectionPrompt(initialVote);
926
+ if (corrPrompt) {
927
+ const corrections = await Promise.all(
928
+ Array.from(
929
+ { length: correctionAttempts },
930
+ () => this.selfCorrect(model, enhancedBuffer, heavyCleanBuffer, initialVote, corrPrompt)
931
+ )
932
+ );
933
+ for (const c of corrections) {
934
+ if (c) {
935
+ for (let w = 0; w < 5; w++) attempts.push(c.text);
936
+ if (verbose) console.log(` Corrected: ${c.text}`);
937
+ }
938
+ }
939
+ }
940
+ }
941
+ }
273
942
  const usage = aggregateUsage(attemptUsages);
274
943
  if (attempts.length === 0) {
275
944
  if (verbose) console.log(" All attempts failed!");
276
945
  return { text: "", attempts, usage, attemptUsages };
277
946
  }
278
- return { text: majorityVote(attempts, expectedLength), attempts, usage, attemptUsages };
947
+ const { result, rankedByPos } = majorityVoteDetailed(attempts, expectedLength, confusionGroups);
948
+ if (useDisambiguation && result.length > 0 && rankedByPos.length > 0) {
949
+ try {
950
+ await disambiguateResult(result, rankedByPos, heavyCleanBuffer);
951
+ const lightCleanBuffer = await preprocessCaptchaToBuffer(input, {
952
+ median: 3,
953
+ blur: 0,
954
+ greyscale: true,
955
+ scale: 4,
956
+ postBlur: 3,
957
+ normalise: true,
958
+ contrast: 1,
959
+ sharpen: false,
960
+ threshold: 128,
961
+ crop: "none",
962
+ padding: 20
963
+ });
964
+ await disambiguateResult(result, rankedByPos, lightCleanBuffer);
965
+ } catch {
966
+ }
967
+ }
968
+ const finalText = majorityVote(
969
+ [...attempts, result.join("")],
970
+ // include disambiguated result as an extra "vote"
971
+ expectedLength,
972
+ confusionGroups
973
+ );
974
+ return {
975
+ text: finalText,
976
+ attempts,
977
+ usage,
978
+ attemptUsages
979
+ };
980
+ }
981
+ _tesseractReader = void 0;
982
+ async getTesseractReader() {
983
+ if (this._tesseractReader !== void 0) return this._tesseractReader;
984
+ try {
985
+ const { createTesseractReader: createTesseractReader2 } = await Promise.resolve().then(() => (init_tesseract(), tesseract_exports));
986
+ this._tesseractReader = await createTesseractReader2();
987
+ } catch {
988
+ this._tesseractReader = null;
989
+ }
990
+ return this._tesseractReader;
991
+ }
992
+ /** Clean up resources (Tesseract worker). */
993
+ async dispose() {
994
+ if (this._tesseractReader) {
995
+ await this._tesseractReader.dispose();
996
+ this._tesseractReader = null;
997
+ }
998
+ }
999
+ buildCorrectionPrompt(initial) {
1000
+ const checks = [...initial].map((c, pos) => {
1001
+ if (c !== "2" && c !== "Z") return null;
1002
+ if (pos === 0)
1003
+ return `Pos ${pos + 1} ("${c}"): thin stroke \u2192 "1"? closed loop at bottom \u2192 "6"? vertical+foot \u2192 "L"?`;
1004
+ if (pos < initial.length - 1)
1005
+ return `Pos ${pos + 1} ("${c}"): vertical + horizontal foot \u2192 "L"? thin stroke \u2192 "1"? loop \u2192 "6"?`;
1006
+ return `Pos ${pos + 1} ("${c}"): curved top \u2192 keep "2"; straight angles \u2192 "Z"`;
1007
+ }).filter(Boolean);
1008
+ if (!checks.length) return null;
1009
+ const prefix = [...initial].filter((c) => c === "2" || c === "Z").length >= 3 ? `"${initial}" has many similar chars \u2014 unusual for a captcha.
1010
+ ` : "";
1011
+ return `${prefix}Recheck:
1012
+ ${checks.join("\n")}
1013
+ Only change with clear evidence. Output ONLY the corrected 4 characters.`;
1014
+ }
1015
+ async selfCorrect(model, primaryBuffer, secondaryBuffer, initial, correctionPrompt) {
1016
+ try {
1017
+ const { text } = await (0, import_ai.generateText)({
1018
+ model,
1019
+ messages: [
1020
+ {
1021
+ role: "user",
1022
+ content: [
1023
+ { type: "text", text: PROMPT },
1024
+ { type: "image", image: primaryBuffer },
1025
+ { type: "image", image: secondaryBuffer }
1026
+ ]
1027
+ },
1028
+ { role: "assistant", content: initial },
1029
+ {
1030
+ role: "user",
1031
+ content: [
1032
+ { type: "text", text: correctionPrompt },
1033
+ { type: "image", image: primaryBuffer }
1034
+ ]
1035
+ }
1036
+ ],
1037
+ temperature: 0.3,
1038
+ maxOutputTokens: 32
1039
+ });
1040
+ const cleaned = text.trim().replace(/[^A-Za-z0-9]/g, "").toUpperCase();
1041
+ return cleaned.length >= 2 && cleaned.length <= 8 ? { text: cleaned } : null;
1042
+ } catch {
1043
+ return null;
1044
+ }
279
1045
  }
280
1046
  /**
281
1047
  * Make a single API call to read the captcha.
282
1048
  * Retries up to `maxRetries` times on failure.
283
1049
  */
284
- async singleAttempt(model, imageBuffer, maxRetries) {
1050
+ async singleAttempt(model, primaryBuffer, secondaryBuffer, maxRetries) {
285
1051
  for (let retry = 0; retry <= maxRetries; retry++) {
286
1052
  try {
287
1053
  const { text, usage } = await (0, import_ai.generateText)({
@@ -291,7 +1057,8 @@ var Solver = class {
291
1057
  role: "user",
292
1058
  content: [
293
1059
  { type: "text", text: PROMPT },
294
- { type: "image", image: imageBuffer }
1060
+ { type: "image", image: primaryBuffer },
1061
+ { type: "image", image: secondaryBuffer }
295
1062
  ]
296
1063
  }
297
1064
  ],
@@ -300,11 +1067,29 @@ var Solver = class {
300
1067
  });
301
1068
  const raw = text.trim();
302
1069
  const lower = raw.toLowerCase();
303
- if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't") || raw.length > 20) {
1070
+ if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't")) {
304
1071
  return null;
305
1072
  }
306
- const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
307
- return cleaned ? { text: cleaned, usage } : null;
1073
+ let answer = "";
1074
+ const allAlpha = raw.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
1075
+ if (allAlpha.length <= 10) {
1076
+ answer = allAlpha;
1077
+ } else {
1078
+ const lines = raw.split(/\n/).reverse();
1079
+ for (const line of lines) {
1080
+ const tokens = line.trim().split(/\s+/);
1081
+ for (let ti = tokens.length - 1; ti >= 0; ti--) {
1082
+ const clean = tokens[ti].replace(/[^A-Za-z0-9]/g, "").toUpperCase();
1083
+ if (clean.length >= 2 && clean.length <= 8) {
1084
+ answer = clean;
1085
+ break;
1086
+ }
1087
+ }
1088
+ if (answer) break;
1089
+ }
1090
+ if (!answer) answer = allAlpha.slice(-8);
1091
+ }
1092
+ return answer ? { text: answer, usage } : null;
308
1093
  } catch (_err) {
309
1094
  if (retry < maxRetries) {
310
1095
  await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
@@ -316,10 +1101,21 @@ var Solver = class {
316
1101
  return null;
317
1102
  }
318
1103
  };
1104
+
1105
+ // src/index.ts
1106
+ init_preprocess();
1107
+ init_tesseract();
319
1108
  // Annotate the CommonJS export names for ESM import in node:
320
1109
  0 && (module.exports = {
1110
+ DITHER_CONFUSION_GROUPS,
1111
+ LEGACY_CONFUSION_GROUPS,
321
1112
  Solver,
1113
+ TESSERACT_VARIANTS,
1114
+ createTesseractReader,
1115
+ disambiguateResult,
322
1116
  imageToBase64,
1117
+ majorityVote,
1118
+ majorityVoteDetailed,
323
1119
  preprocessCaptcha,
324
1120
  preprocessCaptchaToBuffer
325
1121
  });