@yigitahmetsahin/captcha-solver 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
6
  var __getProtoOf = Object.getPrototypeOf;
7
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
// Bundler helper: wraps a single-method object `{ "path"() {...} }` into a
// lazy initialiser. The first call runs that method once and caches its
// result; later calls return the cached result without re-running it.
var __esm = (fn, res) => function __init() {
  if (fn) {
    // The wrapped object has exactly one own property: the module body.
    const moduleKey = __getOwnPropNames(fn)[0];
    const moduleBody = fn[moduleKey];
    // Clear `fn` BEFORE invoking the body so a re-entrant or failed call
    // never runs the body a second time.
    fn = 0;
    res = moduleBody(0);
  }
  return res;
};
8
11
  var __export = (target, all) => {
9
12
  for (var name in all)
10
13
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -27,59 +30,66 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
27
30
  ));
28
31
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
32
 
30
- // src/index.ts
31
- var index_exports = {};
32
- __export(index_exports, {
33
- LEGACY_CONFUSION_GROUPS: () => LEGACY_CONFUSION_GROUPS,
34
- Solver: () => Solver,
35
- imageToBase64: () => imageToBase64,
36
- majorityVote: () => majorityVote,
37
- preprocessCaptcha: () => preprocessCaptcha,
38
- preprocessCaptchaToBuffer: () => preprocessCaptchaToBuffer
39
- });
40
- module.exports = __toCommonJS(index_exports);
41
-
42
- // src/solver.ts
43
- var import_ai = require("ai");
44
-
45
33
  // src/preprocess.ts
46
- var import_fs = __toESM(require("fs"), 1);
47
- var import_path = __toESM(require("path"), 1);
48
- var import_sharp = __toESM(require("sharp"), 1);
49
- var LEGACY_CROP = { left: 0.1, top: 0.02, right: 0.9, bottom: 0.6 };
50
34
  async function preprocessCaptcha(input, options) {
51
35
  const buf = await preprocessCaptchaToBuffer(input, options);
52
36
  return buf.toString("base64");
53
37
  }
54
38
  async function preprocessCaptchaToBuffer(input, options) {
55
39
  const {
40
+ preCropHeight = 1,
41
+ median = 0,
56
42
  blur = 1.5,
43
+ greyscale = true,
57
44
  scale = 4,
45
+ upscaleKernel = "lanczos3",
46
+ postBlur = 0,
47
+ normalise = false,
58
48
  contrast = 3,
59
49
  sharpen = true,
60
- crop = "auto",
61
- padding = true,
50
+ threshold = false,
62
51
  negate = false,
63
- greyscale = true
52
+ crop = "auto",
53
+ padding = true
64
54
  } = options ?? {};
65
- const source = typeof input === "string" ? import_path.default.resolve(input) : input;
55
+ let source = typeof input === "string" ? import_path.default.resolve(input) : input;
66
56
  const metadata = await (0, import_sharp.default)(source).metadata();
67
57
  const origW = metadata.width;
68
- const origH = metadata.height;
58
+ let origH = metadata.height;
59
+ if (preCropHeight < 1 && preCropHeight > 0) {
60
+ const keepH = Math.floor(origH * preCropHeight);
61
+ source = await (0, import_sharp.default)(source).extract({ left: 0, top: 0, width: origW, height: keepH }).toBuffer();
62
+ origH = keepH;
63
+ }
69
64
  let pipeline = (0, import_sharp.default)(source);
65
+ if (median > 0) pipeline = pipeline.median(median);
70
66
  if (blur > 0) pipeline = pipeline.blur(blur);
71
67
  if (greyscale) pipeline = pipeline.greyscale();
72
68
  const smoothed = await pipeline.toBuffer();
73
- const upscaled = await (0, import_sharp.default)(smoothed).resize(origW * scale, origH * scale, { kernel: "lanczos3" }).toBuffer();
69
+ const upscaled = await (0, import_sharp.default)(smoothed).resize(origW * scale, origH * scale, { kernel: upscaleKernel }).toBuffer();
70
+ let postProcessed = upscaled;
71
+ if (postBlur > 0) {
72
+ postProcessed = await (0, import_sharp.default)(upscaled).blur(postBlur).toBuffer();
73
+ }
74
+ if (normalise) {
75
+ postProcessed = await (0, import_sharp.default)(postProcessed).normalise().toBuffer();
76
+ }
74
77
  let enhanced;
75
78
  if (contrast !== 1) {
76
- const stats = await (0, import_sharp.default)(upscaled).stats();
79
+ const stats = await (0, import_sharp.default)(postProcessed).stats();
77
80
  const mean = stats.channels[0].mean;
78
- let pipe = (0, import_sharp.default)(upscaled).linear(contrast, mean * (1 - contrast));
81
+ let pipe = (0, import_sharp.default)(postProcessed).linear(contrast, mean * (1 - contrast));
79
82
  if (sharpen) pipe = pipe.sharpen({ sigma: 1, m1: 2, m2: 1 });
80
83
  enhanced = await pipe.toBuffer();
81
84
  } else {
82
- enhanced = sharpen ? await (0, import_sharp.default)(upscaled).sharpen({ sigma: 1, m1: 2, m2: 1 }).toBuffer() : upscaled;
85
+ enhanced = sharpen ? await (0, import_sharp.default)(postProcessed).sharpen({ sigma: 1, m1: 2, m2: 1 }).toBuffer() : postProcessed;
86
+ }
87
+ if (threshold !== false && typeof threshold === "number") {
88
+ enhanced = await (0, import_sharp.default)(enhanced).threshold(threshold).toBuffer();
89
+ }
90
+ const targetWidth = options?.targetWidth;
91
+ if (targetWidth && targetWidth > 0) {
92
+ enhanced = await (0, import_sharp.default)(enhanced).resize(targetWidth, null, { kernel: "lanczos3" }).toBuffer();
83
93
  }
84
94
  let cropped;
85
95
  if (crop === "none") {
@@ -129,15 +139,423 @@ function imageToBase64(imagePath) {
129
139
  const buffer = import_fs.default.readFileSync(imagePath);
130
140
  return buffer.toString("base64");
131
141
  }
142
// Module-level bindings for src/preprocess.ts. They stay undefined until
// init_preprocess() has run once.
var import_fs;
var import_path;
var import_sharp;
var LEGACY_CROP;

// Lazy initialiser for src/preprocess.ts: loads the module's dependencies and
// assigns the bindings declared above.
var init_preprocess = __esm({
  "src/preprocess.ts"() {
    "use strict";
    import_fs = __toESM(require("fs"), 1);
    import_path = __toESM(require("path"), 1);
    import_sharp = __toESM(require("sharp"), 1);
    // Fractional crop box (left/top/right/bottom).
    LEGACY_CROP = {
      left: 0.1,
      top: 0.02,
      right: 0.9,
      bottom: 0.6
    };
  }
});
152
+
153
+ // src/tesseract.ts
154
// Namespace object for src/tesseract.ts. Each export is registered as a getter
// function so the binding is read at access time rather than at registration
// time (TESSERACT_VARIANTS is only assigned once init_tesseract() has run).
var tesseract_exports = {};
{
  const tesseractGetters = {
    TESSERACT_VARIANTS: () => TESSERACT_VARIANTS,
    createTesseractReader: () => createTesseractReader
  };
  __export(tesseract_exports, tesseractGetters);
}
159
/**
 * Create an OCR reader backed by the optional `tesseract.js` dependency.
 *
 * The worker is created for "eng" and configured with an uppercase
 * letters + digits whitelist and page segmentation mode "7" (single line).
 *
 * @returns {Promise<null | {
 *   recognize: (image: any) => Promise<string>,
 *   recognizeMulti: (input: any, variants: object[]) => Promise<string[]>,
 *   dispose: () => Promise<void>
 * }>} `null` when `tesseract.js` cannot be imported, otherwise the reader.
 * @throws Whatever `createWorker` / `setParameters` throw; if configuration
 *   fails the freshly created worker is terminated before rethrowing.
 */
async function createTesseractReader() {
  let createWorker;
  try {
    const tess = await import("tesseract.js");
    // Prefer the named export; fall back to the default export in case the
    // loader only exposes the package's exports object there.
    createWorker = tess.createWorker ?? tess.default?.createWorker;
  } catch {
    // Optional dependency is not installed: signal "unavailable".
    return null;
  }
  const worker = await createWorker("eng");
  try {
    await worker.setParameters({
      tessedit_char_whitelist: "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
      // PSM.SINGLE_LINE
      tessedit_pageseg_mode: "7"
    });
  } catch (err) {
    // Do not leak the worker when configuration fails.
    await Promise.resolve(worker.terminate()).catch(() => {});
    throw err;
  }
  // Keep only uppercase letters and digits from the raw OCR text.
  const toCaptchaText = (text) => text.trim().replace(/[^A-Z0-9]/g, "");
  return {
    /** Recognise one image and return the cleaned text. */
    async recognize(image) {
      const { data } = await worker.recognize(image);
      return toCaptchaText(data.text);
    },
    /**
     * Preprocess `input` once per variant and recognise each result.
     * Only reads of 2 to 8 characters are kept.
     */
    async recognizeMulti(input, variants) {
      const results = [];
      for (const opts of variants) {
        try {
          const buf = await preprocessCaptchaToBuffer(input, opts);
          const { data } = await worker.recognize(buf);
          const clean = toCaptchaText(data.text);
          if (clean.length >= 2 && clean.length <= 8) {
            results.push(clean);
          }
        } catch {
          // Best effort: a variant that fails to preprocess or recognise is
          // skipped so the remaining variants still contribute.
        }
      }
      return results;
    },
    /** Terminate the underlying worker. */
    async dispose() {
      await worker.terminate();
    }
  };
}
198
// Preprocessing presets used for the Tesseract reads; assigned the first time
// init_tesseract() runs.
var TESSERACT_VARIANTS;

// Lazy initialiser for src/tesseract.ts.
var init_tesseract = __esm({
  "src/tesseract.ts"() {
    "use strict";
    init_preprocess();
    // Both presets share every setting; the second one additionally negates
    // the image. `negate` is spliced in between `sharpen` and `crop` so the
    // key order of each preset is the same as if it were written out in full.
    const makeVariant = (negated) => ({
      blur: 1.5,
      greyscale: true,
      scale: 4,
      contrast: 3,
      sharpen: true,
      ...(negated ? { negate: true } : {}),
      crop: "auto",
      padding: true
    });
    TESSERACT_VARIANTS = [
      // Variant 1: standard enhanced
      makeVariant(false),
      // Variant 2: enhanced + negated
      makeVariant(true)
    ];
  }
});
228
+
229
+ // src/index.ts
230
// Public API of the package (src/index.ts). Exports are registered as getter
// functions so each binding is resolved when it is read, not when it is
// registered; the definitions appear further down in the bundle.
var index_exports = {};
{
  const publicGetters = {
    DITHER_CONFUSION_GROUPS: () => DITHER_CONFUSION_GROUPS,
    LEGACY_CONFUSION_GROUPS: () => LEGACY_CONFUSION_GROUPS,
    Solver: () => Solver,
    TESSERACT_VARIANTS: () => TESSERACT_VARIANTS,
    createTesseractReader: () => createTesseractReader,
    disambiguateResult: () => disambiguateResult,
    imageToBase64: () => imageToBase64,
    majorityVote: () => majorityVote,
    majorityVoteDetailed: () => majorityVoteDetailed,
    preprocessCaptcha: () => preprocessCaptcha,
    preprocessCaptchaToBuffer: () => preprocessCaptchaToBuffer
  };
  __export(index_exports, publicGetters);
}
module.exports = __toCommonJS(index_exports);
132
245
 
133
246
  // src/solver.ts
134
- var PROMPT = `You are an expert OCR assistant reading distorted text from a CAPTCHA image.
135
- Two versions of the same captcha are provided. Cross-reference both to determine the correct text.
136
- The text may contain uppercase letters (A-Z), lowercase letters (a-z), and/or digits (0-9).
137
- Pay close attention to:
138
- - Letter case: lowercase "e" has a horizontal bar inside, digit "0" does not. Lowercase "r" has a short descender, uppercase "T" has a flat top.
139
- - Similar shapes: "5" has a flat top + curved bottom, "S" is fully curved. "4" has an angled stroke, "A" has a pointed top. "6" has a closed bottom loop, "8" has two loops. "2" has a curved top + flat bottom, "Z" has all straight lines.
140
- Output ONLY the exact characters you read, preserving case. Nothing else.`;
247
+ var import_ai = require("ai");
248
+ init_preprocess();
249
+
250
+ // src/disambiguate.ts
251
+ var import_sharp2 = __toESM(require("sharp"), 1);
252
/**
 * Re-examine positions voted as "2" or "Z" using pixel-level shape analysis
 * and overwrite them in place when the shape points to another character.
 *
 * A "2"/"Z" position is treated as ambiguous when its vote tally contains at
 * least one vote for "6", "L" or "1", or when the whole result contains three
 * or more "2"/"Z" characters.
 *
 * @param {string[]} result - Voted characters; mutated in place.
 * @param {Array<Map<string, number>>} rankedByPos - Per-position vote tallies.
 *   A missing entry is treated as "no alternative votes".
 * @param binaryImage - Image input handed to `sharp` (only read when at least
 *   one position is ambiguous).
 * @returns {Promise<void>}
 */
async function disambiguateResult(result, rankedByPos, binaryImage) {
  const isTwoOrZ = (c) => c === "2" || c === "Z";
  // Loop-invariant: `result` is not modified while collecting positions.
  let twoZCount = 0;
  for (const c of result) {
    if (isTwoOrZ(c)) twoZCount++;
  }
  const ambiguousPositions = [];
  for (let pos = 0; pos < result.length; pos++) {
    if (!isTwoOrZ(result[pos])) continue;
    // Tolerate a tally list shorter than `result` instead of throwing.
    const ranked = rankedByPos[pos];
    const votesFor = (ch) => ranked?.get(ch) ?? 0;
    const hasAlt = votesFor("6") >= 1 || votesFor("L") >= 1 || votesFor("1") >= 1;
    if (hasAlt || twoZCount >= 3) {
      ambiguousPositions.push(pos);
    }
  }
  if (ambiguousPositions.length === 0) return;
  const meta = await (0, import_sharp2.default)(binaryImage).metadata();
  const fullW = meta.width;
  const fullH = meta.height;
  // Analyse only the horizontal band from 12% to 88% of the image height.
  const cropTop = Math.floor(fullH * 0.12);
  const cropH = Math.floor(fullH * 0.76);
  const { data, info } = await (0, import_sharp2.default)(binaryImage).extract({ left: 0, top: cropTop, width: fullW, height: cropH }).greyscale().negate().raw().toBuffer({ resolveWithObject: true });
  const w = info.width;
  const h = info.height;
  const pixels = new Uint8Array(data);
  const regions = segmentCharacters(pixels, w, h, result.length);
  // Segmentation must yield exactly one region per character to be usable.
  if (!regions || regions.length !== result.length) return;
  for (const pos of ambiguousPositions) {
    const features = analyseCharacter(pixels, w, h, regions[pos]);
    const newChar = classifyFromFeatures(features, result[pos]);
    if (newChar) {
      result[pos] = newChar;
    }
  }
}
288
/**
 * Split a single-channel image into `expectedCount` character regions.
 *
 * A pixel counts as ink when its value is >= 128. Split columns are chosen at
 * the deepest valleys of the smoothed per-column ink density; when too few
 * valleys qualify, the content span is divided into equal-width slices.
 *
 * @param pixels - Row-major pixel values, `w` entries per row.
 * @param {number} w - Image width in pixels.
 * @param {number} h - Image height in pixels.
 * @param {number} expectedCount - Number of characters to segment.
 * @returns {Array<{left: number, right: number, top: number, bottom: number}>}
 *   One box per character; `right` and `bottom` are exclusive.
 */
function segmentCharacters(pixels, w, h, expectedCount) {
  const isInk = (col, row) => pixels[row * w + col] >= 128;

  // Fraction of ink pixels in every column.
  const colDensity = new Float64Array(w);
  for (let col = 0; col < w; col++) {
    let inkRows = 0;
    for (let row = 0; row < h; row++) {
      if (isInk(col, row)) inkRows += 1;
    }
    colDensity[col] = inkRows / h;
  }

  // Horizontal extent of the content: first/last column with > 5% ink.
  const firstInkCol = colDensity.findIndex((density) => density > 0.05);
  const contentLeft = firstInkCol === -1 ? 0 : firstInkCol;
  let lastInkCol = w - 1;
  while (lastInkCol >= 0 && !(colDensity[lastInkCol] > 0.05)) lastInkCol -= 1;
  const contentRight = lastInkCol >= 0 ? lastInkCol + 1 : w;

  // Box-filter the density (15 columns each side, clipped to the content).
  const HALF_WINDOW = 15;
  const smoothed = new Float64Array(w);
  for (let col = contentLeft; col < contentRight; col++) {
    const from = Math.max(contentLeft, col - HALF_WINDOW);
    const to = Math.min(contentRight - 1, col + HALF_WINDOW);
    let total = 0;
    for (let k = from; k <= to; k++) total += colDensity[k];
    smoothed[col] = total / (to - from + 1);
  }

  // Collect local minima, ignoring a margin of 0.6 character widths per side.
  const charWidth = (contentRight - contentLeft) / expectedCount;
  const margin = Math.floor(charWidth * 0.6);
  const searchLeft = contentLeft + margin;
  const searchRight = contentRight - margin;
  const valleys = [];
  for (let x = searchLeft + 1; x < searchRight - 1; x++) {
    const here = smoothed[x];
    const isLocalMin = here <= smoothed[x - 1] && here <= smoothed[x + 1];
    if (!isLocalMin) continue;
    // Depth = how far the valley sits below the lower of the two peaks found
    // within 40 columns on either side.
    const leftPeak = Math.max(...smoothed.subarray(Math.max(searchLeft, x - 40), x));
    const rightPeak = Math.max(...smoothed.subarray(x + 1, Math.min(searchRight, x + 41)));
    const depth = Math.min(leftPeak, rightPeak) - here;
    if (depth > 0.01) valleys.push({ x, depth });
  }

  // Greedily take the deepest valleys that keep a minimum mutual distance.
  valleys.sort((a, b) => b.depth - a.depth);
  const minDist = charWidth * 0.6;
  let splits = [];
  for (const { x } of valleys) {
    if (splits.length >= expectedCount - 1) break;
    const farFromChosen = splits.every((s) => Math.abs(s - x) > minDist);
    if (farFromChosen) splits.push(x);
  }

  // Not enough valleys: fall back to equal-width slices.
  if (splits.length < expectedCount - 1) {
    splits = [];
    for (let i = 1; i < expectedCount; i++) {
      splits.push(Math.floor(contentLeft + charWidth * i));
    }
  }
  splits.sort((a, b) => a - b);

  const boundaries = [contentLeft, ...splits, contentRight];
  return boundaries.slice(0, expectedCount).map((start, idx) => {
    const end = boundaries[idx + 1];
    // Vertical extent of the ink inside this column range.
    let top = h;
    let bottom = 0;
    for (let row = 0; row < h; row++) {
      let rowHasInk = false;
      for (let col = start; col < end && !rowHasInk; col++) {
        rowHasInk = isInk(col, row);
      }
      if (!rowHasInk) continue;
      if (row < top) top = row;
      if (row > bottom) bottom = row;
    }
    return { left: start, right: end, top: Math.max(0, top), bottom: Math.min(h, bottom + 1) };
  });
}
375
// 4-connected neighbourhood used by the flood fills below. Hoisted so it is
// not re-allocated for every visited cell.
const HOLE_NEIGHBOUR_OFFSETS = [
  [0, 1],
  [0, -1],
  [1, 0],
  [-1, 0]
];

/**
 * Breadth-first flood fill over background (0) cells of `grid`.
 *
 * `queue` must already contain the seed indices, and those seeds must already
 * be marked in `visited`. Every reachable, unvisited background cell is marked
 * and appended to `queue`.
 *
 * @returns {{area: number, sumY: number}} Number of cells processed and the
 *   sum of their row indices.
 */
function floodFillBackground(grid, visited, rw, rh, queue) {
  let area = 0;
  let sumY = 0;
  for (let head = 0; head < queue.length; head++) {
    const idx = queue[head];
    const x = idx % rw;
    const y = Math.floor(idx / rw);
    area++;
    sumY += y;
    for (const [dx, dy] of HOLE_NEIGHBOUR_OFFSETS) {
      const nx = x + dx;
      const ny = y + dy;
      if (nx < 0 || nx >= rw || ny < 0 || ny >= rh) continue;
      const ni = ny * rw + nx;
      if (!visited[ni] && grid[ni] === 0) {
        visited[ni] = 1;
        queue.push(ni);
      }
    }
  }
  return { area, sumY };
}

/**
 * Count enclosed background areas ("holes") inside a character region.
 *
 * Pixels >= 128 are ink. Background connected to the region border is
 * discarded; every remaining background component larger than 0.5% of the
 * region area counts as a hole. A hole is "bottom" when the mean row of its
 * cells is at or below the vertical midpoint of the region, otherwise "top".
 *
 * @param pixels - Row-major pixel values of the whole image.
 * @param {number} imgW - Width of the whole image (row stride of `pixels`).
 * @param {{left: number, right: number, top: number, bottom: number}} region
 *   Region to inspect; `right` and `bottom` are exclusive.
 * @returns {{count: number, hasBottom: boolean, hasTop: boolean}}
 *   All-zero/false when the region is narrower or shorter than 3 pixels.
 */
function detectHoles(pixels, imgW, region) {
  const rw = region.right - region.left;
  const rh = region.bottom - region.top;
  if (rw < 3 || rh < 3) return { count: 0, hasBottom: false, hasTop: false };

  // Binarise the region into a local grid: 1 = ink, 0 = background.
  const grid = new Uint8Array(rw * rh);
  for (let ly = 0; ly < rh; ly++) {
    for (let lx = 0; lx < rw; lx++) {
      const px = pixels[(region.top + ly) * imgW + (region.left + lx)];
      grid[ly * rw + lx] = px >= 128 ? 1 : 0;
    }
  }

  // Remove the outside background: fill from every background border cell.
  const visited = new Uint8Array(rw * rh);
  const borderQueue = [];
  const seedIfBackground = (idx) => {
    if (grid[idx] === 0 && !visited[idx]) {
      visited[idx] = 1;
      borderQueue.push(idx);
    }
  };
  for (let lx = 0; lx < rw; lx++) {
    seedIfBackground(lx);
    seedIfBackground((rh - 1) * rw + lx);
  }
  for (let ly = 0; ly < rh; ly++) {
    seedIfBackground(ly * rw);
    seedIfBackground(ly * rw + rw - 1);
  }
  floodFillBackground(grid, visited, rw, rh, borderQueue);

  // Whatever background is still unvisited is enclosed by ink.
  let holeCount = 0;
  let hasBottom = false;
  let hasTop = false;
  const midY = rh / 2;
  const minHoleArea = rw * rh * 5e-3;
  for (let idx = 0; idx < rw * rh; idx++) {
    if (grid[idx] !== 0 || visited[idx]) continue;
    visited[idx] = 1;
    const { area, sumY } = floodFillBackground(grid, visited, rw, rh, [idx]);
    if (area > minHoleArea) {
      holeCount++;
      if (sumY / area >= midY) hasBottom = true;
      else hasTop = true;
    }
  }
  return { count: holeCount, hasBottom, hasTop };
}
479
/**
 * Extract simple shape features from one character region.
 *
 * Pixels >= 128 are ink. The "top" and "bottom" bands are each a quarter of
 * the region height, at least 3 rows, and never taller than the region itself
 * so that short regions do not sample rows belonging to neighbouring content.
 *
 * @param pixels - Row-major pixel values of the whole image.
 * @param {number} imgW - Width of the whole image (row stride of `pixels`).
 * @param {number} _imgH - Unused; kept for signature compatibility.
 * @param {{left: number, right: number, top: number, bottom: number}} region
 *   Region to analyse; `right` and `bottom` are exclusive.
 * @returns {{
 *   hasHoleBottom: boolean, hasHoleTop: boolean, holeCount: number,
 *   aspectRatio: number, bottomHorizontalExtent: number,
 *   topHorizontalExtent: number, topCurvature: boolean
 * }} Hole information from `detectHoles`, height/width ratio, horizontal ink
 *   span of the bottom and top bands as a fraction of the region width, and
 *   whether more than 15% of the top-right band is ink.
 */
function analyseCharacter(pixels, imgW, _imgH, region) {
  const rw = region.right - region.left;
  const rh = region.bottom - region.top;
  const holes = detectHoles(pixels, imgW, region);
  const aspectRatio = rh / Math.max(rw, 1);

  // Band height, clamped to the region so rows outside it are never read.
  const bandH = Math.min(Math.max(rh, 0), Math.max(3, Math.floor(rh * 0.25)));
  const isInk = (lx, ly) => pixels[(region.top + ly) * imgW + (region.left + lx)] >= 128;

  // Horizontal ink span (max - min column) over rows [fromRow, toRow),
  // normalised by the region width; 0 when the span is a single column or empty.
  const horizontalExtent = (fromRow, toRow) => {
    let minX = rw;
    let maxX = 0;
    for (let lx = 0; lx < rw; lx++) {
      for (let ly = fromRow; ly < toRow; ly++) {
        if (!isInk(lx, ly)) continue;
        if (lx < minX) minX = lx;
        if (lx > maxX) maxX = lx;
      }
    }
    return maxX > minX ? (maxX - minX) / rw : 0;
  };
  const topHorizontalExtent = horizontalExtent(0, bandH);
  const bottomHorizontalExtent = horizontalExtent(rh - bandH, rh);

  // Ink ratio in the right half of the top band.
  const rightHalf = Math.floor(rw / 2);
  let topRightInk = 0;
  let topRightTotal = 0;
  for (let ly = 0; ly < bandH; ly++) {
    for (let lx = rightHalf; lx < rw; lx++) {
      topRightTotal++;
      if (isInk(lx, ly)) topRightInk++;
    }
  }
  const topCurvature = topRightTotal > 0 && topRightInk / topRightTotal > 0.15;

  return {
    hasHoleBottom: holes.hasBottom,
    hasHoleTop: holes.hasTop,
    holeCount: holes.count,
    aspectRatio,
    bottomHorizontalExtent,
    topHorizontalExtent,
    topCurvature
  };
}
527
/**
 * Map shape features to a replacement character.
 *
 * Rules, in priority order:
 *   1. a hole in the bottom half only        -> "6"
 *   2. two or more holes                     -> "8"
 *   3. a hole in the top half only           -> no change
 *   4. no holes, tall (> 1.8), no top curve  -> "1"
 *   5. no holes, foot wider than 50% of the region and 15% wider than the
 *      top, aspect ratio > 0.8               -> "L"
 *
 * @param features - Output of `analyseCharacter`.
 * @param {string} _votedChar - Unused; kept for signature compatibility.
 * @returns {string | null} Replacement character, or `null` to keep the vote.
 */
function classifyFromFeatures(features, _votedChar) {
  const {
    hasHoleBottom,
    hasHoleTop,
    holeCount,
    aspectRatio,
    topCurvature,
    bottomHorizontalExtent: footWidth,
    topHorizontalExtent: capWidth
  } = features;

  if (hasHoleBottom && !hasHoleTop) return "6";
  if (holeCount >= 2) return "8";
  if (hasHoleTop && !hasHoleBottom) return null;

  // The remaining rules only apply to shapes without any hole.
  if (holeCount !== 0) return null;
  if (aspectRatio > 1.8 && !topCurvature) return "1";

  const hasWideFoot = footWidth > 0.5 && footWidth > capWidth * 1.15 && aspectRatio > 0.8;
  return hasWideFoot ? "L" : null;
}
545
+
546
+ // src/solver.ts
547
// Instruction sent with every vision request. It is written for 4-character,
// uppercase + digit captchas and spells out the look-alike pairs the model
// should double-check. Built from one entry per line; joined with "\n".
var PROMPT = [
  'Read the 4 distorted characters in these images. Two processed versions shown.',
  'The text uses UPPERCASE A-Z and digits 0-9 only. No lowercase.',
  '',
  'WARNING: The dithered rendering makes many characters appear as "2". Before writing "2", check:',
  '- Could it be "6"? (has closed loop at bottom)',
  '- Could it be "L"? (has vertical stem + horizontal foot, 90\xB0 corner)',
  '- Could it be "1"? (thin vertical stroke, no curve)',
  '- Could it be "Z"? (all straight lines, sharp angles)',
  '',
  'Also watch for: O/0 have curved sides (not D which has flat left); B has two bumps (not D with one curve); X is two crossing diagonals (not K with vertical bar); G has horizontal bar inside (not C).',
  '',
  'Output ONLY the 4 characters.'
].join("\n");
141
559
  var DEFAULT_MODELS = {
142
560
  openai: "gpt-4o",
143
561
  anthropic: "claude-sonnet-4-20250514",
@@ -175,6 +593,22 @@ var LEGACY_CONFUSION_GROUPS = {
175
593
  Z: "Z",
176
594
  "2": "Z"
177
595
  };
596
// Confusion groups for dithered captchas: maps a read character to the
// canonical character its votes are pooled under. Listed as
// [read, canonical] pairs.
var DITHER_CONFUSION_GROUPS = Object.fromEntries([
  ["D", "O"],
  ["O", "O"],
  ["I", "1"],
  ["1", "1"],
  ["K", "X"],
  ["X", "X"],
  ["A", "X"],
  ["C", "G"],
  ["G", "G"],
  ["9", "8"],
  ["8", "8"],
  ["Y", "X"],
  ["E", "5"],
  ["5", "5"]
]);
178
612
  function majorityVote(attempts, expectedLength, groups) {
179
613
  let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;
180
614
  if (filtered.length === 0) {
@@ -197,6 +631,7 @@ function majorityVote(attempts, expectedLength, groups) {
197
631
  if (sameLenAttempts.length === 0) return filtered[0];
198
632
  const useGroups = groups && typeof groups === "object" ? groups : void 0;
199
633
  const result = [];
634
+ const rankedByPos = [];
200
635
  for (let pos = 0; pos < bestLen; pos++) {
201
636
  const charCounts = /* @__PURE__ */ new Map();
202
637
  for (const a of sameLenAttempts) {
@@ -209,6 +644,7 @@ function majorityVote(attempts, expectedLength, groups) {
209
644
  const canonical = useGroups[ch] ?? ch;
210
645
  groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);
211
646
  }
647
+ rankedByPos.push(groupCounts);
212
648
  let bestGroup = "";
213
649
  let bestGroupCount = 0;
214
650
  for (const [canonical, count] of groupCounts) {
@@ -219,6 +655,7 @@ function majorityVote(attempts, expectedLength, groups) {
219
655
  }
220
656
  result.push(bestGroup);
221
657
  } else {
658
+ rankedByPos.push(charCounts);
222
659
  let bestChar = "";
223
660
  let bestCharCount = 0;
224
661
  for (const [ch, count] of charCounts) {
@@ -230,8 +667,89 @@ function majorityVote(attempts, expectedLength, groups) {
230
667
  result.push(bestChar);
231
668
  }
232
669
  }
670
+ if (bestLen >= 4) {
671
+ const charFreq = /* @__PURE__ */ new Map();
672
+ for (const ch of result) {
673
+ charFreq.set(ch, (charFreq.get(ch) ?? 0) + 1);
674
+ }
675
+ for (const [ch, freq] of charFreq) {
676
+ if (freq < 3) continue;
677
+ let strongestPos = -1;
678
+ let strongestCount = 0;
679
+ for (let pos = 0; pos < bestLen; pos++) {
680
+ if (result[pos] !== ch) continue;
681
+ const count = rankedByPos[pos].get(ch) ?? 0;
682
+ if (count > strongestCount) {
683
+ strongestCount = count;
684
+ strongestPos = pos;
685
+ }
686
+ }
687
+ for (let pos = 0; pos < bestLen; pos++) {
688
+ if (result[pos] !== ch || pos === strongestPos) continue;
689
+ const ranked = rankedByPos[pos];
690
+ const usedChars = new Set(result);
691
+ let bestUnique = "";
692
+ let bestUniqueCount = 0;
693
+ let bestAny = "";
694
+ let bestAnyCount = 0;
695
+ for (const [c, count] of ranked) {
696
+ if (c === ch) continue;
697
+ if (count > bestAnyCount) {
698
+ bestAny = c;
699
+ bestAnyCount = count;
700
+ }
701
+ if (!usedChars.has(c) && count > bestUniqueCount) {
702
+ bestUnique = c;
703
+ bestUniqueCount = count;
704
+ }
705
+ }
706
+ const sub = bestUniqueCount >= 2 ? bestUnique : bestAnyCount >= 2 ? bestAny : "";
707
+ if (sub) {
708
+ result[pos] = sub;
709
+ }
710
+ }
711
+ }
712
+ }
233
713
  return result.join("");
234
714
  }
715
/**
 * Per-position majority vote that also returns the vote tallies.
 *
 * Attempts are first narrowed to those of `expectedLength` (when given and
 * when any match); the most common length among the remaining attempts is
 * then used. Ties are won by the entry that was counted first.
 *
 * @param {string[]} attempts - Candidate reads.
 * @param {number} [expectedLength] - Preferred length of the answer.
 * @param {object | false} [groups] - Optional map from a character to the
 *   canonical character it is counted as.
 * @returns {{result: string[], rankedByPos: Array<Map<string, number>>}}
 *   Winning character per position and the tally for each position.
 */
function majorityVoteDetailed(attempts, expectedLength, groups) {
  const lengthMatched = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;
  const pool = lengthMatched.length > 0 ? lengthMatched : attempts;
  if (pool.length === 0) return { result: [], rankedByPos: [] };

  const tally = (counts, key) => counts.set(key, (counts.get(key) ?? 0) + 1);
  // Key with the strictly highest count; insertion order breaks ties.
  const winner = (counts, fallback) => {
    let bestKey = fallback;
    let bestCount = 0;
    for (const [key, count] of counts) {
      if (count > bestCount) {
        bestKey = key;
        bestCount = count;
      }
    }
    return bestKey;
  };

  const lenCounts = /* @__PURE__ */ new Map();
  for (const attempt of pool) tally(lenCounts, attempt.length);
  const bestLen = winner(lenCounts, 0);

  const sameLenAttempts = pool.filter((a) => a.length === bestLen);
  if (sameLenAttempts.length === 0) return { result: [...pool[0]], rankedByPos: [] };

  const useGroups = groups && typeof groups === "object" ? groups : void 0;
  const canonical = useGroups ? (ch) => useGroups[ch] ?? ch : (ch) => ch;

  const result = [];
  const rankedByPos = [];
  for (let pos = 0; pos < bestLen; pos++) {
    const counts = /* @__PURE__ */ new Map();
    for (const attempt of sameLenAttempts) tally(counts, canonical(attempt[pos]));
    rankedByPos.push(counts);
    result.push(winner(counts, ""));
  }
  return { result, rankedByPos };
}
235
753
  function sumOptional(a, b) {
236
754
  if (a === void 0 && b === void 0) return void 0;
237
755
  return (a ?? 0) + (b ?? 0);
@@ -325,50 +843,206 @@ var Solver = class {
325
843
  */
326
844
  async solve(input, options = {}) {
327
845
  const {
328
- numAttempts = 7,
846
+ numAttempts = 9,
329
847
  expectedLength,
330
848
  maxRetries = 2,
331
849
  verbose = true,
332
850
  confusionGroups = false,
333
- preprocess
851
+ preprocess,
852
+ useTesseract = true,
853
+ useDisambiguation = true
334
854
  } = options;
335
855
  const model = await this.getModel();
336
- const [enhancedBuffer, colorBuffer] = await Promise.all([
856
+ const [enhancedBuffer, heavyCleanBuffer, mediumCleanBuffer] = await Promise.all([
337
857
  preprocessCaptchaToBuffer(input, preprocess),
338
858
  preprocessCaptchaToBuffer(input, {
339
859
  blur: 0,
340
- scale: 4,
860
+ greyscale: true,
861
+ scale: 8,
862
+ upscaleKernel: "nearest",
863
+ postBlur: 15,
864
+ normalise: true,
865
+ contrast: 1,
866
+ sharpen: false,
867
+ threshold: 140,
868
+ negate: true,
869
+ crop: "none",
870
+ targetWidth: 800,
871
+ padding: 20
872
+ }),
873
+ preprocessCaptchaToBuffer(input, {
874
+ blur: 0,
875
+ greyscale: true,
876
+ scale: 8,
877
+ upscaleKernel: "nearest",
878
+ postBlur: 8,
879
+ normalise: true,
341
880
  contrast: 1,
342
881
  sharpen: false,
882
+ threshold: 120,
883
+ negate: true,
343
884
  crop: "none",
344
- padding: 40,
345
- greyscale: false
885
+ targetWidth: 800,
886
+ padding: 20
346
887
  })
347
888
  ]);
348
- const results = await Promise.all(
349
- Array.from(
350
- { length: numAttempts },
351
- () => this.singleAttempt(model, enhancedBuffer, colorBuffer, maxRetries)
889
+ const halfN = Math.ceil(numAttempts / 2);
890
+ const visionResults = await Promise.all([
891
+ ...Array.from(
892
+ { length: halfN },
893
+ () => this.singleAttempt(model, enhancedBuffer, heavyCleanBuffer, maxRetries)
894
+ ),
895
+ ...Array.from(
896
+ { length: numAttempts - halfN },
897
+ () => this.singleAttempt(model, enhancedBuffer, mediumCleanBuffer, maxRetries)
352
898
  )
353
- );
354
- const valid = results.filter((r) => r !== null);
899
+ ]);
900
+ const valid = visionResults.filter((r) => r !== null);
355
901
  if (verbose) {
356
902
  valid.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r.text}`));
357
903
  }
358
904
  const attempts = valid.map((r) => r.text);
359
905
  const attemptUsages = valid.map((r) => r.usage);
906
+ if (useTesseract) {
907
+ try {
908
+ const reader = await this.getTesseractReader();
909
+ if (reader) {
910
+ const { TESSERACT_VARIANTS: TESSERACT_VARIANTS2 } = await Promise.resolve().then(() => (init_tesseract(), tesseract_exports));
911
+ const tessReads = await reader.recognizeMulti(input, TESSERACT_VARIANTS2);
912
+ for (const read of tessReads) {
913
+ attempts.push(read);
914
+ if (verbose) console.log(` Tesseract: ${read}`);
915
+ }
916
+ }
917
+ } catch {
918
+ }
919
+ }
920
+ const correctionAttempts = Math.min(3, Math.floor(numAttempts / 3));
921
+ if (correctionAttempts > 0 && attempts.length > 0) {
922
+ const initialVote = majorityVote(attempts, expectedLength, confusionGroups);
923
+ const suspiciousCount = [...initialVote].filter((c) => c === "2" || c === "Z").length;
924
+ if (suspiciousCount >= 2 && initialVote.length === (expectedLength ?? initialVote.length)) {
925
+ const corrPrompt = this.buildCorrectionPrompt(initialVote);
926
+ if (corrPrompt) {
927
+ const corrections = await Promise.all(
928
+ Array.from(
929
+ { length: correctionAttempts },
930
+ () => this.selfCorrect(model, enhancedBuffer, heavyCleanBuffer, initialVote, corrPrompt)
931
+ )
932
+ );
933
+ for (const c of corrections) {
934
+ if (c) {
935
+ for (let w = 0; w < 5; w++) attempts.push(c.text);
936
+ if (verbose) console.log(` Corrected: ${c.text}`);
937
+ }
938
+ }
939
+ }
940
+ }
941
+ }
360
942
  const usage = aggregateUsage(attemptUsages);
361
943
  if (attempts.length === 0) {
362
944
  if (verbose) console.log(" All attempts failed!");
363
945
  return { text: "", attempts, usage, attemptUsages };
364
946
  }
947
+ const { result, rankedByPos } = majorityVoteDetailed(attempts, expectedLength, confusionGroups);
948
+ if (useDisambiguation && result.length > 0 && rankedByPos.length > 0) {
949
+ try {
950
+ await disambiguateResult(result, rankedByPos, heavyCleanBuffer);
951
+ const lightCleanBuffer = await preprocessCaptchaToBuffer(input, {
952
+ median: 3,
953
+ blur: 0,
954
+ greyscale: true,
955
+ scale: 4,
956
+ postBlur: 3,
957
+ normalise: true,
958
+ contrast: 1,
959
+ sharpen: false,
960
+ threshold: 128,
961
+ crop: "none",
962
+ padding: 20
963
+ });
964
+ await disambiguateResult(result, rankedByPos, lightCleanBuffer);
965
+ } catch {
966
+ }
967
+ }
968
+ const finalText = majorityVote(
969
+ [...attempts, result.join("")],
970
+ // include disambiguated result as an extra "vote"
971
+ expectedLength,
972
+ confusionGroups
973
+ );
365
974
  return {
366
- text: majorityVote(attempts, expectedLength, confusionGroups),
975
+ text: finalText,
367
976
  attempts,
368
977
  usage,
369
978
  attemptUsages
370
979
  };
371
980
  }
981
+ _tesseractReader = void 0;
982
+ async getTesseractReader() {
983
+ if (this._tesseractReader !== void 0) return this._tesseractReader;
984
+ try {
985
+ const { createTesseractReader: createTesseractReader2 } = await Promise.resolve().then(() => (init_tesseract(), tesseract_exports));
986
+ this._tesseractReader = await createTesseractReader2();
987
+ } catch {
988
+ this._tesseractReader = null;
989
+ }
990
+ return this._tesseractReader;
991
+ }
992
+ /** Clean up resources (Tesseract worker). */
993
+ async dispose() {
994
+ if (this._tesseractReader) {
995
+ await this._tesseractReader.dispose();
996
+ this._tesseractReader = null;
997
+ }
998
+ }
999
+ buildCorrectionPrompt(initial) {
1000
+ const checks = [...initial].map((c, pos) => {
1001
+ if (c !== "2" && c !== "Z") return null;
1002
+ if (pos === 0)
1003
+ return `Pos ${pos + 1} ("${c}"): thin stroke \u2192 "1"? closed loop at bottom \u2192 "6"? vertical+foot \u2192 "L"?`;
1004
+ if (pos < initial.length - 1)
1005
+ return `Pos ${pos + 1} ("${c}"): vertical + horizontal foot \u2192 "L"? thin stroke \u2192 "1"? loop \u2192 "6"?`;
1006
+ return `Pos ${pos + 1} ("${c}"): curved top \u2192 keep "2"; straight angles \u2192 "Z"`;
1007
+ }).filter(Boolean);
1008
+ if (!checks.length) return null;
1009
+ const prefix = [...initial].filter((c) => c === "2" || c === "Z").length >= 3 ? `"${initial}" has many similar chars \u2014 unusual for a captcha.
1010
+ ` : "";
1011
+ return `${prefix}Recheck:
1012
+ ${checks.join("\n")}
1013
+ Only change with clear evidence. Output ONLY the corrected 4 characters.`;
1014
+ }
1015
+ async selfCorrect(model, primaryBuffer, secondaryBuffer, initial, correctionPrompt) {
1016
+ try {
1017
+ const { text } = await (0, import_ai.generateText)({
1018
+ model,
1019
+ messages: [
1020
+ {
1021
+ role: "user",
1022
+ content: [
1023
+ { type: "text", text: PROMPT },
1024
+ { type: "image", image: primaryBuffer },
1025
+ { type: "image", image: secondaryBuffer }
1026
+ ]
1027
+ },
1028
+ { role: "assistant", content: initial },
1029
+ {
1030
+ role: "user",
1031
+ content: [
1032
+ { type: "text", text: correctionPrompt },
1033
+ { type: "image", image: primaryBuffer }
1034
+ ]
1035
+ }
1036
+ ],
1037
+ temperature: 0.3,
1038
+ maxOutputTokens: 32
1039
+ });
1040
+ const cleaned = text.trim().replace(/[^A-Za-z0-9]/g, "").toUpperCase();
1041
+ return cleaned.length >= 2 && cleaned.length <= 8 ? { text: cleaned } : null;
1042
+ } catch {
1043
+ return null;
1044
+ }
1045
+ }
372
1046
  /**
373
1047
  * Make a single API call to read the captcha.
374
1048
  * Retries up to `maxRetries` times on failure.
@@ -393,11 +1067,29 @@ var Solver = class {
393
1067
  });
394
1068
  const raw = text.trim();
395
1069
  const lower = raw.toLowerCase();
396
- if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't") || raw.length > 20) {
1070
+ if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't")) {
397
1071
  return null;
398
1072
  }
399
- const cleaned = raw.replace(/[^A-Za-z0-9]/g, "");
400
- return cleaned ? { text: cleaned, usage } : null;
1073
+ let answer = "";
1074
+ const allAlpha = raw.replace(/[^A-Za-z0-9]/g, "").toUpperCase();
1075
+ if (allAlpha.length <= 10) {
1076
+ answer = allAlpha;
1077
+ } else {
1078
+ const lines = raw.split(/\n/).reverse();
1079
+ for (const line of lines) {
1080
+ const tokens = line.trim().split(/\s+/);
1081
+ for (let ti = tokens.length - 1; ti >= 0; ti--) {
1082
+ const clean = tokens[ti].replace(/[^A-Za-z0-9]/g, "").toUpperCase();
1083
+ if (clean.length >= 2 && clean.length <= 8) {
1084
+ answer = clean;
1085
+ break;
1086
+ }
1087
+ }
1088
+ if (answer) break;
1089
+ }
1090
+ if (!answer) answer = allAlpha.slice(-8);
1091
+ }
1092
+ return answer ? { text: answer, usage } : null;
401
1093
  } catch (_err) {
402
1094
  if (retry < maxRetries) {
403
1095
  await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
@@ -409,12 +1101,21 @@ var Solver = class {
409
1101
  return null;
410
1102
  }
411
1103
  };
1104
+
1105
+ // src/index.ts
1106
+ // Run the lazy module initialisers at load time so the bindings they assign
+ // (e.g. LEGACY_CROP, TESSERACT_VARIANTS) are populated for the exports above.
+ init_preprocess();
1107
+ init_tesseract();
412
1108
  // Annotate the CommonJS export names for ESM import in node:
  // NOTE: `0 &&` short-circuits, so the assignment below never executes; the
  // object literal only serves as a static listing of the export names.
413
1109
  0 && (module.exports = {
1110
+ DITHER_CONFUSION_GROUPS,
414
1111
  LEGACY_CONFUSION_GROUPS,
415
1112
  Solver,
1113
+ TESSERACT_VARIANTS,
1114
+ createTesseractReader,
1115
+ disambiguateResult,
416
1116
  imageToBase64,
417
1117
  majorityVote,
1118
+ majorityVoteDetailed,
418
1119
  preprocessCaptcha,
419
1120
  preprocessCaptchaToBuffer
420
1121
  });