npm - @yigitahmetsahin/captcha-solver - Versions diffs - 2.0.1 → 3.1.0 - Mend

@yigitahmetsahin/captcha-solver 2.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -1,6 +1,78 @@
 import { LanguageModelUsage, LanguageModel } from 'ai';
 export { LanguageModelUsage } from 'ai';
+interface CropFractions {
+    /** Fraction from left edge (0–1, default: 0.1) */
+    left: number;
+    /** Fraction from top edge (0–1, default: 0.02) */
+    top: number;
+    /** Fraction from left to keep (0–1, default: 0.9) */
+    right: number;
+    /** Fraction from top to keep (0–1, default: 0.6) */
+    bottom: number;
+}
+interface PreprocessOptions {
+    /**
+     * Fraction of image height to keep from the top, cropping the bottom (default: 1.0, no pre-crop).
+     * Useful for removing dark bands at the bottom of dithered captchas.
+     */
+    preCropHeight?: number;
+    /** Median filter size at original resolution before other processing (default: 0, off). Odd number. */
+    median?: number;
+    /** Gaussian blur radius (default: 1.5). Set to 0 to skip. */
+    blur?: number;
+    /** Convert to greyscale (default: true) */
+    greyscale?: boolean;
+    /** Upscale factor (default: 4) */
+    scale?: number;
+    /** Upscale interpolation kernel (default: 'lanczos3') */
+    upscaleKernel?: 'lanczos3' | 'nearest' | 'cubic' | 'mitchell';
+    /** Gaussian blur applied AFTER upscaling — use large values (10-20) for dither removal (default: 0, off) */
+    postBlur?: number;
+    /** Normalise (stretch histogram to full range) before contrast/threshold (default: false) */
+    normalise?: boolean;
+    /** Contrast multiplier around image mean (default: 3.0). Set to 1 to skip. */
+    contrast?: number;
+    /** Enable unsharp-mask sharpening (default: true) */
+    sharpen?: boolean;
+    /** Binary threshold (0-255), applied after contrast (default: false, off) */
+    threshold?: number | false;
+    /** Invert colors (negate) after processing (default: false) */
+    negate?: boolean;
+    /**
+     * Crop mode (default: 'auto'):
+     *  - 'auto'   – trim whitespace after contrast enhancement, with margin
+     *  - 'legacy' – fixed-percentage crop (original behavior)
+     *  - 'none'   – skip cropping
+     *  - CropFractions – custom crop percentages
+     */
+    crop?: 'auto' | 'legacy' | 'none' | CropFractions;
+    /** Add white padding around the result (default: true). Pass false to skip, or a number for custom px. */
+    padding?: boolean | number;
+    /** Resize final image to this width in pixels, maintaining aspect ratio (default: none). Useful for downscaling after high-res processing. */
+    targetWidth?: number;
+}
+/**
+ * Preprocess a captcha image and return a base64-encoded PNG string.
+ */
+declare function preprocessCaptcha(input: string | Buffer, options?: PreprocessOptions): Promise<string>;
+/**
+ * Preprocess a captcha image and return the resulting PNG as a raw Buffer.
+ *
+ * Pipeline:
+ *   1. Gaussian blur in color space (smooths dither pattern)
+ *   2. Grayscale conversion
+ *   3. Upscale with Lanczos
+ *   4. Contrast boost around image mean + sharpen
+ *   5. Crop (auto-detect, legacy fixed, none, or custom)
+ *   6. Add white padding
+ */
+declare function preprocessCaptchaToBuffer(input: string | Buffer, options?: PreprocessOptions): Promise<Buffer>;
+/**
+ * Read an image file and return its base64-encoded content.
+ */
+declare function imageToBase64(imagePath: string): string;
 type Provider = 'openai' | 'anthropic' | 'google';
 interface SolverOptions {
     /** AI provider to use when constructing the model from an API key (default: "openai") */
@@ -9,7 +81,7 @@ interface SolverOptions {
     model?: string;
 }
 interface SolveOptions {
-    /** Number of voting attempts (default: 5) */
+    /** Number of voting attempts (default: 7) */
     numAttempts?: number;
     /** Expected captcha length — results of other lengths are discarded */
     expectedLength?: number;
@@ -17,6 +89,19 @@ interface SolveOptions {
     maxRetries?: number;
     /** Whether to log attempt details (default: true) */
     verbose?: boolean;
+    /**
+     * Confusion groups for majority voting.
+     * Pass a Record<string, string> to merge visually similar characters,
+     * or `false` to disable (default: false).
+     * Use LEGACY_CONFUSION_GROUPS to restore pre-3.0 behavior.
+     */
+    confusionGroups?: Record<string, string> | false;
+    /** Preprocessing options passed to the image pipeline */
+    preprocess?: PreprocessOptions;
+    /** Use Tesseract OCR as an additional voter (default: true if tesseract.js is installed) */
+    useTesseract?: boolean;
+    /** Use programmatic hole-detection to disambiguate 2/6/L/1 (default: true) */
+    useDisambiguation?: boolean;
 }
 interface SolveResult {
     /** The solved captcha text (majority-voted) */
@@ -28,6 +113,38 @@ interface SolveResult {
     /** Per-attempt usage breakdown */
     attemptUsages: LanguageModelUsage[];
 }
+/**
+ * Pre-3.0 confusion groups that merge visually similar characters.
+ * Opt-in via `{ confusionGroups: LEGACY_CONFUSION_GROUPS }`.
+ *
+ * Maps: 1/I/L → '1', O/D/0 → 'O', S/5 → 'S', Z/2 → 'Z'
+ */
+declare const LEGACY_CONFUSION_GROUPS: Record<string, string>;
+/**
+ * Confusion groups optimised for dithered / halftone captchas.
+ * Vision models systematically misread certain characters in dithered rendering.
+ *
+ * Maps: D→'O', I→'1', K/A→'X', C→'G', 9→'8', Y→'X', E→'5'
+ */
+declare const DITHER_CONFUSION_GROUPS: Record<string, string>;
+/**
+ * Character-level majority vote across multiple attempts.
+ * When `groups` is provided, visually similar characters are merged
+ * during counting (e.g. 1/I/L all count toward '1').
+ *
+ * After voting, a repetition penalty is applied: if any character appears
+ * 3+ times in the result (unlikely in real captchas), positions with that
+ * character are reconsidered using the next-best alternative.
+ */
+declare function majorityVote(attempts: string[], expectedLength?: number, groups?: Record<string, string> | false): string;
+/**
+ * Raw character-level majority vote WITHOUT repetition penalty.
+ * Returns per-position vote maps for disambiguation.
+ */
+declare function majorityVoteDetailed(attempts: string[], expectedLength?: number, groups?: Record<string, string> | false): {
+    result: string[];
+    rankedByPos: Map<string, number>[];
+};
 declare class Solver {
     private _model;
     private _pendingModel;
@@ -58,6 +175,12 @@ declare class Solver {
      * @returns Solved text, per-attempt answers, and token usage
      */
     solve(input: string | Buffer, options?: SolveOptions): Promise<SolveResult>;
+    private _tesseractReader;
+    private getTesseractReader;
+    /** Clean up resources (Tesseract worker). */
+    dispose(): Promise<void>;
+    private buildCorrectionPrompt;
+    private selfCorrect;
     /**
      * Make a single API call to read the captcha.
      * Retries up to `maxRetries` times on failure.
@@ -65,29 +188,30 @@ declare class Solver {
     private singleAttempt;
 }
+interface TesseractReader {
+    recognize: (image: Buffer) => Promise<string>;
+    recognizeMulti: (input: string | Buffer, variants: PreprocessOptions[]) => Promise<string[]>;
+    dispose: () => Promise<void>;
+}
 /**
- * Preprocess a captcha image using sharp (libvips).
- *
- * Pipeline:
- *   1. Gaussian blur in color space (smooths dither pattern)
- *   2. Grayscale conversion
- *   3. Upscale 4× with Lanczos
- *   4. Contrast boost (3× around image mean) + sharpen
- *   5. Crop decorative borders
- *   6. Add white padding
- *
- * Accepts a file path or a raw image Buffer.
- * Returns a base64-encoded PNG string.
+ * Create a Tesseract OCR reader. Returns null if tesseract.js is not installed.
+ * The reader uses PSM_SINGLE_LINE and an A-Z0-9 whitelist.
  */
-declare function preprocessCaptcha(input: string | Buffer): Promise<string>;
+declare function createTesseractReader(): Promise<TesseractReader | null>;
 /**
- * Same preprocessing pipeline as `preprocessCaptcha`, but returns the
- * resulting PNG as a raw Buffer (useful for AI SDK image content parts).
+ * Default preprocessing variants for Tesseract OCR.
+ * Different blur/threshold levels produce diverse reads.
  */
-declare function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer>;
+declare const TESSERACT_VARIANTS: PreprocessOptions[];
 /**
- * Read an image file and return its base64-encoded content.
+ * Disambiguate characters in a voted result using deterministic image features.
+ * Only acts on positions voted as "2" or "Z" where alternatives like 6/L/1 received votes.
+ *
+ * @param result - The voted character array (mutable, modified in place)
+ * @param rankedByPos - Per-position vote counts from majorityVoteDetailed
+ * @param binaryImage - The preprocessed binary image buffer (dark text on white, from threshold+negate)
  */
-declare function imageToBase64(imagePath: string): string;
+declare function disambiguateResult(result: string[], rankedByPos: Map<string, number>[], binaryImage: Buffer): Promise<void>;
-export { type Provider, type SolveOptions, type SolveResult, Solver, type SolverOptions, imageToBase64, preprocessCaptcha, preprocessCaptchaToBuffer };
+export { type CropFractions, DITHER_CONFUSION_GROUPS, LEGACY_CONFUSION_GROUPS, type PreprocessOptions, type Provider, type SolveOptions, type SolveResult, Solver, type SolverOptions, TESSERACT_VARIANTS, type TesseractReader, createTesseractReader, disambiguateResult, imageToBase64, majorityVote, majorityVoteDetailed, preprocessCaptcha, preprocessCaptchaToBuffer };