@yigitahmetsahin/captcha-solver 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 yigitahmetsahin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,115 @@
1
+ # @yigitahmetsahin/captcha-solver
2
+
3
+ AI-powered captcha solver using image preprocessing and OpenAI vision models with majority voting.
4
+
5
+ [![CI](https://github.com/yigitahmetsahin/captcha-solver/actions/workflows/ci.yml/badge.svg)](https://github.com/yigitahmetsahin/captcha-solver/actions/workflows/ci.yml)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.0+-blue.svg)](https://www.typescriptlang.org/)
8
+
9
+ ## Features
10
+
11
+ - **AI Vision OCR** - Uses OpenAI vision models (o3, gpt-4o, etc.) to read distorted captcha text
12
+ - **Image Preprocessing** - PIL-based pipeline: grayscale, blur, upscale, contrast/sharpness enhancement, cropping
13
+ - **Majority Voting** - Runs multiple attempts and uses character-level majority voting for accuracy
14
+ - **Configurable** - Adjustable model, attempt count, expected length, and verbosity
15
+ - **TypeScript** - Full type safety with strict mode
16
+
17
+ ## Prerequisites
18
+
19
+ - Node.js >= 18
20
+ - Python 3 with Pillow, available on your `PATH` as `python3` (`pip install Pillow`)
21
+ - OpenAI API key
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ npm install @yigitahmetsahin/captcha-solver
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ ```typescript
32
+ import 'dotenv/config';
33
+ import { solveCaptchaImage } from '@yigitahmetsahin/captcha-solver';
34
+
35
+ const answer = await solveCaptchaImage('./captcha.png', {
36
+ numAttempts: 5,
37
+ expectedLength: 4,
38
+ model: 'o3',
39
+ });
40
+
41
+ console.log('Captcha answer:', answer);
42
+ ```
43
+
44
+ ## API
45
+
46
+ ### `solveCaptchaImage(imagePath, options?)`
47
+
48
+ Solve a captcha image using OpenAI vision + preprocessing + majority voting.
49
+
50
+ **Parameters:**
51
+
52
+ | Option | Type | Default | Description |
53
+ | ---------------- | --------- | ------- | ----------------------------------------------- |
54
+ | `model` | `string` | `'o3'` | OpenAI model to use |
55
+ | `numAttempts` | `number` | `5` | Number of voting attempts |
56
+ | `expectedLength` | `number` | - | Expected captcha length; attempts of other lengths are ignored unless none match, in which case all attempts are used |
57
+ | `maxRetries` | `number` | `2` | Max retries per attempt on API failure |
58
+ | `verbose` | `boolean` | `true` | Whether to log attempt details |
59
+
60
+ **Returns:** `Promise<string>` - The solved captcha text.
61
+
62
+ ### `preprocessCaptcha(imagePath)`
63
+
64
+ Preprocess a captcha image for better OCR accuracy. Returns base64-encoded PNG.
65
+
66
+ ### `imageToBase64(imagePath)`
67
+
68
+ Read an image file and return its base64-encoded content.
69
+
70
+ ## CLI Usage
71
+
72
+ ```bash
73
+ # Solve a single captcha
74
+ npm run solve -- path/to/captcha.png
75
+
76
+ # Solve with a specific model
77
+ npm run solve -- path/to/captcha.png --model gpt-4o
78
+
79
+ # Run benchmark (20 iterations)
80
+ npm run benchmark
81
+ ```
82
+
83
+ ## How It Works
84
+
85
+ 1. **Preprocessing** - The image is processed through a PIL pipeline:
86
+ - Convert to grayscale
87
+ - Apply Gaussian blur to smooth noise
88
+ - Upscale 4x with Lanczos interpolation
89
+ - Enhance contrast (3x) and sharpness (2x)
90
+ - Crop decorative borders
91
+ - Add white padding
92
+
93
+ 2. **Multiple Attempts** - The preprocessed image is sent to OpenAI's vision API multiple times with temperature=1 for diverse responses.
94
+
95
+ 3. **Majority Voting** - Character-level majority voting across all attempts determines the final answer, filtering by expected length if specified.
96
+
97
+ ## Development
98
+
99
+ ```bash
100
+ # Install dependencies
101
+ npm install
102
+
103
+ # Run tests
104
+ npm test
105
+
106
+ # Lint + format + type-check
107
+ npm run lint
108
+
109
+ # Build
110
+ npm run build
111
+ ```
112
+
113
+ ## License
114
+
115
+ MIT
package/dist/index.cjs ADDED
@@ -0,0 +1,198 @@
1
+ "use strict";
2
+ var __create = Object.create; // Bundler interop helpers: short aliases for the Object built-ins used below.
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => { // Defines every key of `all` on `target` as an enumerable getter.
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => { // Mirrors own properties of `from` onto `to` as getters; returns `to`.
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except) // Skip keys already on `to` and the single excluded key.
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps( // Wraps a required module in a new object that may gain a `default` property.
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // Copies `mod` onto a fresh object flagged with `__esModule: true`.
29
+
30
+ // src/index.ts
31
+ var index_exports = {}; // Holder object for the package's public exports.
32
+ __export(index_exports, { // Register each public function as a getter, so the functions declared later in the file resolve on access.
33
+ imageToBase64: () => imageToBase64,
34
+ preprocessCaptcha: () => preprocessCaptcha,
35
+ solveCaptchaImage: () => solveCaptchaImage
36
+ });
37
+ module.exports = __toCommonJS(index_exports); // Publish the getters as the CommonJS exports, flagged with `__esModule`.
38
+
39
+ // src/solver.ts
40
+ var import_openai = __toESM(require("openai"), 1);
41
+
42
+ // src/preprocess.ts
43
+ var import_fs = __toESM(require("fs"), 1);
44
+ var import_child_process = require("child_process");
45
+ var import_path = __toESM(require("path"), 1);
46
// Inline Pillow pipeline run by `python3 -c`. It reads the image path from
// sys.argv[1] and writes the processed PNG to stdout as base64 text.
var PYTHON_SCRIPT = `
import sys, base64, io
from PIL import Image, ImageFilter, ImageEnhance, ImageOps

image_path = sys.argv[1]
img = Image.open(image_path)
img = ImageOps.grayscale(img)
img = img.filter(ImageFilter.GaussianBlur(radius=1.2))
img = img.resize((img.width * 4, img.height * 4), Image.LANCZOS)
img = ImageEnhance.Contrast(img).enhance(3.0)
img = ImageEnhance.Sharpness(img).enhance(2.0)
w, h = img.size
img = img.crop((int(w * 0.10), int(h * 0.02), int(w * 0.90), int(h * 0.60)))
padded = Image.new('L', (img.width + 60, img.height + 40), 255)
padded.paste(img, (30, 20))
padded = padded.convert('RGB')
buf = io.BytesIO()
padded.save(buf, format='PNG')
sys.stdout.buffer.write(base64.b64encode(buf.getvalue()))
`;

/**
 * Preprocess a captcha image with Pillow through a `python3` subprocess.
 *
 * Pipeline (see PYTHON_SCRIPT): grayscale, Gaussian blur (radius 1.2), 4x
 * Lanczos upscale, contrast 3x, sharpness 2x, crop to 10%-90% of the width and
 * 2%-60% of the height, then paste onto a white canvas that is 60px wider and
 * 40px taller.
 *
 * @param {string} imagePath - Path to the captcha image; resolved to an absolute path.
 * @returns {Promise<string>} Base64-encoded PNG of the processed image.
 * @throws {Error} If `python3` cannot be started or exits with a non-zero status.
 */
async function preprocessCaptcha(imagePath) {
  const absPath = import_path.default.resolve(imagePath);
  // execFileSync passes the arguments as an array, so no shell ever parses the
  // image path: quotes, `$(...)` or backticks in a file name stay literal.
  // Handing the script to `python3 -c` also removes the need for a script file
  // at a fixed, predictable location in /tmp. With `-c`, sys.argv[0] is "-c"
  // and sys.argv[1] is the image path, which is what the script reads.
  const result = (0, import_child_process.execFileSync)("python3", ["-c", PYTHON_SCRIPT, absPath], {
    maxBuffer: 10 * 1024 * 1024, // 10MB
    encoding: "utf-8"
  });
  return result.trim();
}
77
/**
 * Load a file from disk and return its raw bytes as a base64 string.
 *
 * @param {string} imagePath - Path of the file to read.
 * @returns {string} Base64 encoding of the file contents.
 */
function imageToBase64(imagePath) {
  return import_fs.default.readFileSync(imagePath, { encoding: "base64" });
}
81
+
82
+ // src/solver.ts
83
// Instruction text sent alongside the image on every request.
var PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.
The text contains uppercase letters A-Z and/or digits 0-9.
A thin vertical stroke is likely the digit 1, not the letter I.
A round closed shape is the letter O, not the letter D.
Output ONLY the exact characters you read, nothing else.`;

/**
 * Ask the model once to read the captcha, retrying when the call throws.
 *
 * @param {object} client - Client exposing `chat.completions.create(request)`.
 * @param {string} base64Image - Base64 PNG data embedded in a data URL.
 * @param {string} model - Model name; names starting with "o" get `max_completion_tokens`, others `max_tokens`.
 * @param {number} maxRetries - Extra tries allowed after a thrown error.
 * @returns {Promise<string | null>} Upper-cased A-Z/0-9 text, or null when the
 *   reply looks like a refusal, is longer than 20 characters, cleans to an
 *   empty string, or every try threw.
 */
async function singleAttempt(client, base64Image, model, maxRetries) {
  let retry = 0;
  while (retry <= maxRetries) {
    try {
      const tokenParam = model.startsWith("o") ? { max_completion_tokens: 2e3 } : { max_tokens: 256 };
      const imagePart = {
        type: "image_url",
        image_url: { url: `data:image/png;base64,${base64Image}` }
      };
      const response = await client.chat.completions.create({
        model,
        messages: [{ role: "user", content: [{ type: "text", text: PROMPT }, imagePart] }],
        temperature: 1,
        ...tokenParam
      });
      const raw = response.choices[0]?.message?.content?.trim() ?? "";
      const lower = raw.toLowerCase();
      // Refusal phrases or an over-long reply mean the model did not just read the text.
      const refusalMarkers = ["sorry", "can't help", "cannot help", "unable to", "i can't"];
      if (refusalMarkers.some((marker) => lower.includes(marker)) || raw.length > 20) {
        return null;
      }
      const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
      return cleaned === "" ? null : cleaned;
    } catch (_err) {
      if (retry >= maxRetries) {
        return null;
      }
      // Linear backoff: 1s, 2s, 3s, ... before the next try.
      await new Promise((resolve) => setTimeout(resolve, 1e3 * (retry + 1)));
    }
    retry += 1;
  }
  return null;
}
129
/**
 * Character-level majority vote across several readings of the same captcha.
 *
 * Steps:
 *  1. If `expectedLength` is truthy, keep only readings of that length; when
 *     none match, fall back to all readings.
 *  2. Pick the most common length among the remaining readings.
 *  3. For each position, pick the most common character among the readings of
 *     that length.
 * Ties (for length and for characters) go to the value that appears first.
 *
 * @param {string[]} attempts - Candidate readings.
 * @param {number} [expectedLength] - Preferred length; omitted or 0 means no filter.
 * @returns {string} The voted string, or "" when `attempts` is empty.
 */
function majorityVote(attempts, expectedLength) {
  // Most frequent value in `values`; the strict `>` keeps the first-seen value on ties.
  const mostCommon = (values) => {
    const counts = new Map();
    for (const value of values) {
      counts.set(value, (counts.get(value) ?? 0) + 1);
    }
    let best;
    let bestCount = 0;
    for (const [value, count] of counts) {
      if (count > bestCount) {
        best = value;
        bestCount = count;
      }
    }
    return best;
  };

  const matching = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;
  // If the length filter removed everything, vote over all readings instead.
  const candidates = matching.length > 0 ? matching : attempts;
  if (candidates.length === 0) return "";

  const bestLen = mostCommon(candidates.map((a) => a.length));
  // bestLen comes from `candidates`, so at least one reading has that length.
  // The previous "no reading of this length" fallback could never run and is removed.
  const sameLenAttempts = candidates.filter((a) => a.length === bestLen);

  const result = [];
  for (let pos = 0; pos < bestLen; pos++) {
    result.push(mostCommon(sameLenAttempts.map((a) => a[pos])));
  }
  return result.join("");
}
168
/**
 * Solve a captcha image: preprocess it, query the model repeatedly, then
 * combine the readings with a majority vote.
 *
 * @param {string} imagePath - Path to the captcha image.
 * @param {object} [options]
 * @param {string} [options.model="o3"] - Model name passed to each request.
 * @param {number} [options.numAttempts=5] - Number of successful readings to collect.
 * @param {number} [options.expectedLength] - Preferred answer length, forwarded to the vote.
 * @param {number} [options.maxRetries=2] - Retries per request when the call throws.
 * @param {boolean} [options.verbose=true] - Log each reading and each failed call.
 * @returns {Promise<string>} The voted answer, or "" when no call produced a reading.
 */
async function solveCaptchaImage(imagePath, options = {}) {
  const { model = "o3", numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;
  const client = new import_openai.default({ apiKey: process.env.OPENAI_API_KEY });
  const base64Processed = await preprocessCaptcha(imagePath);

  // Up to 4 extra calls are allowed to make up for refusals and failures.
  const maxTotalCalls = numAttempts + 4;
  const attempts = [];
  let callCount = 0;
  const needsMoreCalls = () => attempts.length < numAttempts && callCount < maxTotalCalls;
  while (needsMoreCalls()) {
    callCount += 1;
    const result = await singleAttempt(client, base64Processed, model, maxRetries);
    if (!result) {
      if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);
      continue;
    }
    attempts.push(result);
    if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);
  }

  if (attempts.length === 0) {
    if (verbose) console.log(" All attempts failed!");
    return "";
  }
  return majorityVote(attempts, expectedLength);
}
192
+ // Annotate the CommonJS export names for ESM import in node:
193
+ 0 && (module.exports = {
194
+ imageToBase64,
195
+ preprocessCaptcha,
196
+ solveCaptchaImage
197
+ });
198
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts","../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["export { solveCaptchaImage } from './solver.js';\nexport { preprocessCaptcha, imageToBase64 } from './preprocess.js';\n","import OpenAI from 'openai';\nimport { preprocessCaptcha } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is likely the digit 1, not the letter I.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\ninterface SolverOptions {\n /** OpenAI model to use (default: \"o3\") */\n model?: string;\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded (default: undefined = no filter) */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n/**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\nasync function singleAttempt(\n client: OpenAI,\n base64Image: string,\n model: string,\n maxRetries: number\n): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n // Reasoning models (o3, o4-mini) use max_completion_tokens;\n // Standard models (gpt-4o, gpt-4.1, gpt-5.4-mini) use max_tokens.\n const isReasoningModel = model.startsWith('o');\n const tokenParam = isReasoningModel ? 
{ max_completion_tokens: 2000 } : { max_tokens: 256 };\n\n const response = await client.chat.completions.create({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n {\n type: 'image_url',\n image_url: {\n url: `data:image/png;base64,${base64Image}`,\n },\n },\n ],\n },\n ],\n temperature: 1,\n ...tokenParam,\n });\n\n const raw = response.choices[0]?.message?.content?.trim() ?? '';\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null; // Model refused — don't count as an attempt\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n // Wait briefly before retry\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n}\n\n/**\n * Character-level majority vote across multiple attempts.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n // Filter to expected length if specified\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n // If length filter removed everything, fall back to most common length\n if (filtered.length === 0) {\n filtered = attempts;\n }\n\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 
0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n let bestChar = '';\n let bestCharCount = 0;\n for (const [ch, count] of charCounts) {\n if (count > bestCharCount) {\n bestChar = ch;\n bestCharCount = count;\n }\n }\n result.push(bestChar);\n }\n\n return result.join('');\n}\n\n/**\n * Solve a captcha image using OpenAI vision + preprocessing + majority voting.\n */\nexport async function solveCaptchaImage(\n imagePath: string,\n options: SolverOptions = {}\n): Promise<string> {\n const { model = 'o3', numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });\n\n // Preprocess the image\n const base64Processed = await preprocessCaptcha(imagePath);\n\n // Run attempts — retry refusals/failures to guarantee numAttempts valid results\n const attempts: string[] = [];\n const maxTotalCalls = numAttempts + 4; // allow up to 4 extra calls for refusals\n let callCount = 0;\n while (attempts.length < numAttempts && callCount < maxTotalCalls) {\n callCount++;\n const result = await singleAttempt(client, base64Processed, model, maxRetries);\n if (result) {\n attempts.push(result);\n if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);\n } else {\n if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);\n }\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n // Majority vote\n 
const answer = majorityVote(attempts, expectedLength);\n return answer;\n}\n","import fs from 'fs';\nimport { execSync } from 'child_process';\nimport path from 'path';\n\n// Inline Python script for image preprocessing\n// Uses PIL which produces optimal results for captcha OCR\nconst PYTHON_SCRIPT = `\nimport sys, base64, io\nfrom PIL import Image, ImageFilter, ImageEnhance, ImageOps\n\nimage_path = sys.argv[1]\nimg = Image.open(image_path)\nimg = ImageOps.grayscale(img)\nimg = img.filter(ImageFilter.GaussianBlur(radius=1.2))\nimg = img.resize((img.width * 4, img.height * 4), Image.LANCZOS)\nimg = ImageEnhance.Contrast(img).enhance(3.0)\nimg = ImageEnhance.Sharpness(img).enhance(2.0)\nw, h = img.size\nimg = img.crop((int(w * 0.10), int(h * 0.02), int(w * 0.90), int(h * 0.60)))\npadded = Image.new('L', (img.width + 60, img.height + 40), 255)\npadded.paste(img, (30, 20))\npadded = padded.convert('RGB')\nbuf = io.BytesIO()\npadded.save(buf, format='PNG')\nsys.stdout.buffer.write(base64.b64encode(buf.getvalue()))\n`;\n\n/**\n * Preprocess a captcha image using PIL (via Python subprocess).\n *\n * Pipeline:\n * 1. Grayscale\n * 2. Gaussian blur (radius=1.2) to smooth dither pattern\n * 3. Upscale 4x with Lanczos\n * 4. Contrast 3x + Sharpness 2x (PIL enhancement — preserves soft gradients)\n * 5. Crop decorative borders\n * 6. 
Add white padding\n *\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(imagePath: string): Promise<string> {\n const absPath = path.resolve(imagePath);\n\n // Write the Python script to a temp file\n const scriptPath = '/tmp/_captcha_preprocess.py';\n fs.writeFileSync(scriptPath, PYTHON_SCRIPT);\n\n // Execute Python and capture base64 output\n const result = execSync(`python3 \"${scriptPath}\" \"${absPath}\"`, {\n maxBuffer: 10 * 1024 * 1024, // 10MB\n encoding: 'utf-8',\n });\n\n return result.trim();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return buffer.toString('base64');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,oBAAmB;;;ACAnB,gBAAe;AACf,2BAAyB;AACzB,kBAAiB;AAIjB,IAAM,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkCtB,eAAsB,kBAAkB,WAAoC;AAC1E,QAAM,UAAU,YAAAA,QAAK,QAAQ,SAAS;AAGtC,QAAM,aAAa;AACnB,YAAAC,QAAG,cAAc,YAAY,aAAa;AAG1C,QAAM,aAAS,+BAAS,YAAY,UAAU,MAAM,OAAO,KAAK;AAAA,IAC9D,WAAW,KAAK,OAAO;AAAA;AAAA,IACvB,UAAU;AAAA,EACZ,CAAC;AAED,SAAO,OAAO,KAAK;AACrB;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,UAAAA,QAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;AD3DA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAuBf,eAAe,cACb,QACA,aACA,OACA,YACwB;AACxB,WAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,QAAI;AAGF,YAAM,mBAAmB,MAAM,WAAW,GAAG;AAC7C,YAAM,aAAa,mBAAmB,EAAE,uBAAuB,IAAK,IAAI,EAAE,YAAY,IAAI;AAE1F,YAAM,WAAW,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,QACpD;AAAA,QACA,UAAU;AAAA,UACR;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,cACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,cAC7B;AAAA,gBACE,MAAM;AAAA,gBACN,WAAW;AAAA,kBACT,KAAK,yBAAyB,WAAW;AAAA,gBAC3C;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,QACA,aAAa;AAAA,QACb,GAAG;AAAA,MACL,CAAC;AAED,YAAM,MAAM,SAAS,QAAQ,CAAC,GAAG,SAAS,SAAS,KAAK,KAAK;AAG7D,YAAM,QAAQ,IAAI,YAAY;AAC9B,UACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YA
AY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,eAAO;AAAA,MACT;AAGA,YAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,aAAO,WAAW;AAAA,IACpB,SAAS,MAAM;AACb,UAAI,QAAQ,YAAY;AAEtB,cAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACA,SAAO;AACT;AAKA,SAAS,aAAa,UAAoB,gBAAiC;AAEzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAGtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AAEA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AACA,QAAI,WAAW;AACf,QAAI,gBAAgB;AACpB,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,UAAI,QAAQ,eAAe;AACzB,mBAAW;AACX,wBAAgB;AAAA,MAClB;AAAA,IACF;AACA,WAAO,KAAK,QAAQ;AAAA,EACtB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAKA,eAAsB,kBACpB,WACA,UAAyB,CAAC,GACT;AACjB,QAAM,EAAE,QAAQ,MAAM,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE1F,QAAM,SAAS,IAAI,cAAAC,QAAO,EAAE,QAAQ,QAAQ,IAAI,eAAe,CAAC;AAGhE,QAAM,kBAAkB,MAAM,kBAAkB,SAAS;AAGzD,QAAM,WAAqB,CAAC;AAC5B,QAAM,gBAAgB,cAAc;AACpC,MAAI,YAAY;AAChB,SAAO,SAAS,SAAS,eAAe,YAAY,eAAe;AACjE;AACA,UAAM,SAAS,MAAM,cAAc,QAAQ,iBAAiB,OAAO,UAAU;AAC7E,QAAI,QAAQ;AACV,eAAS,KAAK,MAAM;AACpB,UAAI,QAAS,SAAQ,IAAI,aAAa,SAAS,MAAM,KAAK,MAAM,EAAE;AAAA,IACpE,OAAO;AACL,UAAI,QAAS,SAAQ,IAAI,UAAU,SAAS,iCAAiC;AAAA,IAC/E;AAAA,EACF;AAEA,MAAI,SAAS,WAAW,GAAG;AACzB,QAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,aAAa,UAAU,cAAc;AACpD,SAAO;AACT;","names":["path","fs","OpenAI"]}
@@ -0,0 +1,37 @@
1
+ interface SolverOptions {
2
+ /** OpenAI model to use (default: "o3") */
3
+ model?: string;
4
+ /** Number of voting attempts (default: 5) */
5
+ numAttempts?: number;
6
+ /** Expected captcha length — results of other lengths are ignored unless no result matches, in which case all results are used (default: undefined = no filter) */
7
+ expectedLength?: number;
8
+ /** Max retries per attempt on API failure (default: 2) */
9
+ maxRetries?: number;
10
+ /** Whether to log attempt details (default: true) */
11
+ verbose?: boolean;
12
+ }
13
+ /**
14
+ * Solve a captcha image using OpenAI vision + preprocessing + majority voting.
15
+ */
16
+ declare function solveCaptchaImage(imagePath: string, options?: SolverOptions): Promise<string>;
17
+
18
+ /**
19
+ * Preprocess a captcha image using PIL (via Python subprocess).
20
+ *
21
+ * Pipeline:
22
+ * 1. Grayscale
23
+ * 2. Gaussian blur (radius=1.2) to smooth dither pattern
24
+ * 3. Upscale 4x with Lanczos
25
+ * 4. Contrast 3x + Sharpness 2x (PIL enhancement — preserves soft gradients)
26
+ * 5. Crop decorative borders
27
+ * 6. Add white padding
28
+ *
29
+ * Returns a base64-encoded PNG string.
30
+ */
31
+ declare function preprocessCaptcha(imagePath: string): Promise<string>;
32
+ /**
33
+ * Read an image file and return its base64-encoded content.
34
+ */
35
+ declare function imageToBase64(imagePath: string): string;
36
+
37
+ export { imageToBase64, preprocessCaptcha, solveCaptchaImage };
@@ -0,0 +1,37 @@
1
+ interface SolverOptions {
2
+ /** OpenAI model to use (default: "o3") */
3
+ model?: string;
4
+ /** Number of voting attempts (default: 5) */
5
+ numAttempts?: number;
6
+ /** Expected captcha length — results of other lengths are ignored unless no result matches, in which case all results are used (default: undefined = no filter) */
7
+ expectedLength?: number;
8
+ /** Max retries per attempt on API failure (default: 2) */
9
+ maxRetries?: number;
10
+ /** Whether to log attempt details (default: true) */
11
+ verbose?: boolean;
12
+ }
13
+ /**
14
+ * Solve a captcha image using OpenAI vision + preprocessing + majority voting.
15
+ */
16
+ declare function solveCaptchaImage(imagePath: string, options?: SolverOptions): Promise<string>;
17
+
18
+ /**
19
+ * Preprocess a captcha image using PIL (via Python subprocess).
20
+ *
21
+ * Pipeline:
22
+ * 1. Grayscale
23
+ * 2. Gaussian blur (radius=1.2) to smooth dither pattern
24
+ * 3. Upscale 4x with Lanczos
25
+ * 4. Contrast 3x + Sharpness 2x (PIL enhancement — preserves soft gradients)
26
+ * 5. Crop decorative borders
27
+ * 6. Add white padding
28
+ *
29
+ * Returns a base64-encoded PNG string.
30
+ */
31
+ declare function preprocessCaptcha(imagePath: string): Promise<string>;
32
+ /**
33
+ * Read an image file and return its base64-encoded content.
34
+ */
35
+ declare function imageToBase64(imagePath: string): string;
36
+
37
+ export { imageToBase64, preprocessCaptcha, solveCaptchaImage };
package/dist/index.js ADDED
@@ -0,0 +1,159 @@
1
+ // src/solver.ts
2
+ import OpenAI from "openai";
3
+
4
+ // src/preprocess.ts
5
+ import fs from "fs";
6
import { execFileSync, execSync } from "child_process";
7
+ import path from "path";
8
// Inline Pillow pipeline run by `python3 -c`. It reads the image path from
// sys.argv[1] and writes the processed PNG to stdout as base64 text.
var PYTHON_SCRIPT = `
import sys, base64, io
from PIL import Image, ImageFilter, ImageEnhance, ImageOps

image_path = sys.argv[1]
img = Image.open(image_path)
img = ImageOps.grayscale(img)
img = img.filter(ImageFilter.GaussianBlur(radius=1.2))
img = img.resize((img.width * 4, img.height * 4), Image.LANCZOS)
img = ImageEnhance.Contrast(img).enhance(3.0)
img = ImageEnhance.Sharpness(img).enhance(2.0)
w, h = img.size
img = img.crop((int(w * 0.10), int(h * 0.02), int(w * 0.90), int(h * 0.60)))
padded = Image.new('L', (img.width + 60, img.height + 40), 255)
padded.paste(img, (30, 20))
padded = padded.convert('RGB')
buf = io.BytesIO()
padded.save(buf, format='PNG')
sys.stdout.buffer.write(base64.b64encode(buf.getvalue()))
`;

/**
 * Preprocess a captcha image with Pillow through a `python3` subprocess.
 *
 * Pipeline (see PYTHON_SCRIPT): grayscale, Gaussian blur (radius 1.2), 4x
 * Lanczos upscale, contrast 3x, sharpness 2x, crop to 10%-90% of the width and
 * 2%-60% of the height, then paste onto a white canvas that is 60px wider and
 * 40px taller.
 *
 * @param {string} imagePath - Path to the captcha image; resolved to an absolute path.
 * @returns {Promise<string>} Base64-encoded PNG of the processed image.
 * @throws {Error} If `python3` cannot be started or exits with a non-zero status.
 */
async function preprocessCaptcha(imagePath) {
  const absPath = path.resolve(imagePath);
  // execFileSync passes the arguments as an array, so no shell ever parses the
  // image path: quotes, `$(...)` or backticks in a file name stay literal.
  // Handing the script to `python3 -c` also removes the need for a script file
  // at a fixed, predictable location in /tmp. With `-c`, sys.argv[0] is "-c"
  // and sys.argv[1] is the image path, which is what the script reads.
  const result = execFileSync("python3", ["-c", PYTHON_SCRIPT, absPath], {
    maxBuffer: 10 * 1024 * 1024, // 10MB
    encoding: "utf-8"
  });
  return result.trim();
}
39
/**
 * Load a file from disk and return its raw bytes as a base64 string.
 *
 * @param {string} imagePath - Path of the file to read.
 * @returns {string} Base64 encoding of the file contents.
 */
function imageToBase64(imagePath) {
  return fs.readFileSync(imagePath, { encoding: "base64" });
}
43
+
44
+ // src/solver.ts
45
// Instruction text sent alongside the image on every request.
var PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.
The text contains uppercase letters A-Z and/or digits 0-9.
A thin vertical stroke is likely the digit 1, not the letter I.
A round closed shape is the letter O, not the letter D.
Output ONLY the exact characters you read, nothing else.`;

/**
 * Ask the model once to read the captcha, retrying when the call throws.
 *
 * @param {object} client - Client exposing `chat.completions.create(request)`.
 * @param {string} base64Image - Base64 PNG data embedded in a data URL.
 * @param {string} model - Model name; names starting with "o" get `max_completion_tokens`, others `max_tokens`.
 * @param {number} maxRetries - Extra tries allowed after a thrown error.
 * @returns {Promise<string | null>} Upper-cased A-Z/0-9 text, or null when the
 *   reply looks like a refusal, is longer than 20 characters, cleans to an
 *   empty string, or every try threw.
 */
async function singleAttempt(client, base64Image, model, maxRetries) {
  let retry = 0;
  while (retry <= maxRetries) {
    try {
      const tokenParam = model.startsWith("o") ? { max_completion_tokens: 2e3 } : { max_tokens: 256 };
      const imagePart = {
        type: "image_url",
        image_url: { url: `data:image/png;base64,${base64Image}` }
      };
      const response = await client.chat.completions.create({
        model,
        messages: [{ role: "user", content: [{ type: "text", text: PROMPT }, imagePart] }],
        temperature: 1,
        ...tokenParam
      });
      const raw = response.choices[0]?.message?.content?.trim() ?? "";
      const lower = raw.toLowerCase();
      // Refusal phrases or an over-long reply mean the model did not just read the text.
      const refusalMarkers = ["sorry", "can't help", "cannot help", "unable to", "i can't"];
      if (refusalMarkers.some((marker) => lower.includes(marker)) || raw.length > 20) {
        return null;
      }
      const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
      return cleaned === "" ? null : cleaned;
    } catch (_err) {
      if (retry >= maxRetries) {
        return null;
      }
      // Linear backoff: 1s, 2s, 3s, ... before the next try.
      await new Promise((resolve) => setTimeout(resolve, 1e3 * (retry + 1)));
    }
    retry += 1;
  }
  return null;
}
91
/**
 * Character-level majority vote across several readings of the same captcha.
 *
 * Steps:
 *  1. If `expectedLength` is truthy, keep only readings of that length; when
 *     none match, fall back to all readings.
 *  2. Pick the most common length among the remaining readings.
 *  3. For each position, pick the most common character among the readings of
 *     that length.
 * Ties (for length and for characters) go to the value that appears first.
 *
 * @param {string[]} attempts - Candidate readings.
 * @param {number} [expectedLength] - Preferred length; omitted or 0 means no filter.
 * @returns {string} The voted string, or "" when `attempts` is empty.
 */
function majorityVote(attempts, expectedLength) {
  // Most frequent value in `values`; the strict `>` keeps the first-seen value on ties.
  const mostCommon = (values) => {
    const counts = new Map();
    for (const value of values) {
      counts.set(value, (counts.get(value) ?? 0) + 1);
    }
    let best;
    let bestCount = 0;
    for (const [value, count] of counts) {
      if (count > bestCount) {
        best = value;
        bestCount = count;
      }
    }
    return best;
  };

  const matching = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;
  // If the length filter removed everything, vote over all readings instead.
  const candidates = matching.length > 0 ? matching : attempts;
  if (candidates.length === 0) return "";

  const bestLen = mostCommon(candidates.map((a) => a.length));
  // bestLen comes from `candidates`, so at least one reading has that length.
  // The previous "no reading of this length" fallback could never run and is removed.
  const sameLenAttempts = candidates.filter((a) => a.length === bestLen);

  const result = [];
  for (let pos = 0; pos < bestLen; pos++) {
    result.push(mostCommon(sameLenAttempts.map((a) => a[pos])));
  }
  return result.join("");
}
130
/**
 * Solve a captcha image: preprocess it, query the model repeatedly, then
 * combine the readings with a majority vote.
 *
 * @param {string} imagePath - Path to the captcha image.
 * @param {object} [options]
 * @param {string} [options.model="o3"] - Model name passed to each request.
 * @param {number} [options.numAttempts=5] - Number of successful readings to collect.
 * @param {number} [options.expectedLength] - Preferred answer length, forwarded to the vote.
 * @param {number} [options.maxRetries=2] - Retries per request when the call throws.
 * @param {boolean} [options.verbose=true] - Log each reading and each failed call.
 * @returns {Promise<string>} The voted answer, or "" when no call produced a reading.
 */
async function solveCaptchaImage(imagePath, options = {}) {
  const { model = "o3", numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  const base64Processed = await preprocessCaptcha(imagePath);

  // Up to 4 extra calls are allowed to make up for refusals and failures.
  const maxTotalCalls = numAttempts + 4;
  const attempts = [];
  let callCount = 0;
  const needsMoreCalls = () => attempts.length < numAttempts && callCount < maxTotalCalls;
  while (needsMoreCalls()) {
    callCount += 1;
    const result = await singleAttempt(client, base64Processed, model, maxRetries);
    if (!result) {
      if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);
      continue;
    }
    attempts.push(result);
    if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);
  }

  if (attempts.length === 0) {
    if (verbose) console.log(" All attempts failed!");
    return "";
  }
  return majorityVote(attempts, expectedLength);
}
154
+ export {
155
+ imageToBase64,
156
+ preprocessCaptcha,
157
+ solveCaptchaImage
158
+ };
159
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["import OpenAI from 'openai';\nimport { preprocessCaptcha } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is likely the digit 1, not the letter I.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\ninterface SolverOptions {\n /** OpenAI model to use (default: \"o3\") */\n model?: string;\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded (default: undefined = no filter) */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n/**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\nasync function singleAttempt(\n client: OpenAI,\n base64Image: string,\n model: string,\n maxRetries: number\n): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n // Reasoning models (o3, o4-mini) use max_completion_tokens;\n // Standard models (gpt-4o, gpt-4.1, gpt-5.4-mini) use max_tokens.\n const isReasoningModel = model.startsWith('o');\n const tokenParam = isReasoningModel ? { max_completion_tokens: 2000 } : { max_tokens: 256 };\n\n const response = await client.chat.completions.create({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n {\n type: 'image_url',\n image_url: {\n url: `data:image/png;base64,${base64Image}`,\n },\n },\n ],\n },\n ],\n temperature: 1,\n ...tokenParam,\n });\n\n const raw = response.choices[0]?.message?.content?.trim() ?? 
'';\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null; // Model refused — don't count as an attempt\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n // Wait briefly before retry\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n}\n\n/**\n * Character-level majority vote across multiple attempts.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n // Filter to expected length if specified\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n // If length filter removed everything, fall back to most common length\n if (filtered.length === 0) {\n filtered = attempts;\n }\n\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 
0) + 1);\n }\n let bestChar = '';\n let bestCharCount = 0;\n for (const [ch, count] of charCounts) {\n if (count > bestCharCount) {\n bestChar = ch;\n bestCharCount = count;\n }\n }\n result.push(bestChar);\n }\n\n return result.join('');\n}\n\n/**\n * Solve a captcha image using OpenAI vision + preprocessing + majority voting.\n */\nexport async function solveCaptchaImage(\n imagePath: string,\n options: SolverOptions = {}\n): Promise<string> {\n const { model = 'o3', numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });\n\n // Preprocess the image\n const base64Processed = await preprocessCaptcha(imagePath);\n\n // Run attempts — retry refusals/failures to guarantee numAttempts valid results\n const attempts: string[] = [];\n const maxTotalCalls = numAttempts + 4; // allow up to 4 extra calls for refusals\n let callCount = 0;\n while (attempts.length < numAttempts && callCount < maxTotalCalls) {\n callCount++;\n const result = await singleAttempt(client, base64Processed, model, maxRetries);\n if (result) {\n attempts.push(result);\n if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);\n } else {\n if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);\n }\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n // Majority vote\n const answer = majorityVote(attempts, expectedLength);\n return answer;\n}\n","import fs from 'fs';\nimport { execSync } from 'child_process';\nimport path from 'path';\n\n// Inline Python script for image preprocessing\n// Uses PIL which produces optimal results for captcha OCR\nconst PYTHON_SCRIPT = `\nimport sys, base64, io\nfrom PIL import Image, ImageFilter, ImageEnhance, ImageOps\n\nimage_path = sys.argv[1]\nimg = Image.open(image_path)\nimg = ImageOps.grayscale(img)\nimg = img.filter(ImageFilter.GaussianBlur(radius=1.2))\nimg = 
img.resize((img.width * 4, img.height * 4), Image.LANCZOS)\nimg = ImageEnhance.Contrast(img).enhance(3.0)\nimg = ImageEnhance.Sharpness(img).enhance(2.0)\nw, h = img.size\nimg = img.crop((int(w * 0.10), int(h * 0.02), int(w * 0.90), int(h * 0.60)))\npadded = Image.new('L', (img.width + 60, img.height + 40), 255)\npadded.paste(img, (30, 20))\npadded = padded.convert('RGB')\nbuf = io.BytesIO()\npadded.save(buf, format='PNG')\nsys.stdout.buffer.write(base64.b64encode(buf.getvalue()))\n`;\n\n/**\n * Preprocess a captcha image using PIL (via Python subprocess).\n *\n * Pipeline:\n * 1. Grayscale\n * 2. Gaussian blur (radius=1.2) to smooth dither pattern\n * 3. Upscale 4x with Lanczos\n * 4. Contrast 3x + Sharpness 2x (PIL enhancement — preserves soft gradients)\n * 5. Crop decorative borders\n * 6. Add white padding\n *\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(imagePath: string): Promise<string> {\n const absPath = path.resolve(imagePath);\n\n // Write the Python script to a temp file\n const scriptPath = '/tmp/_captcha_preprocess.py';\n fs.writeFileSync(scriptPath, PYTHON_SCRIPT);\n\n // Execute Python and capture base64 output\n const result = execSync(`python3 \"${scriptPath}\" \"${absPath}\"`, {\n maxBuffer: 10 * 1024 * 1024, // 10MB\n encoding: 'utf-8',\n });\n\n return result.trim();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return 
buffer.toString('base64');\n}\n"],"mappings":";AAAA,OAAO,YAAY;;;ACAnB,OAAO,QAAQ;AACf,SAAS,gBAAgB;AACzB,OAAO,UAAU;AAIjB,IAAM,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkCtB,eAAsB,kBAAkB,WAAoC;AAC1E,QAAM,UAAU,KAAK,QAAQ,SAAS;AAGtC,QAAM,aAAa;AACnB,KAAG,cAAc,YAAY,aAAa;AAG1C,QAAM,SAAS,SAAS,YAAY,UAAU,MAAM,OAAO,KAAK;AAAA,IAC9D,WAAW,KAAK,OAAO;AAAA;AAAA,IACvB,UAAU;AAAA,EACZ,CAAC;AAED,SAAO,OAAO,KAAK;AACrB;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;AD3DA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAuBf,eAAe,cACb,QACA,aACA,OACA,YACwB;AACxB,WAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,QAAI;AAGF,YAAM,mBAAmB,MAAM,WAAW,GAAG;AAC7C,YAAM,aAAa,mBAAmB,EAAE,uBAAuB,IAAK,IAAI,EAAE,YAAY,IAAI;AAE1F,YAAM,WAAW,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,QACpD;AAAA,QACA,UAAU;AAAA,UACR;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,cACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,cAC7B;AAAA,gBACE,MAAM;AAAA,gBACN,WAAW;AAAA,kBACT,KAAK,yBAAyB,WAAW;AAAA,gBAC3C;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,QACA,aAAa;AAAA,QACb,GAAG;AAAA,MACL,CAAC;AAED,YAAM,MAAM,SAAS,QAAQ,CAAC,GAAG,SAAS,SAAS,KAAK,KAAK;AAG7D,YAAM,QAAQ,IAAI,YAAY;AAC9B,UACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,eAAO;AAAA,MACT;AAGA,YAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,aAAO,WAAW;AAAA,IACpB,SAAS,MAAM;AACb,UAAI,QAAQ,YAAY;AAEtB,cAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACA,SAAO;AACT;AAKA,SAAS,aAAa,UAAoB,gBAAiC;AAEzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAGtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AAEA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,
MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AACA,QAAI,WAAW;AACf,QAAI,gBAAgB;AACpB,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,UAAI,QAAQ,eAAe;AACzB,mBAAW;AACX,wBAAgB;AAAA,MAClB;AAAA,IACF;AACA,WAAO,KAAK,QAAQ;AAAA,EACtB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAKA,eAAsB,kBACpB,WACA,UAAyB,CAAC,GACT;AACjB,QAAM,EAAE,QAAQ,MAAM,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE1F,QAAM,SAAS,IAAI,OAAO,EAAE,QAAQ,QAAQ,IAAI,eAAe,CAAC;AAGhE,QAAM,kBAAkB,MAAM,kBAAkB,SAAS;AAGzD,QAAM,WAAqB,CAAC;AAC5B,QAAM,gBAAgB,cAAc;AACpC,MAAI,YAAY;AAChB,SAAO,SAAS,SAAS,eAAe,YAAY,eAAe;AACjE;AACA,UAAM,SAAS,MAAM,cAAc,QAAQ,iBAAiB,OAAO,UAAU;AAC7E,QAAI,QAAQ;AACV,eAAS,KAAK,MAAM;AACpB,UAAI,QAAS,SAAQ,IAAI,aAAa,SAAS,MAAM,KAAK,MAAM,EAAE;AAAA,IACpE,OAAO;AACL,UAAI,QAAS,SAAQ,IAAI,UAAU,SAAS,iCAAiC;AAAA,IAC/E;AAAA,EACF;AAEA,MAAI,SAAS,WAAW,GAAG;AACzB,QAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,aAAa,UAAU,cAAc;AACpD,SAAO;AACT;","names":[]}
package/package.json ADDED
@@ -0,0 +1,74 @@
1
+ {
2
+ "name": "@yigitahmetsahin/captcha-solver",
3
+ "version": "1.0.0",
4
+ "description": "AI-powered captcha solver using image preprocessing and OpenAI vision models",
5
+ "main": "dist/index.js",
6
+ "module": "dist/index.mjs",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.mjs",
12
+ "require": "./dist/index.js"
13
+ }
14
+ },
15
+ "files": [
16
+ "dist"
17
+ ],
18
+ "type": "module",
19
+ "scripts": {
20
+ "build": "tsup",
21
+ "solve": "tsx run.ts",
22
+ "benchmark": "tsx run.ts --benchmark 20",
23
+ "test": "vitest run",
24
+ "test:watch": "vitest",
25
+ "test:coverage": "vitest run --coverage",
26
+ "format": "prettier --write .",
27
+ "lint": "npm run format && eslint src --fix && tsc --noEmit -p tsconfig.check.json",
28
+ "lint:check": "prettier --check . && eslint src && tsc --noEmit -p tsconfig.check.json",
29
+ "prepublishOnly": "npm run build"
30
+ },
31
+ "keywords": [
32
+ "captcha",
33
+ "solver",
34
+ "ocr",
35
+ "openai",
36
+ "vision",
37
+ "image-processing",
38
+ "typescript"
39
+ ],
40
+ "author": "yigitahmetsahin",
41
+ "license": "MIT",
42
+ "repository": {
43
+ "type": "git",
44
+ "url": "git+https://github.com/yigitahmetsahin/captcha-solver.git"
45
+ },
46
+ "bugs": {
47
+ "url": "https://github.com/yigitahmetsahin/captcha-solver/issues"
48
+ },
49
+ "homepage": "https://github.com/yigitahmetsahin/captcha-solver#readme",
50
+ "dependencies": {
51
+ "dotenv": "^16.4.7",
52
+ "openai": "^4.77.0",
53
+ "sharp": "^0.33.5"
54
+ },
55
+ "devDependencies": {
56
+ "@eslint/js": "^9.39.2",
57
+ "@types/node": "^22.10.0",
58
+ "@vitest/coverage-v8": "^4.0.18",
59
+ "eslint": "^9.39.2",
60
+ "eslint-config-prettier": "^10.1.8",
61
+ "prettier": "^3.8.1",
62
+ "tsup": "^8.5.1",
63
+ "tsx": "^4.19.0",
64
+ "typescript": "^5.7.0",
65
+ "typescript-eslint": "^8.53.1",
66
+ "vitest": "^4.0.17"
67
+ },
68
+ "engines": {
69
+ "node": ">=18"
70
+ },
71
+ "publishConfig": {
72
+ "access": "public"
73
+ }
74
+ }