@yigitahmetsahin/captcha-solver 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +136 -71
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +53 -8
- package/dist/index.d.ts +53 -8
- package/dist/index.js +134 -70
- package/dist/index.js.map +1 -1
- package/package.json +23 -3
package/dist/index.cjs
CHANGED
|
@@ -30,25 +30,30 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
30
30
|
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
|
+
Solver: () => Solver,
|
|
33
34
|
imageToBase64: () => imageToBase64,
|
|
34
35
|
preprocessCaptcha: () => preprocessCaptcha,
|
|
35
|
-
|
|
36
|
+
preprocessCaptchaToBuffer: () => preprocessCaptchaToBuffer
|
|
36
37
|
});
|
|
37
38
|
module.exports = __toCommonJS(index_exports);
|
|
38
39
|
|
|
39
40
|
// src/solver.ts
|
|
40
|
-
var
|
|
41
|
+
var import_ai = require("ai");
|
|
41
42
|
|
|
42
43
|
// src/preprocess.ts
|
|
43
44
|
var import_fs = __toESM(require("fs"), 1);
|
|
44
45
|
var import_path = __toESM(require("path"), 1);
|
|
45
46
|
var import_sharp = __toESM(require("sharp"), 1);
|
|
46
|
-
async function preprocessCaptcha(
|
|
47
|
-
const
|
|
48
|
-
|
|
47
|
+
async function preprocessCaptcha(input) {
|
|
48
|
+
const buf = await preprocessCaptchaToBuffer(input);
|
|
49
|
+
return buf.toString("base64");
|
|
50
|
+
}
|
|
51
|
+
async function preprocessCaptchaToBuffer(input) {
|
|
52
|
+
const source = typeof input === "string" ? import_path.default.resolve(input) : input;
|
|
53
|
+
const metadata = await (0, import_sharp.default)(source).metadata();
|
|
49
54
|
const origW = metadata.width;
|
|
50
55
|
const origH = metadata.height;
|
|
51
|
-
const smoothed = await (0, import_sharp.default)(
|
|
56
|
+
const smoothed = await (0, import_sharp.default)(source).blur(1.5).greyscale().toBuffer();
|
|
52
57
|
const upscaled = await (0, import_sharp.default)(smoothed).resize(origW * 4, origH * 4, { kernel: "lanczos3" }).toBuffer();
|
|
53
58
|
const stats = await (0, import_sharp.default)(upscaled).stats();
|
|
54
59
|
const mean = stats.channels[0].mean;
|
|
@@ -61,14 +66,13 @@ async function preprocessCaptcha(imagePath) {
|
|
|
61
66
|
const cropBottom = Math.floor(scaledH * 0.6);
|
|
62
67
|
const cropW = cropRight - cropLeft;
|
|
63
68
|
const cropH = cropBottom - cropTop;
|
|
64
|
-
|
|
69
|
+
return (0, import_sharp.default)(enhanced).extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH }).extend({
|
|
65
70
|
top: 20,
|
|
66
71
|
bottom: 20,
|
|
67
72
|
left: 30,
|
|
68
73
|
right: 30,
|
|
69
74
|
background: { r: 255, g: 255, b: 255 }
|
|
70
75
|
}).png().toBuffer();
|
|
71
|
-
return result.toString("base64");
|
|
72
76
|
}
|
|
73
77
|
function imageToBase64(imagePath) {
|
|
74
78
|
const buffer = import_fs.default.readFileSync(imagePath);
|
|
@@ -81,60 +85,40 @@ The text contains uppercase letters A-Z and/or digits 0-9.
|
|
|
81
85
|
A thin vertical stroke is the digit 1. Never read it as the letter I or L.
|
|
82
86
|
A round closed shape is the letter O, not the letter D.
|
|
83
87
|
Output ONLY the exact characters you read, nothing else.`;
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
content: [
|
|
95
|
-
{ type: "text", text: PROMPT },
|
|
96
|
-
{
|
|
97
|
-
type: "image_url",
|
|
98
|
-
image_url: {
|
|
99
|
-
url: `data:image/png;base64,${base64Image}`
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
]
|
|
103
|
-
}
|
|
104
|
-
],
|
|
105
|
-
temperature: 1,
|
|
106
|
-
...tokenParam
|
|
107
|
-
});
|
|
108
|
-
const raw = response.choices[0]?.message?.content?.trim() ?? "";
|
|
109
|
-
const lower = raw.toLowerCase();
|
|
110
|
-
if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't") || raw.length > 20) {
|
|
111
|
-
return null;
|
|
112
|
-
}
|
|
113
|
-
const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
|
|
114
|
-
return cleaned || null;
|
|
115
|
-
} catch (_err) {
|
|
116
|
-
if (retry < maxRetries) {
|
|
117
|
-
await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
|
|
118
|
-
continue;
|
|
119
|
-
}
|
|
120
|
-
return null;
|
|
88
|
+
var DEFAULT_MODELS = {
|
|
89
|
+
openai: "gpt-4o",
|
|
90
|
+
anthropic: "claude-sonnet-4-20250514",
|
|
91
|
+
google: "gemini-2.0-flash"
|
|
92
|
+
};
|
|
93
|
+
async function resolveModel(apiKey, provider, modelId) {
|
|
94
|
+
switch (provider) {
|
|
95
|
+
case "openai": {
|
|
96
|
+
const { createOpenAI } = await import("@ai-sdk/openai");
|
|
97
|
+
return createOpenAI({ apiKey })(modelId);
|
|
121
98
|
}
|
|
99
|
+
case "anthropic": {
|
|
100
|
+
const { createAnthropic } = await import("@ai-sdk/anthropic");
|
|
101
|
+
return createAnthropic({ apiKey })(modelId);
|
|
102
|
+
}
|
|
103
|
+
case "google": {
|
|
104
|
+
const { createGoogleGenerativeAI } = await import("@ai-sdk/google");
|
|
105
|
+
return createGoogleGenerativeAI({ apiKey })(modelId);
|
|
106
|
+
}
|
|
107
|
+
default:
|
|
108
|
+
throw new Error(
|
|
109
|
+
`Unknown provider "${provider}". Install the matching @ai-sdk/* package and pass the model directly.`
|
|
110
|
+
);
|
|
122
111
|
}
|
|
123
|
-
return null;
|
|
124
112
|
}
|
|
125
113
|
var CONFUSION_GROUPS = {
|
|
126
|
-
// Thin vertical strokes → digit 1
|
|
127
114
|
"1": "1",
|
|
128
115
|
I: "1",
|
|
129
116
|
L: "1",
|
|
130
|
-
// Round shapes → letter O
|
|
131
117
|
O: "O",
|
|
132
118
|
D: "O",
|
|
133
119
|
"0": "O",
|
|
134
|
-
// Similar curves
|
|
135
120
|
S: "S",
|
|
136
121
|
"5": "S",
|
|
137
|
-
// Straight edges
|
|
138
122
|
Z: "Z",
|
|
139
123
|
"2": "Z"
|
|
140
124
|
};
|
|
@@ -182,34 +166,115 @@ function majorityVote(attempts, expectedLength) {
|
|
|
182
166
|
}
|
|
183
167
|
return result.join("");
|
|
184
168
|
}
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
169
|
+
var Solver = class {
|
|
170
|
+
_model = null;
|
|
171
|
+
_pendingModel = null;
|
|
172
|
+
/**
|
|
173
|
+
* Create a captcha solver.
|
|
174
|
+
*
|
|
175
|
+
* @example
|
|
176
|
+
* // Simple — defaults to OpenAI gpt-4o
|
|
177
|
+
* const solver = new Solver('sk-...');
|
|
178
|
+
*
|
|
179
|
+
* @example
|
|
180
|
+
* // Specify provider and model
|
|
181
|
+
* const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });
|
|
182
|
+
*
|
|
183
|
+
* @example
|
|
184
|
+
* // Pass an AI SDK model directly
|
|
185
|
+
* import { createOpenAI } from '@ai-sdk/openai';
|
|
186
|
+
* const openai = createOpenAI({ apiKey: 'sk-...' });
|
|
187
|
+
* const solver = new Solver(openai('gpt-4o'));
|
|
188
|
+
*/
|
|
189
|
+
constructor(keyOrModel, options) {
|
|
190
|
+
if (typeof keyOrModel === "string") {
|
|
191
|
+
const provider = options?.provider ?? "openai";
|
|
192
|
+
const modelId = options?.model ?? DEFAULT_MODELS[provider];
|
|
193
|
+
this._pendingModel = resolveModel(keyOrModel, provider, modelId);
|
|
198
194
|
} else {
|
|
199
|
-
|
|
195
|
+
this._model = keyOrModel;
|
|
200
196
|
}
|
|
201
197
|
}
|
|
202
|
-
|
|
203
|
-
if (
|
|
204
|
-
|
|
198
|
+
async getModel() {
|
|
199
|
+
if (this._model) return this._model;
|
|
200
|
+
this._model = await this._pendingModel;
|
|
201
|
+
this._pendingModel = null;
|
|
202
|
+
return this._model;
|
|
205
203
|
}
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
204
|
+
/**
|
|
205
|
+
* Solve a captcha image.
|
|
206
|
+
*
|
|
207
|
+
* @param input - File path (string) or raw image Buffer
|
|
208
|
+
* @param options - Solve options (attempts, expected length, etc.)
|
|
209
|
+
* @returns The captcha text
|
|
210
|
+
*/
|
|
211
|
+
async solve(input, options = {}) {
|
|
212
|
+
const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;
|
|
213
|
+
const model = await this.getModel();
|
|
214
|
+
const imageBuffer = await preprocessCaptchaToBuffer(input);
|
|
215
|
+
const attempts = [];
|
|
216
|
+
const maxTotalCalls = numAttempts + 4;
|
|
217
|
+
let callCount = 0;
|
|
218
|
+
while (attempts.length < numAttempts && callCount < maxTotalCalls) {
|
|
219
|
+
callCount++;
|
|
220
|
+
const result = await this.singleAttempt(model, imageBuffer, maxRetries);
|
|
221
|
+
if (result) {
|
|
222
|
+
attempts.push(result);
|
|
223
|
+
if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);
|
|
224
|
+
} else {
|
|
225
|
+
if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
if (attempts.length === 0) {
|
|
229
|
+
if (verbose) console.log(" All attempts failed!");
|
|
230
|
+
return "";
|
|
231
|
+
}
|
|
232
|
+
return majorityVote(attempts, expectedLength);
|
|
233
|
+
}
|
|
234
|
+
/**
|
|
235
|
+
* Make a single API call to read the captcha.
|
|
236
|
+
* Retries up to `maxRetries` times on failure.
|
|
237
|
+
*/
|
|
238
|
+
async singleAttempt(model, imageBuffer, maxRetries) {
|
|
239
|
+
for (let retry = 0; retry <= maxRetries; retry++) {
|
|
240
|
+
try {
|
|
241
|
+
const { text } = await (0, import_ai.generateText)({
|
|
242
|
+
model,
|
|
243
|
+
messages: [
|
|
244
|
+
{
|
|
245
|
+
role: "user",
|
|
246
|
+
content: [
|
|
247
|
+
{ type: "text", text: PROMPT },
|
|
248
|
+
{ type: "image", image: imageBuffer }
|
|
249
|
+
]
|
|
250
|
+
}
|
|
251
|
+
],
|
|
252
|
+
temperature: 1,
|
|
253
|
+
maxOutputTokens: 256
|
|
254
|
+
});
|
|
255
|
+
const raw = text.trim();
|
|
256
|
+
const lower = raw.toLowerCase();
|
|
257
|
+
if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't") || raw.length > 20) {
|
|
258
|
+
return null;
|
|
259
|
+
}
|
|
260
|
+
const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
|
|
261
|
+
return cleaned || null;
|
|
262
|
+
} catch (_err) {
|
|
263
|
+
if (retry < maxRetries) {
|
|
264
|
+
await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
return null;
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
return null;
|
|
271
|
+
}
|
|
272
|
+
};
|
|
209
273
|
// Annotate the CommonJS export names for ESM import in node:
|
|
210
274
|
0 && (module.exports = {
|
|
275
|
+
Solver,
|
|
211
276
|
imageToBase64,
|
|
212
277
|
preprocessCaptcha,
|
|
213
|
-
|
|
278
|
+
preprocessCaptchaToBuffer
|
|
214
279
|
});
|
|
215
280
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["export { solveCaptchaImage } from './solver.js';\nexport { preprocessCaptcha, imageToBase64 } from './preprocess.js';\n","import OpenAI from 'openai';\nimport { preprocessCaptcha } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\ninterface SolverOptions {\n /** OpenAI model to use (default: \"o3\") */\n model?: string;\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded (default: undefined = no filter) */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n/**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\nasync function singleAttempt(\n client: OpenAI,\n base64Image: string,\n model: string,\n maxRetries: number\n): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n // Reasoning models (o3, o4-mini) use max_completion_tokens;\n // Standard models (gpt-4o, gpt-4.1, gpt-5.4-mini) use max_tokens.\n const isReasoningModel = model.startsWith('o');\n const tokenParam = isReasoningModel ? { max_completion_tokens: 2000 } : { max_tokens: 256 };\n\n const response = await client.chat.completions.create({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n {\n type: 'image_url',\n image_url: {\n url: `data:image/png;base64,${base64Image}`,\n },\n },\n ],\n },\n ],\n temperature: 1,\n ...tokenParam,\n });\n\n const raw = response.choices[0]?.message?.content?.trim() ?? '';\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null; // Model refused — don't count as an attempt\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n // Wait briefly before retry\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n}\n\n/**\n * Confusion groups: characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n // Thin vertical strokes → digit 1\n '1': '1',\n I: '1',\n L: '1',\n // Round shapes → letter O\n O: 'O',\n D: 'O',\n '0': 'O',\n // Similar curves\n S: 'S',\n '5': 'S',\n // Straight edges\n Z: 'Z',\n '2': 'Z',\n};\n\n/**\n * Character-level majority vote across multiple attempts.\n *\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n * The canonical character for the winning group is used.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n // Filter to expected length if specified\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n // If length filter removed everything, fall back to most common length\n if (filtered.length === 0) {\n filtered = attempts;\n }\n\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n // Count raw characters\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n // Group by canonical form and sum counts\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n // Pick the group with the highest combined count\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n/**\n * Solve a captcha image using OpenAI vision + preprocessing + majority voting.\n */\nexport async function solveCaptchaImage(\n imagePath: string,\n options: SolverOptions = {}\n): Promise<string> {\n const { model = 'o3', numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });\n\n // Preprocess the image\n const base64Processed = await preprocessCaptcha(imagePath);\n\n // Run attempts — retry refusals/failures to guarantee numAttempts valid results\n const attempts: string[] = [];\n const maxTotalCalls = numAttempts + 4; // allow up to 4 extra calls for refusals\n let callCount = 0;\n while (attempts.length < numAttempts && callCount < maxTotalCalls) {\n callCount++;\n const result = await singleAttempt(client, base64Processed, model, maxRetries);\n if (result) {\n attempts.push(result);\n if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);\n } else {\n if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);\n }\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n // Majority vote\n const answer = majorityVote(attempts, expectedLength);\n return answer;\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. Add white padding\n *\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(imagePath: string): Promise<string> {\n const absPath = path.resolve(imagePath);\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(absPath).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(absPath).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 5-6: Crop → add white padding → output PNG\n const result = await sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n\n return result.toString('base64');\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return buffer.toString('base64');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,oBAAmB;;;ACAnB,gBAAe;AACf,kBAAiB;AACjB,mBAAkB;AAelB,eAAsB,kBAAkB,WAAoC;AAC1E,QAAM,UAAU,YAAAA,QAAK,QAAQ,SAAS;AAGtC,QAAM,WAAW,UAAM,aAAAC,SAAM,OAAO,EAAE,SAAS;AAC/C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,UAAM,aAAAA,SAAM,OAAO,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGrE,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,UAAM,aAAAA,SAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,QAAM,SAAS,UAAM,aAAAA,SAAM,QAAQ,EAChC,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AAEZ,SAAO,OAAO,SAAS,QAAQ;AACjC;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,UAAAC,QAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;AD1EA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAuBf,eAAe,cACb,QACA,aACA,OACA,YACwB;AACxB,WAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,QAAI;AAGF,YAAM,mBAAmB,MAAM,WAAW,GAAG;AAC7C,YAAM,aAAa,mBAAmB,EAAE,uBAAuB,IAAK,IAAI,EAAE,YAAY,IAAI;AAE1F,YAAM,WAAW,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,QACpD;AAAA,QACA,UAAU;AAAA,UACR;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,cACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,cAC7B;AAAA,gBACE,MAAM;AAAA,gBACN,WAAW;AAAA,kBACT,KAAK,yBAAyB,WAAW;AAAA,gBAC3C;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,QACA,aAAa;AAAA,QACb,GAAG;AAAA,MACL,CAAC;AAED,YAAM,MAAM,SAAS,QAAQ,CAAC,GAAG,SAAS,SAAS,KAAK,KAAK;AAG7D,YAAM,QAAQ,IAAI,YAAY;AAC9B,UACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,eAAO;AAAA,MACT;AAGA,YAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,aAAO,WAAW;AAAA,IACpB,SAAS,MAAM;AACb,UAAI,QAAQ,YAAY;AAEtB,cAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACA,SAAO;AACT;AAMA,IAAM,mBAA2C;AAAA;AAAA,EAE/C,KAAK;AAAA,EACL,GAAG;AAAA,EACH,GAAG;AAAA;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA;AAAA,EAEL,GAAG;AAAA,EACH,KAAK;AAAA;AAAA,EAEL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AAEzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAGtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AAEA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AAEtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAGA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAGA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAKA,eAAsB,kBACpB,WACA,UAAyB,CAAC,GACT;AACjB,QAAM,EAAE,QAAQ,MAAM,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE1F,QAAM,SAAS,IAAI,cAAAC,QAAO,EAAE,QAAQ,QAAQ,IAAI,eAAe,CAAC;AAGhE,QAAM,kBAAkB,MAAM,kBAAkB,SAAS;AAGzD,QAAM,WAAqB,CAAC;AAC5B,QAAM,gBAAgB,cAAc;AACpC,MAAI,YAAY;AAChB,SAAO,SAAS,SAAS,eAAe,YAAY,eAAe;AACjE;AACA,UAAM,SAAS,MAAM,cAAc,QAAQ,iBAAiB,OAAO,UAAU;AAC7E,QAAI,QAAQ;AACV,eAAS,KAAK,MAAM;AACpB,UAAI,QAAS,SAAQ,IAAI,aAAa,SAAS,MAAM,KAAK,MAAM,EAAE;AAAA,IACpE,OAAO;AACL,UAAI,QAAS,SAAQ,IAAI,UAAU,SAAS,iCAAiC;AAAA,IAC/E;AAAA,EACF;AAEA,MAAI,SAAS,WAAW,GAAG;AACzB,QAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,aAAa,UAAU,cAAc;AACpD,SAAO;AACT;","names":["path","sharp","fs","OpenAI"]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["export { Solver } from './solver.js';\nexport type { SolverOptions, SolveOptions, Provider } from './solver.js';\nexport { preprocessCaptcha, preprocessCaptchaToBuffer, imageToBase64 } from './preprocess.js';\n","import type { LanguageModel } from 'ai';\nimport { generateText } from 'ai';\nimport { preprocessCaptchaToBuffer } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\n// ── Types ────────────────────────────────────────────────────────────\n\nexport type Provider = 'openai' | 'anthropic' | 'google';\n\nexport interface SolverOptions {\n /** AI provider to use when constructing the model from an API key (default: \"openai\") */\n provider?: Provider;\n /** Model ID passed to the provider (default: \"gpt-4o\") */\n model?: string;\n}\n\nexport interface SolveOptions {\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n// ── Provider resolution ──────────────────────────────────────────────\n\nconst DEFAULT_MODELS: Record<Provider, string> = {\n openai: 'gpt-4o',\n anthropic: 'claude-sonnet-4-20250514',\n google: 'gemini-2.0-flash',\n};\n\nasync function resolveModel(\n apiKey: string,\n provider: Provider,\n modelId: string\n): Promise<LanguageModel> {\n switch (provider) {\n case 'openai': {\n const { createOpenAI } = await import('@ai-sdk/openai');\n return createOpenAI({ apiKey })(modelId);\n }\n case 'anthropic': {\n // @ts-expect-error — optional peer dependency\n const { createAnthropic } = await import('@ai-sdk/anthropic');\n return createAnthropic({ apiKey })(modelId);\n }\n case 'google': {\n // @ts-expect-error — optional peer dependency\n const { createGoogleGenerativeAI } = await import('@ai-sdk/google');\n return createGoogleGenerativeAI({ apiKey })(modelId);\n }\n default:\n throw new Error(\n `Unknown provider \"${provider}\". Install the matching @ai-sdk/* package and pass the model directly.`\n );\n }\n}\n\n// ── Confusion groups ─────────────────────────────────────────────────\n\n/**\n * Characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n '1': '1',\n I: '1',\n L: '1',\n O: 'O',\n D: 'O',\n '0': 'O',\n S: 'S',\n '5': 'S',\n Z: 'Z',\n '2': 'Z',\n};\n\n// ── Majority voting ──────────────────────────────────────────────────\n\n/**\n * Character-level majority vote across multiple attempts.\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n if (filtered.length === 0) {\n filtered = attempts;\n }\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n// ── Solver class ─────────────────────────────────────────────────────\n\nexport class Solver {\n private _model: LanguageModel | null = null;\n private _pendingModel: Promise<LanguageModel> | null = null;\n\n /**\n * Create a captcha solver.\n *\n * @example\n * // Simple — defaults to OpenAI gpt-4o\n * const solver = new Solver('sk-...');\n *\n * @example\n * // Specify provider and model\n * const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });\n *\n * @example\n * // Pass an AI SDK model directly\n * import { createOpenAI } from '@ai-sdk/openai';\n * const openai = createOpenAI({ apiKey: 'sk-...' });\n * const solver = new Solver(openai('gpt-4o'));\n */\n constructor(keyOrModel: string | LanguageModel, options?: SolverOptions) {\n if (typeof keyOrModel === 'string') {\n const provider = options?.provider ?? 'openai';\n const modelId = options?.model ?? DEFAULT_MODELS[provider];\n // Lazily resolve the model on first use\n this._pendingModel = resolveModel(keyOrModel, provider, modelId);\n } else {\n this._model = keyOrModel;\n }\n }\n\n private async getModel(): Promise<LanguageModel> {\n if (this._model) return this._model;\n this._model = await this._pendingModel!;\n this._pendingModel = null;\n return this._model;\n }\n\n /**\n * Solve a captcha image.\n *\n * @param input - File path (string) or raw image Buffer\n * @param options - Solve options (attempts, expected length, etc.)\n * @returns The captcha text\n */\n async solve(input: string | Buffer, options: SolveOptions = {}): Promise<string> {\n const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const model = await this.getModel();\n const imageBuffer = await preprocessCaptchaToBuffer(input);\n\n // Run attempts — retry refusals/failures to guarantee numAttempts valid results\n const attempts: string[] = [];\n const maxTotalCalls = numAttempts + 4;\n let callCount = 0;\n\n while (attempts.length < numAttempts && callCount < maxTotalCalls) {\n callCount++;\n const result = await this.singleAttempt(model, imageBuffer, maxRetries);\n if (result) {\n attempts.push(result);\n if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);\n } else {\n if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);\n }\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n return majorityVote(attempts, expectedLength);\n }\n\n /**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\n private async singleAttempt(\n model: LanguageModel,\n imageBuffer: Buffer,\n maxRetries: number\n ): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n const { text } = await generateText({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n { type: 'image', image: imageBuffer },\n ],\n },\n ],\n temperature: 1,\n maxOutputTokens: 256,\n });\n\n const raw = text.trim();\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null;\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n }\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. Add white padding\n *\n * Accepts a file path or a raw image Buffer.\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(input: string | Buffer): Promise<string> {\n const buf = await preprocessCaptchaToBuffer(input);\n return buf.toString('base64');\n}\n\n/**\n * Same preprocessing pipeline as `preprocessCaptcha`, but returns the\n * resulting PNG as a raw Buffer (useful for AI SDK image content parts).\n */\nexport async function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer> {\n const source = typeof input === 'string' ? path.resolve(input) : input;\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(source).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(source).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 5-6: Crop → add white padding → output PNG\n return sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return buffer.toString('base64');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACCA,gBAA6B;;;ACD7B,gBAAe;AACf,kBAAiB;AACjB,mBAAkB;AAgBlB,eAAsB,kBAAkB,OAAyC;AAC/E,QAAM,MAAM,MAAM,0BAA0B,KAAK;AACjD,SAAO,IAAI,SAAS,QAAQ;AAC9B;AAMA,eAAsB,0BAA0B,OAAyC;AACvF,QAAM,SAAS,OAAO,UAAU,WAAW,YAAAA,QAAK,QAAQ,KAAK,IAAI;AAGjE,QAAM,WAAW,UAAM,aAAAC,SAAM,MAAM,EAAE,SAAS;AAC9C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,UAAM,aAAAA,SAAM,MAAM,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGpE,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,UAAM,aAAAA,SAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,aAAO,aAAAA,SAAM,QAAQ,EAClB,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AACd;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,UAAAC,QAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;ADjFA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AA8Bf,IAAM,iBAA2C;AAAA,EAC/C,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ;AACV;AAEA,eAAe,aACb,QACA,UACA,SACwB;AACxB,UAAQ,UAAU;AAAA,IAChB,KAAK,UAAU;AACb,YAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,aAAO,aAAa,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACzC;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,mBAAmB;AAC5D,aAAO,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IAC5C;AAAA,IACA,KAAK,UAAU;AAEb,YAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,aAAO,yBAAyB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACrD;AAAA,IACA;AACE,YAAM,IAAI;AAAA,QACR,qBAAqB,QAAQ;AAAA,MAC/B;AAAA,EACJ;AACF;AAQA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AACzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAEtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAEA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAEA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAIO,IAAM,SAAN,MAAa;AAAA,EACV,SAA+B;AAAA,EAC/B,gBAA+C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBvD,YAAY,YAAoC,SAAyB;AACvE,QAAI,OAAO,eAAe,UAAU;AAClC,YAAM,WAAW,SAAS,YAAY;AACtC,YAAM,UAAU,SAAS,SAAS,eAAe,QAAQ;AAEzD,WAAK,gBAAgB,aAAa,YAAY,UAAU,OAAO;AAAA,IACjE,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAc,WAAmC;AAC/C,QAAI,KAAK,OAAQ,QAAO,KAAK;AAC7B,SAAK,SAAS,MAAM,KAAK;AACzB,SAAK,gBAAgB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAM,OAAwB,UAAwB,CAAC,GAAoB;AAC/E,UAAM,EAAE,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE5E,UAAM,QAAQ,MAAM,KAAK,SAAS;AAClC,UAAM,cAAc,MAAM,0BAA0B,KAAK;AAGzD,UAAM,WAAqB,CAAC;AAC5B,UAAM,gBAAgB,cAAc;AACpC,QAAI,YAAY;AAEhB,WAAO,SAAS,SAAS,eAAe,YAAY,eAAe;AACjE;AACA,YAAM,SAAS,MAAM,KAAK,cAAc,OAAO,aAAa,UAAU;AACtE,UAAI,QAAQ;AACV,iBAAS,KAAK,MAAM;AACpB,YAAI,QAAS,SAAQ,IAAI,aAAa,SAAS,MAAM,KAAK,MAAM,EAAE;AAAA,MACpE,OAAO;AACL,YAAI,QAAS,SAAQ,IAAI,UAAU,SAAS,iCAAiC;AAAA,MAC/E;AAAA,IACF;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,UAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,aAAO;AAAA,IACT;AAEA,WAAO,aAAa,UAAU,cAAc;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,cACZ,OACA,aACA,YACwB;AACxB,aAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,UAAI;AACF,cAAM,EAAE,KAAK,IAAI,UAAM,wBAAa;AAAA,UAClC;AAAA,UACA,UAAU;AAAA,YACR;AAAA,cACE,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,gBAC7B,EAAE,MAAM,SAAS,OAAO,YAAY;AAAA,cACtC;AAAA,YACF;AAAA,UACF;AAAA,UACA,aAAa;AAAA,UACb,iBAAiB;AAAA,QACnB,CAAC;AAED,cAAM,MAAM,KAAK,KAAK;AAGtB,cAAM,QAAQ,IAAI,YAAY;AAC9B,YACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,iBAAO;AAAA,QACT;AAGA,cAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,eAAO,WAAW;AAAA,MACpB,SAAS,MAAM;AACb,YAAI,QAAQ,YAAY;AACtB,gBAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;","names":["path","sharp","fs"]}
|
package/dist/index.d.cts
CHANGED
|
@@ -1,19 +1,58 @@
|
|
|
1
|
+
import { LanguageModel } from 'ai';
|
|
2
|
+
|
|
3
|
+
type Provider = 'openai' | 'anthropic' | 'google';
|
|
1
4
|
interface SolverOptions {
|
|
2
|
-
/**
|
|
5
|
+
/** AI provider to use when constructing the model from an API key (default: "openai") */
|
|
6
|
+
provider?: Provider;
|
|
7
|
+
/** Model ID passed to the provider (default: "gpt-4o") */
|
|
3
8
|
model?: string;
|
|
9
|
+
}
|
|
10
|
+
interface SolveOptions {
|
|
4
11
|
/** Number of voting attempts (default: 5) */
|
|
5
12
|
numAttempts?: number;
|
|
6
|
-
/** Expected captcha length — results of other lengths are discarded
|
|
13
|
+
/** Expected captcha length — results of other lengths are discarded */
|
|
7
14
|
expectedLength?: number;
|
|
8
15
|
/** Max retries per attempt on API failure (default: 2) */
|
|
9
16
|
maxRetries?: number;
|
|
10
17
|
/** Whether to log attempt details (default: true) */
|
|
11
18
|
verbose?: boolean;
|
|
12
19
|
}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
20
|
+
declare class Solver {
|
|
21
|
+
private _model;
|
|
22
|
+
private _pendingModel;
|
|
23
|
+
/**
|
|
24
|
+
* Create a captcha solver.
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* // Simple — defaults to OpenAI gpt-4o
|
|
28
|
+
* const solver = new Solver('sk-...');
|
|
29
|
+
*
|
|
30
|
+
* @example
|
|
31
|
+
* // Specify provider and model
|
|
32
|
+
* const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* // Pass an AI SDK model directly
|
|
36
|
+
* import { createOpenAI } from '@ai-sdk/openai';
|
|
37
|
+
* const openai = createOpenAI({ apiKey: 'sk-...' });
|
|
38
|
+
* const solver = new Solver(openai('gpt-4o'));
|
|
39
|
+
*/
|
|
40
|
+
constructor(keyOrModel: string | LanguageModel, options?: SolverOptions);
|
|
41
|
+
private getModel;
|
|
42
|
+
/**
|
|
43
|
+
* Solve a captcha image.
|
|
44
|
+
*
|
|
45
|
+
* @param input - File path (string) or raw image Buffer
|
|
46
|
+
* @param options - Solve options (attempts, expected length, etc.)
|
|
47
|
+
* @returns The captcha text
|
|
48
|
+
*/
|
|
49
|
+
solve(input: string | Buffer, options?: SolveOptions): Promise<string>;
|
|
50
|
+
/**
|
|
51
|
+
* Make a single API call to read the captcha.
|
|
52
|
+
* Retries up to `maxRetries` times on failure.
|
|
53
|
+
*/
|
|
54
|
+
private singleAttempt;
|
|
55
|
+
}
|
|
17
56
|
|
|
18
57
|
/**
|
|
19
58
|
* Preprocess a captcha image using sharp (libvips).
|
|
@@ -26,12 +65,18 @@ declare function solveCaptchaImage(imagePath: string, options?: SolverOptions):
|
|
|
26
65
|
* 5. Crop decorative borders
|
|
27
66
|
* 6. Add white padding
|
|
28
67
|
*
|
|
68
|
+
* Accepts a file path or a raw image Buffer.
|
|
29
69
|
* Returns a base64-encoded PNG string.
|
|
30
70
|
*/
|
|
31
|
-
declare function preprocessCaptcha(
|
|
71
|
+
declare function preprocessCaptcha(input: string | Buffer): Promise<string>;
|
|
72
|
+
/**
|
|
73
|
+
* Same preprocessing pipeline as `preprocessCaptcha`, but returns the
|
|
74
|
+
* resulting PNG as a raw Buffer (useful for AI SDK image content parts).
|
|
75
|
+
*/
|
|
76
|
+
declare function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer>;
|
|
32
77
|
/**
|
|
33
78
|
* Read an image file and return its base64-encoded content.
|
|
34
79
|
*/
|
|
35
80
|
declare function imageToBase64(imagePath: string): string;
|
|
36
81
|
|
|
37
|
-
export { imageToBase64, preprocessCaptcha,
|
|
82
|
+
export { type Provider, type SolveOptions, Solver, type SolverOptions, imageToBase64, preprocessCaptcha, preprocessCaptchaToBuffer };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,19 +1,58 @@
|
|
|
1
|
+
import { LanguageModel } from 'ai';
|
|
2
|
+
|
|
3
|
+
type Provider = 'openai' | 'anthropic' | 'google';
|
|
1
4
|
interface SolverOptions {
|
|
2
|
-
/**
|
|
5
|
+
/** AI provider to use when constructing the model from an API key (default: "openai") */
|
|
6
|
+
provider?: Provider;
|
|
7
|
+
/** Model ID passed to the provider (default: "gpt-4o") */
|
|
3
8
|
model?: string;
|
|
9
|
+
}
|
|
10
|
+
interface SolveOptions {
|
|
4
11
|
/** Number of voting attempts (default: 5) */
|
|
5
12
|
numAttempts?: number;
|
|
6
|
-
/** Expected captcha length — results of other lengths are discarded
|
|
13
|
+
/** Expected captcha length — results of other lengths are discarded */
|
|
7
14
|
expectedLength?: number;
|
|
8
15
|
/** Max retries per attempt on API failure (default: 2) */
|
|
9
16
|
maxRetries?: number;
|
|
10
17
|
/** Whether to log attempt details (default: true) */
|
|
11
18
|
verbose?: boolean;
|
|
12
19
|
}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
20
|
+
declare class Solver {
|
|
21
|
+
private _model;
|
|
22
|
+
private _pendingModel;
|
|
23
|
+
/**
|
|
24
|
+
* Create a captcha solver.
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* // Simple — defaults to OpenAI gpt-4o
|
|
28
|
+
* const solver = new Solver('sk-...');
|
|
29
|
+
*
|
|
30
|
+
* @example
|
|
31
|
+
* // Specify provider and model
|
|
32
|
+
* const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* // Pass an AI SDK model directly
|
|
36
|
+
* import { createOpenAI } from '@ai-sdk/openai';
|
|
37
|
+
* const openai = createOpenAI({ apiKey: 'sk-...' });
|
|
38
|
+
* const solver = new Solver(openai('gpt-4o'));
|
|
39
|
+
*/
|
|
40
|
+
constructor(keyOrModel: string | LanguageModel, options?: SolverOptions);
|
|
41
|
+
private getModel;
|
|
42
|
+
/**
|
|
43
|
+
* Solve a captcha image.
|
|
44
|
+
*
|
|
45
|
+
* @param input - File path (string) or raw image Buffer
|
|
46
|
+
* @param options - Solve options (attempts, expected length, etc.)
|
|
47
|
+
* @returns The captcha text
|
|
48
|
+
*/
|
|
49
|
+
solve(input: string | Buffer, options?: SolveOptions): Promise<string>;
|
|
50
|
+
/**
|
|
51
|
+
* Make a single API call to read the captcha.
|
|
52
|
+
* Retries up to `maxRetries` times on failure.
|
|
53
|
+
*/
|
|
54
|
+
private singleAttempt;
|
|
55
|
+
}
|
|
17
56
|
|
|
18
57
|
/**
|
|
19
58
|
* Preprocess a captcha image using sharp (libvips).
|
|
@@ -26,12 +65,18 @@ declare function solveCaptchaImage(imagePath: string, options?: SolverOptions):
|
|
|
26
65
|
* 5. Crop decorative borders
|
|
27
66
|
* 6. Add white padding
|
|
28
67
|
*
|
|
68
|
+
* Accepts a file path or a raw image Buffer.
|
|
29
69
|
* Returns a base64-encoded PNG string.
|
|
30
70
|
*/
|
|
31
|
-
declare function preprocessCaptcha(
|
|
71
|
+
declare function preprocessCaptcha(input: string | Buffer): Promise<string>;
|
|
72
|
+
/**
|
|
73
|
+
* Same preprocessing pipeline as `preprocessCaptcha`, but returns the
|
|
74
|
+
* resulting PNG as a raw Buffer (useful for AI SDK image content parts).
|
|
75
|
+
*/
|
|
76
|
+
declare function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer>;
|
|
32
77
|
/**
|
|
33
78
|
* Read an image file and return its base64-encoded content.
|
|
34
79
|
*/
|
|
35
80
|
declare function imageToBase64(imagePath: string): string;
|
|
36
81
|
|
|
37
|
-
export { imageToBase64, preprocessCaptcha,
|
|
82
|
+
export { type Provider, type SolveOptions, Solver, type SolverOptions, imageToBase64, preprocessCaptcha, preprocessCaptchaToBuffer };
|
package/dist/index.js
CHANGED
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
// src/solver.ts
|
|
2
|
-
import
|
|
2
|
+
import { generateText } from "ai";
|
|
3
3
|
|
|
4
4
|
// src/preprocess.ts
|
|
5
5
|
import fs from "fs";
|
|
6
6
|
import path from "path";
|
|
7
7
|
import sharp from "sharp";
|
|
8
|
-
async function preprocessCaptcha(
|
|
9
|
-
const
|
|
10
|
-
|
|
8
|
+
async function preprocessCaptcha(input) {
|
|
9
|
+
const buf = await preprocessCaptchaToBuffer(input);
|
|
10
|
+
return buf.toString("base64");
|
|
11
|
+
}
|
|
12
|
+
async function preprocessCaptchaToBuffer(input) {
|
|
13
|
+
const source = typeof input === "string" ? path.resolve(input) : input;
|
|
14
|
+
const metadata = await sharp(source).metadata();
|
|
11
15
|
const origW = metadata.width;
|
|
12
16
|
const origH = metadata.height;
|
|
13
|
-
const smoothed = await sharp(
|
|
17
|
+
const smoothed = await sharp(source).blur(1.5).greyscale().toBuffer();
|
|
14
18
|
const upscaled = await sharp(smoothed).resize(origW * 4, origH * 4, { kernel: "lanczos3" }).toBuffer();
|
|
15
19
|
const stats = await sharp(upscaled).stats();
|
|
16
20
|
const mean = stats.channels[0].mean;
|
|
@@ -23,14 +27,13 @@ async function preprocessCaptcha(imagePath) {
|
|
|
23
27
|
const cropBottom = Math.floor(scaledH * 0.6);
|
|
24
28
|
const cropW = cropRight - cropLeft;
|
|
25
29
|
const cropH = cropBottom - cropTop;
|
|
26
|
-
|
|
30
|
+
return sharp(enhanced).extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH }).extend({
|
|
27
31
|
top: 20,
|
|
28
32
|
bottom: 20,
|
|
29
33
|
left: 30,
|
|
30
34
|
right: 30,
|
|
31
35
|
background: { r: 255, g: 255, b: 255 }
|
|
32
36
|
}).png().toBuffer();
|
|
33
|
-
return result.toString("base64");
|
|
34
37
|
}
|
|
35
38
|
function imageToBase64(imagePath) {
|
|
36
39
|
const buffer = fs.readFileSync(imagePath);
|
|
@@ -43,60 +46,40 @@ The text contains uppercase letters A-Z and/or digits 0-9.
|
|
|
43
46
|
A thin vertical stroke is the digit 1. Never read it as the letter I or L.
|
|
44
47
|
A round closed shape is the letter O, not the letter D.
|
|
45
48
|
Output ONLY the exact characters you read, nothing else.`;
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
content: [
|
|
57
|
-
{ type: "text", text: PROMPT },
|
|
58
|
-
{
|
|
59
|
-
type: "image_url",
|
|
60
|
-
image_url: {
|
|
61
|
-
url: `data:image/png;base64,${base64Image}`
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
]
|
|
65
|
-
}
|
|
66
|
-
],
|
|
67
|
-
temperature: 1,
|
|
68
|
-
...tokenParam
|
|
69
|
-
});
|
|
70
|
-
const raw = response.choices[0]?.message?.content?.trim() ?? "";
|
|
71
|
-
const lower = raw.toLowerCase();
|
|
72
|
-
if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't") || raw.length > 20) {
|
|
73
|
-
return null;
|
|
74
|
-
}
|
|
75
|
-
const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
|
|
76
|
-
return cleaned || null;
|
|
77
|
-
} catch (_err) {
|
|
78
|
-
if (retry < maxRetries) {
|
|
79
|
-
await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
|
|
80
|
-
continue;
|
|
81
|
-
}
|
|
82
|
-
return null;
|
|
49
|
+
var DEFAULT_MODELS = {
|
|
50
|
+
openai: "gpt-4o",
|
|
51
|
+
anthropic: "claude-sonnet-4-20250514",
|
|
52
|
+
google: "gemini-2.0-flash"
|
|
53
|
+
};
|
|
54
|
+
async function resolveModel(apiKey, provider, modelId) {
|
|
55
|
+
switch (provider) {
|
|
56
|
+
case "openai": {
|
|
57
|
+
const { createOpenAI } = await import("@ai-sdk/openai");
|
|
58
|
+
return createOpenAI({ apiKey })(modelId);
|
|
83
59
|
}
|
|
60
|
+
case "anthropic": {
|
|
61
|
+
const { createAnthropic } = await import("@ai-sdk/anthropic");
|
|
62
|
+
return createAnthropic({ apiKey })(modelId);
|
|
63
|
+
}
|
|
64
|
+
case "google": {
|
|
65
|
+
const { createGoogleGenerativeAI } = await import("@ai-sdk/google");
|
|
66
|
+
return createGoogleGenerativeAI({ apiKey })(modelId);
|
|
67
|
+
}
|
|
68
|
+
default:
|
|
69
|
+
throw new Error(
|
|
70
|
+
`Unknown provider "${provider}". Install the matching @ai-sdk/* package and pass the model directly.`
|
|
71
|
+
);
|
|
84
72
|
}
|
|
85
|
-
return null;
|
|
86
73
|
}
|
|
87
74
|
var CONFUSION_GROUPS = {
|
|
88
|
-
// Thin vertical strokes → digit 1
|
|
89
75
|
"1": "1",
|
|
90
76
|
I: "1",
|
|
91
77
|
L: "1",
|
|
92
|
-
// Round shapes → letter O
|
|
93
78
|
O: "O",
|
|
94
79
|
D: "O",
|
|
95
80
|
"0": "O",
|
|
96
|
-
// Similar curves
|
|
97
81
|
S: "S",
|
|
98
82
|
"5": "S",
|
|
99
|
-
// Straight edges
|
|
100
83
|
Z: "Z",
|
|
101
84
|
"2": "Z"
|
|
102
85
|
};
|
|
@@ -144,33 +127,114 @@ function majorityVote(attempts, expectedLength) {
|
|
|
144
127
|
}
|
|
145
128
|
return result.join("");
|
|
146
129
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
130
|
+
var Solver = class {
|
|
131
|
+
_model = null;
|
|
132
|
+
_pendingModel = null;
|
|
133
|
+
/**
|
|
134
|
+
* Create a captcha solver.
|
|
135
|
+
*
|
|
136
|
+
* @example
|
|
137
|
+
* // Simple — defaults to OpenAI gpt-4o
|
|
138
|
+
* const solver = new Solver('sk-...');
|
|
139
|
+
*
|
|
140
|
+
* @example
|
|
141
|
+
* // Specify provider and model
|
|
142
|
+
* const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });
|
|
143
|
+
*
|
|
144
|
+
* @example
|
|
145
|
+
* // Pass an AI SDK model directly
|
|
146
|
+
* import { createOpenAI } from '@ai-sdk/openai';
|
|
147
|
+
* const openai = createOpenAI({ apiKey: 'sk-...' });
|
|
148
|
+
* const solver = new Solver(openai('gpt-4o'));
|
|
149
|
+
*/
|
|
150
|
+
constructor(keyOrModel, options) {
|
|
151
|
+
if (typeof keyOrModel === "string") {
|
|
152
|
+
const provider = options?.provider ?? "openai";
|
|
153
|
+
const modelId = options?.model ?? DEFAULT_MODELS[provider];
|
|
154
|
+
this._pendingModel = resolveModel(keyOrModel, provider, modelId);
|
|
160
155
|
} else {
|
|
161
|
-
|
|
156
|
+
this._model = keyOrModel;
|
|
162
157
|
}
|
|
163
158
|
}
|
|
164
|
-
|
|
165
|
-
if (
|
|
166
|
-
|
|
159
|
+
async getModel() {
|
|
160
|
+
if (this._model) return this._model;
|
|
161
|
+
this._model = await this._pendingModel;
|
|
162
|
+
this._pendingModel = null;
|
|
163
|
+
return this._model;
|
|
167
164
|
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
165
|
+
/**
|
|
166
|
+
* Solve a captcha image.
|
|
167
|
+
*
|
|
168
|
+
* @param input - File path (string) or raw image Buffer
|
|
169
|
+
* @param options - Solve options (attempts, expected length, etc.)
|
|
170
|
+
* @returns The captcha text
|
|
171
|
+
*/
|
|
172
|
+
async solve(input, options = {}) {
|
|
173
|
+
const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;
|
|
174
|
+
const model = await this.getModel();
|
|
175
|
+
const imageBuffer = await preprocessCaptchaToBuffer(input);
|
|
176
|
+
const attempts = [];
|
|
177
|
+
const maxTotalCalls = numAttempts + 4;
|
|
178
|
+
let callCount = 0;
|
|
179
|
+
while (attempts.length < numAttempts && callCount < maxTotalCalls) {
|
|
180
|
+
callCount++;
|
|
181
|
+
const result = await this.singleAttempt(model, imageBuffer, maxRetries);
|
|
182
|
+
if (result) {
|
|
183
|
+
attempts.push(result);
|
|
184
|
+
if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);
|
|
185
|
+
} else {
|
|
186
|
+
if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
if (attempts.length === 0) {
|
|
190
|
+
if (verbose) console.log(" All attempts failed!");
|
|
191
|
+
return "";
|
|
192
|
+
}
|
|
193
|
+
return majorityVote(attempts, expectedLength);
|
|
194
|
+
}
|
|
195
|
+
/**
|
|
196
|
+
* Make a single API call to read the captcha.
|
|
197
|
+
* Retries up to `maxRetries` times on failure.
|
|
198
|
+
*/
|
|
199
|
+
async singleAttempt(model, imageBuffer, maxRetries) {
|
|
200
|
+
for (let retry = 0; retry <= maxRetries; retry++) {
|
|
201
|
+
try {
|
|
202
|
+
const { text } = await generateText({
|
|
203
|
+
model,
|
|
204
|
+
messages: [
|
|
205
|
+
{
|
|
206
|
+
role: "user",
|
|
207
|
+
content: [
|
|
208
|
+
{ type: "text", text: PROMPT },
|
|
209
|
+
{ type: "image", image: imageBuffer }
|
|
210
|
+
]
|
|
211
|
+
}
|
|
212
|
+
],
|
|
213
|
+
temperature: 1,
|
|
214
|
+
maxOutputTokens: 256
|
|
215
|
+
});
|
|
216
|
+
const raw = text.trim();
|
|
217
|
+
const lower = raw.toLowerCase();
|
|
218
|
+
if (lower.includes("sorry") || lower.includes("can't help") || lower.includes("cannot help") || lower.includes("unable to") || lower.includes("i can't") || raw.length > 20) {
|
|
219
|
+
return null;
|
|
220
|
+
}
|
|
221
|
+
const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
|
|
222
|
+
return cleaned || null;
|
|
223
|
+
} catch (_err) {
|
|
224
|
+
if (retry < maxRetries) {
|
|
225
|
+
await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
|
|
226
|
+
continue;
|
|
227
|
+
}
|
|
228
|
+
return null;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
return null;
|
|
232
|
+
}
|
|
233
|
+
};
|
|
171
234
|
export {
|
|
235
|
+
Solver,
|
|
172
236
|
imageToBase64,
|
|
173
237
|
preprocessCaptcha,
|
|
174
|
-
|
|
238
|
+
preprocessCaptchaToBuffer
|
|
175
239
|
};
|
|
176
240
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["import OpenAI from 'openai';\nimport { preprocessCaptcha } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\ninterface SolverOptions {\n /** OpenAI model to use (default: \"o3\") */\n model?: string;\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded (default: undefined = no filter) */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n/**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\nasync function singleAttempt(\n client: OpenAI,\n base64Image: string,\n model: string,\n maxRetries: number\n): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n // Reasoning models (o3, o4-mini) use max_completion_tokens;\n // Standard models (gpt-4o, gpt-4.1, gpt-5.4-mini) use max_tokens.\n const isReasoningModel = model.startsWith('o');\n const tokenParam = isReasoningModel ? { max_completion_tokens: 2000 } : { max_tokens: 256 };\n\n const response = await client.chat.completions.create({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n {\n type: 'image_url',\n image_url: {\n url: `data:image/png;base64,${base64Image}`,\n },\n },\n ],\n },\n ],\n temperature: 1,\n ...tokenParam,\n });\n\n const raw = response.choices[0]?.message?.content?.trim() ?? '';\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null; // Model refused — don't count as an attempt\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n // Wait briefly before retry\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n}\n\n/**\n * Confusion groups: characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n // Thin vertical strokes → digit 1\n '1': '1',\n I: '1',\n L: '1',\n // Round shapes → letter O\n O: 'O',\n D: 'O',\n '0': 'O',\n // Similar curves\n S: 'S',\n '5': 'S',\n // Straight edges\n Z: 'Z',\n '2': 'Z',\n};\n\n/**\n * Character-level majority vote across multiple attempts.\n *\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n * The canonical character for the winning group is used.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n // Filter to expected length if specified\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n // If length filter removed everything, fall back to most common length\n if (filtered.length === 0) {\n filtered = attempts;\n }\n\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n // Count raw characters\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n // Group by canonical form and sum counts\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n // Pick the group with the highest combined count\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n/**\n * Solve a captcha image using OpenAI vision + preprocessing + majority voting.\n */\nexport async function solveCaptchaImage(\n imagePath: string,\n options: SolverOptions = {}\n): Promise<string> {\n const { model = 'o3', numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });\n\n // Preprocess the image\n const base64Processed = await preprocessCaptcha(imagePath);\n\n // Run attempts — retry refusals/failures to guarantee numAttempts valid results\n const attempts: string[] = [];\n const maxTotalCalls = numAttempts + 4; // allow up to 4 extra calls for refusals\n let callCount = 0;\n while (attempts.length < numAttempts && callCount < maxTotalCalls) {\n callCount++;\n const result = await singleAttempt(client, base64Processed, model, maxRetries);\n if (result) {\n attempts.push(result);\n if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);\n } else {\n if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);\n }\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n // Majority vote\n const answer = majorityVote(attempts, expectedLength);\n return answer;\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. Add white padding\n *\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(imagePath: string): Promise<string> {\n const absPath = path.resolve(imagePath);\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(absPath).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(absPath).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 5-6: Crop → add white padding → output PNG\n const result = await sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n\n return result.toString('base64');\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return buffer.toString('base64');\n}\n"],"mappings":";AAAA,OAAO,YAAY;;;ACAnB,OAAO,QAAQ;AACf,OAAO,UAAU;AACjB,OAAO,WAAW;AAelB,eAAsB,kBAAkB,WAAoC;AAC1E,QAAM,UAAU,KAAK,QAAQ,SAAS;AAGtC,QAAM,WAAW,MAAM,MAAM,OAAO,EAAE,SAAS;AAC/C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,MAAM,MAAM,OAAO,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGrE,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,MAAM,MAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,QAAM,SAAS,MAAM,MAAM,QAAQ,EAChC,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AAEZ,SAAO,OAAO,SAAS,QAAQ;AACjC;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;AD1EA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAuBf,eAAe,cACb,QACA,aACA,OACA,YACwB;AACxB,WAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,QAAI;AAGF,YAAM,mBAAmB,MAAM,WAAW,GAAG;AAC7C,YAAM,aAAa,mBAAmB,EAAE,uBAAuB,IAAK,IAAI,EAAE,YAAY,IAAI;AAE1F,YAAM,WAAW,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,QACpD;AAAA,QACA,UAAU;AAAA,UACR;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,cACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,cAC7B;AAAA,gBACE,MAAM;AAAA,gBACN,WAAW;AAAA,kBACT,KAAK,yBAAyB,WAAW;AAAA,gBAC3C;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,QACA,aAAa;AAAA,QACb,GAAG;AAAA,MACL,CAAC;AAED,YAAM,MAAM,SAAS,QAAQ,CAAC,GAAG,SAAS,SAAS,KAAK,KAAK;AAG7D,YAAM,QAAQ,IAAI,YAAY;AAC9B,UACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,eAAO;AAAA,MACT;AAGA,YAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,aAAO,WAAW;AAAA,IACpB,SAAS,MAAM;AACb,UAAI,QAAQ,YAAY;AAEtB,cAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACA,SAAO;AACT;AAMA,IAAM,mBAA2C;AAAA;AAAA,EAE/C,KAAK;AAAA,EACL,GAAG;AAAA,EACH,GAAG;AAAA;AAAA,EAEH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA;AAAA,EAEL,GAAG;AAAA,EACH,KAAK;AAAA;AAAA,EAEL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AAEzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAGtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AAEA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AAEtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAGA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAGA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAKA,eAAsB,kBACpB,WACA,UAAyB,CAAC,GACT;AACjB,QAAM,EAAE,QAAQ,MAAM,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE1F,QAAM,SAAS,IAAI,OAAO,EAAE,QAAQ,QAAQ,IAAI,eAAe,CAAC;AAGhE,QAAM,kBAAkB,MAAM,kBAAkB,SAAS;AAGzD,QAAM,WAAqB,CAAC;AAC5B,QAAM,gBAAgB,cAAc;AACpC,MAAI,YAAY;AAChB,SAAO,SAAS,SAAS,eAAe,YAAY,eAAe;AACjE;AACA,UAAM,SAAS,MAAM,cAAc,QAAQ,iBAAiB,OAAO,UAAU;AAC7E,QAAI,QAAQ;AACV,eAAS,KAAK,MAAM;AACpB,UAAI,QAAS,SAAQ,IAAI,aAAa,SAAS,MAAM,KAAK,MAAM,EAAE;AAAA,IACpE,OAAO;AACL,UAAI,QAAS,SAAQ,IAAI,UAAU,SAAS,iCAAiC;AAAA,IAC/E;AAAA,EACF;AAEA,MAAI,SAAS,WAAW,GAAG;AACzB,QAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,WAAO;AAAA,EACT;AAGA,QAAM,SAAS,aAAa,UAAU,cAAc;AACpD,SAAO;AACT;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["import type { LanguageModel } from 'ai';\nimport { generateText } from 'ai';\nimport { preprocessCaptchaToBuffer } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\n// ── Types ────────────────────────────────────────────────────────────\n\nexport type Provider = 'openai' | 'anthropic' | 'google';\n\nexport interface SolverOptions {\n /** AI provider to use when constructing the model from an API key (default: \"openai\") */\n provider?: Provider;\n /** Model ID passed to the provider (default: \"gpt-4o\") */\n model?: string;\n}\n\nexport interface SolveOptions {\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n// ── Provider resolution ──────────────────────────────────────────────\n\nconst DEFAULT_MODELS: Record<Provider, string> = {\n openai: 'gpt-4o',\n anthropic: 'claude-sonnet-4-20250514',\n google: 'gemini-2.0-flash',\n};\n\nasync function resolveModel(\n apiKey: string,\n provider: Provider,\n modelId: string\n): Promise<LanguageModel> {\n switch (provider) {\n case 'openai': {\n const { createOpenAI } = await import('@ai-sdk/openai');\n return createOpenAI({ apiKey })(modelId);\n }\n case 'anthropic': {\n // @ts-expect-error — optional peer dependency\n const { createAnthropic } = await import('@ai-sdk/anthropic');\n return createAnthropic({ apiKey })(modelId);\n }\n case 'google': {\n // @ts-expect-error — optional peer dependency\n const { createGoogleGenerativeAI } = await import('@ai-sdk/google');\n return createGoogleGenerativeAI({ apiKey })(modelId);\n }\n default:\n throw new Error(\n `Unknown provider \"${provider}\". Install the matching @ai-sdk/* package and pass the model directly.`\n );\n }\n}\n\n// ── Confusion groups ─────────────────────────────────────────────────\n\n/**\n * Characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n '1': '1',\n I: '1',\n L: '1',\n O: 'O',\n D: 'O',\n '0': 'O',\n S: 'S',\n '5': 'S',\n Z: 'Z',\n '2': 'Z',\n};\n\n// ── Majority voting ──────────────────────────────────────────────────\n\n/**\n * Character-level majority vote across multiple attempts.\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n if (filtered.length === 0) {\n filtered = attempts;\n }\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n// ── Solver class ─────────────────────────────────────────────────────\n\nexport class Solver {\n private _model: LanguageModel | null = null;\n private _pendingModel: Promise<LanguageModel> | null = null;\n\n /**\n * Create a captcha solver.\n *\n * @example\n * // Simple — defaults to OpenAI gpt-4o\n * const solver = new Solver('sk-...');\n *\n * @example\n * // Specify provider and model\n * const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });\n *\n * @example\n * // Pass an AI SDK model directly\n * import { createOpenAI } from '@ai-sdk/openai';\n * const openai = createOpenAI({ apiKey: 'sk-...' });\n * const solver = new Solver(openai('gpt-4o'));\n */\n constructor(keyOrModel: string | LanguageModel, options?: SolverOptions) {\n if (typeof keyOrModel === 'string') {\n const provider = options?.provider ?? 'openai';\n const modelId = options?.model ?? DEFAULT_MODELS[provider];\n // Lazily resolve the model on first use\n this._pendingModel = resolveModel(keyOrModel, provider, modelId);\n } else {\n this._model = keyOrModel;\n }\n }\n\n private async getModel(): Promise<LanguageModel> {\n if (this._model) return this._model;\n this._model = await this._pendingModel!;\n this._pendingModel = null;\n return this._model;\n }\n\n /**\n * Solve a captcha image.\n *\n * @param input - File path (string) or raw image Buffer\n * @param options - Solve options (attempts, expected length, etc.)\n * @returns The captcha text\n */\n async solve(input: string | Buffer, options: SolveOptions = {}): Promise<string> {\n const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const model = await this.getModel();\n const imageBuffer = await preprocessCaptchaToBuffer(input);\n\n // Run attempts — retry refusals/failures to guarantee numAttempts valid results\n const attempts: string[] = [];\n const maxTotalCalls = numAttempts + 4;\n let callCount = 0;\n\n while (attempts.length < numAttempts && callCount < maxTotalCalls) {\n callCount++;\n const result = await this.singleAttempt(model, imageBuffer, maxRetries);\n if (result) {\n attempts.push(result);\n if (verbose) console.log(` Attempt ${attempts.length}: ${result}`);\n } else {\n if (verbose) console.log(` Call ${callCount}: (refused/failed, retrying...)`);\n }\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n return majorityVote(attempts, expectedLength);\n }\n\n /**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\n private async singleAttempt(\n model: LanguageModel,\n imageBuffer: Buffer,\n maxRetries: number\n ): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n const { text } = await generateText({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n { type: 'image', image: imageBuffer },\n ],\n },\n ],\n temperature: 1,\n maxOutputTokens: 256,\n });\n\n const raw = text.trim();\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null;\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n }\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. Add white padding\n *\n * Accepts a file path or a raw image Buffer.\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(input: string | Buffer): Promise<string> {\n const buf = await preprocessCaptchaToBuffer(input);\n return buf.toString('base64');\n}\n\n/**\n * Same preprocessing pipeline as `preprocessCaptcha`, but returns the\n * resulting PNG as a raw Buffer (useful for AI SDK image content parts).\n */\nexport async function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer> {\n const source = typeof input === 'string' ? path.resolve(input) : input;\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(source).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(source).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 5-6: Crop → add white padding → output PNG\n return sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return buffer.toString('base64');\n}\n"],"mappings":";AACA,SAAS,oBAAoB;;;ACD7B,OAAO,QAAQ;AACf,OAAO,UAAU;AACjB,OAAO,WAAW;AAgBlB,eAAsB,kBAAkB,OAAyC;AAC/E,QAAM,MAAM,MAAM,0BAA0B,KAAK;AACjD,SAAO,IAAI,SAAS,QAAQ;AAC9B;AAMA,eAAsB,0BAA0B,OAAyC;AACvF,QAAM,SAAS,OAAO,UAAU,WAAW,KAAK,QAAQ,KAAK,IAAI;AAGjE,QAAM,WAAW,MAAM,MAAM,MAAM,EAAE,SAAS;AAC9C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,MAAM,MAAM,MAAM,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGpE,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,MAAM,MAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,SAAO,MAAM,QAAQ,EAClB,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AACd;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;ADjFA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AA8Bf,IAAM,iBAA2C;AAAA,EAC/C,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ;AACV;AAEA,eAAe,aACb,QACA,UACA,SACwB;AACxB,UAAQ,UAAU;AAAA,IAChB,KAAK,UAAU;AACb,YAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,aAAO,aAAa,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACzC;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,mBAAmB;AAC5D,aAAO,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IAC5C;AAAA,IACA,KAAK,UAAU;AAEb,YAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,aAAO,yBAAyB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACrD;AAAA,IACA;AACE,YAAM,IAAI;AAAA,QACR,qBAAqB,QAAQ;AAAA,MAC/B;AAAA,EACJ;AACF;AAQA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AACzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAEtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAEA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAEA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAIO,IAAM,SAAN,MAAa;AAAA,EACV,SAA+B;AAAA,EAC/B,gBAA+C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBvD,YAAY,YAAoC,SAAyB;AACvE,QAAI,OAAO,eAAe,UAAU;AAClC,YAAM,WAAW,SAAS,YAAY;AACtC,YAAM,UAAU,SAAS,SAAS,eAAe,QAAQ;AAEzD,WAAK,gBAAgB,aAAa,YAAY,UAAU,OAAO;AAAA,IACjE,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAc,WAAmC;AAC/C,QAAI,KAAK,OAAQ,QAAO,KAAK;AAC7B,SAAK,SAAS,MAAM,KAAK;AACzB,SAAK,gBAAgB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAM,OAAwB,UAAwB,CAAC,GAAoB;AAC/E,UAAM,EAAE,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE5E,UAAM,QAAQ,MAAM,KAAK,SAAS;AAClC,UAAM,cAAc,MAAM,0BAA0B,KAAK;AAGzD,UAAM,WAAqB,CAAC;AAC5B,UAAM,gBAAgB,cAAc;AACpC,QAAI,YAAY;AAEhB,WAAO,SAAS,SAAS,eAAe,YAAY,eAAe;AACjE;AACA,YAAM,SAAS,MAAM,KAAK,cAAc,OAAO,aAAa,UAAU;AACtE,UAAI,QAAQ;AACV,iBAAS,KAAK,MAAM;AACpB,YAAI,QAAS,SAAQ,IAAI,aAAa,SAAS,MAAM,KAAK,MAAM,EAAE;AAAA,MACpE,OAAO;AACL,YAAI,QAAS,SAAQ,IAAI,UAAU,SAAS,iCAAiC;AAAA,MAC/E;AAAA,IACF;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,UAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,aAAO;AAAA,IACT;AAEA,WAAO,aAAa,UAAU,cAAc;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,cACZ,OACA,aACA,YACwB;AACxB,aAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,UAAI;AACF,cAAM,EAAE,KAAK,IAAI,MAAM,aAAa;AAAA,UAClC;AAAA,UACA,UAAU;AAAA,YACR;AAAA,cACE,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,gBAC7B,EAAE,MAAM,SAAS,OAAO,YAAY;AAAA,cACtC;AAAA,YACF;AAAA,UACF;AAAA,UACA,aAAa;AAAA,UACb,iBAAiB;AAAA,QACnB,CAAC;AAED,cAAM,MAAM,KAAK,KAAK;AAGtB,cAAM,QAAQ,IAAI,YAAY;AAC9B,YACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,iBAAO;AAAA,QACT;AAGA,cAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,eAAO,WAAW;AAAA,MACpB,SAAS,MAAM;AACb,YAAI,QAAQ,YAAY;AACtB,gBAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yigitahmetsahin/captcha-solver",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "AI-powered captcha solver using image preprocessing and
|
|
3
|
+
"version": "1.2.0",
|
|
4
|
+
"description": "AI-powered captcha solver using image preprocessing and multi-provider vision models (Vercel AI SDK)",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -32,7 +32,10 @@
|
|
|
32
32
|
"captcha",
|
|
33
33
|
"solver",
|
|
34
34
|
"ocr",
|
|
35
|
+
"ai-sdk",
|
|
35
36
|
"openai",
|
|
37
|
+
"anthropic",
|
|
38
|
+
"google",
|
|
36
39
|
"vision",
|
|
37
40
|
"image-processing",
|
|
38
41
|
"typescript"
|
|
@@ -48,11 +51,12 @@
|
|
|
48
51
|
},
|
|
49
52
|
"homepage": "https://github.com/yigitahmetsahin/captcha-solver#readme",
|
|
50
53
|
"dependencies": {
|
|
54
|
+
"ai": "^6.0.146",
|
|
51
55
|
"dotenv": "^16.4.7",
|
|
52
|
-
"openai": "^4.77.0",
|
|
53
56
|
"sharp": "^0.33.5"
|
|
54
57
|
},
|
|
55
58
|
"devDependencies": {
|
|
59
|
+
"@ai-sdk/openai": "^3.0.50",
|
|
56
60
|
"@eslint/js": "^9.39.2",
|
|
57
61
|
"@types/node": "^22.10.0",
|
|
58
62
|
"@vitest/coverage-v8": "^4.0.18",
|
|
@@ -65,6 +69,22 @@
|
|
|
65
69
|
"typescript-eslint": "^8.53.1",
|
|
66
70
|
"vitest": "^4.0.17"
|
|
67
71
|
},
|
|
72
|
+
"peerDependencies": {
|
|
73
|
+
"@ai-sdk/openai": ">=1.0.0",
|
|
74
|
+
"@ai-sdk/anthropic": ">=1.0.0",
|
|
75
|
+
"@ai-sdk/google": ">=1.0.0"
|
|
76
|
+
},
|
|
77
|
+
"peerDependenciesMeta": {
|
|
78
|
+
"@ai-sdk/openai": {
|
|
79
|
+
"optional": true
|
|
80
|
+
},
|
|
81
|
+
"@ai-sdk/anthropic": {
|
|
82
|
+
"optional": true
|
|
83
|
+
},
|
|
84
|
+
"@ai-sdk/google": {
|
|
85
|
+
"optional": true
|
|
86
|
+
}
|
|
87
|
+
},
|
|
68
88
|
"engines": {
|
|
69
89
|
"node": ">=24"
|
|
70
90
|
},
|