@yigitahmetsahin/captcha-solver 1.2.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -21
- package/dist/index.cjs +59 -7
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -4
- package/dist/index.d.ts +15 -4
- package/dist/index.js +59 -7
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @yigitahmetsahin/captcha-solver
|
|
2
2
|
|
|
3
|
-
AI-powered captcha solver using image preprocessing and
|
|
3
|
+
AI-powered captcha solver using image preprocessing and vision models with parallel majority voting. Supports OpenAI, Anthropic, and Google providers via the Vercel AI SDK.
|
|
4
4
|
|
|
5
5
|
[CI](https://github.com/yigitahmetsahin/captcha-solver/actions/workflows/ci.yml)
|
|
6
6
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
@@ -8,16 +8,17 @@ AI-powered captcha solver using image preprocessing and OpenAI vision models wit
|
|
|
8
8
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
11
|
-
- **AI Vision OCR** - Uses
|
|
11
|
+
- **AI Vision OCR** - Uses vision models (OpenAI, Anthropic, Google) to read distorted captcha text
|
|
12
12
|
- **Image Preprocessing** - Sharp/libvips pipeline: grayscale, blur, upscale, contrast/sharpness enhancement, cropping
|
|
13
|
-
- **Majority Voting** -
|
|
14
|
-
- **
|
|
13
|
+
- **Parallel Majority Voting** - Fires all attempts concurrently and uses character-level majority voting for accuracy
|
|
14
|
+
- **Multi-Provider** - Supports OpenAI, Anthropic, and Google via Vercel AI SDK
|
|
15
|
+
- **Configurable** - Adjustable provider, model, attempt count, expected length, and verbosity
|
|
15
16
|
- **TypeScript** - Full type safety with strict mode
|
|
16
17
|
|
|
17
18
|
## Prerequisites
|
|
18
19
|
|
|
19
20
|
- Node.js >= 18
|
|
20
|
-
-
|
|
21
|
+
- An API key for at least one supported provider (OpenAI, Anthropic, or Google)
|
|
21
22
|
|
|
22
23
|
## Installation
|
|
23
24
|
|
|
@@ -29,34 +30,45 @@ npm install @yigitahmetsahin/captcha-solver
|
|
|
29
30
|
|
|
30
31
|
```typescript
|
|
31
32
|
import 'dotenv/config';
|
|
32
|
-
import {
|
|
33
|
+
import { Solver } from '@yigitahmetsahin/captcha-solver';
|
|
33
34
|
|
|
34
|
-
const
|
|
35
|
+
const solver = new Solver(process.env.OPENAI_API_KEY!);
|
|
36
|
+
const { text, attempts, usage } = await solver.solve('./captcha.png', {
|
|
35
37
|
numAttempts: 5,
|
|
36
38
|
expectedLength: 4,
|
|
37
|
-
model: 'o3',
|
|
38
39
|
});
|
|
39
40
|
|
|
40
|
-
console.log('Captcha answer:',
|
|
41
|
+
console.log('Captcha answer:', text);
|
|
42
|
+
console.log('Attempts:', attempts);
|
|
43
|
+
console.log('Total tokens:', usage.totalTokens);
|
|
41
44
|
```
|
|
42
45
|
|
|
43
46
|
## API
|
|
44
47
|
|
|
45
|
-
### `
|
|
48
|
+
### `solver.solve(input, options?)`
|
|
46
49
|
|
|
47
|
-
Solve a captcha image using
|
|
50
|
+
Solve a captcha image using AI vision + preprocessing + parallel majority voting.
|
|
48
51
|
|
|
49
52
|
**Parameters:**
|
|
50
53
|
|
|
51
|
-
| Option | Type | Default
|
|
52
|
-
| ---------------- | --------- |
|
|
53
|
-
| `model` | `string` | `'
|
|
54
|
-
| `numAttempts` | `number` | `5`
|
|
55
|
-
| `expectedLength` | `number` | -
|
|
56
|
-
| `maxRetries` | `number` | `2`
|
|
57
|
-
| `verbose` | `boolean` | `true`
|
|
54
|
+
| Option | Type | Default | Description |
|
|
55
|
+
| ---------------- | --------- | ---------- | ----------------------------------------------- |
|
|
56
|
+
| `model` | `string` | `'gpt-4o'` | Model ID passed to the provider (set in the `Solver` constructor options; `solve()` does not read it) |
|
|
57
|
+
| `numAttempts` | `number` | `5` | Number of parallel voting attempts |
|
|
58
|
+
| `expectedLength` | `number` | - | Expected captcha length (filters wrong lengths) |
|
|
59
|
+
| `maxRetries` | `number` | `2` | Max retries per attempt on API failure |
|
|
60
|
+
| `verbose` | `boolean` | `true` | Whether to log attempt details |
|
|
58
61
|
|
|
59
|
-
**Returns:** `Promise<
|
|
62
|
+
**Returns:** `Promise<SolveResult>`
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
interface SolveResult {
|
|
66
|
+
text: string; // Majority-voted captcha answer
|
|
67
|
+
attempts: string[]; // Per-attempt answers before voting (upper-cased, A-Z/0-9 only; failed attempts are omitted)
|
|
68
|
+
usage: LanguageModelUsage; // Aggregated token usage
|
|
69
|
+
attemptUsages: LanguageModelUsage[]; // Per-attempt token usage
|
|
70
|
+
}
|
|
71
|
+
```
|
|
60
72
|
|
|
61
73
|
### `preprocessCaptcha(imagePath)`
|
|
62
74
|
|
|
@@ -89,9 +101,9 @@ npm run benchmark
|
|
|
89
101
|
- Crop decorative borders
|
|
90
102
|
- Add white padding
|
|
91
103
|
|
|
92
|
-
2. **
|
|
104
|
+
2. **Parallel Attempts** - The preprocessed image is sent to the vision API concurrently across all attempts (via `Promise.all`) with temperature=1 for diverse responses.
|
|
93
105
|
|
|
94
|
-
3. **Majority Voting** - Character-level majority voting across all attempts determines the final answer, filtering by expected length if specified.
|
|
106
|
+
3. **Majority Voting** - Character-level majority voting across all parallel attempts determines the final answer, filtering by expected length if specified.
|
|
95
107
|
|
|
96
108
|
## Development
|
|
97
109
|
|
package/dist/index.cjs
CHANGED
|
@@ -166,6 +166,55 @@ function majorityVote(attempts, expectedLength) {
|
|
|
166
166
|
}
|
|
167
167
|
return result.join("");
|
|
168
168
|
}
|
|
169
|
+
/**
 * Add two optional counters.
 *
 * @param {number|undefined} a - First counter, or `undefined` when not reported.
 * @param {number|undefined} b - Second counter, or `undefined` when not reported.
 * @returns {number|undefined} `undefined` only when both inputs are `undefined`;
 *   otherwise the sum, with a missing (nullish) operand counted as 0.
 */
function sumOptional(a, b) {
  const bothUnreported = a === undefined && b === undefined;
  if (bothUnreported) {
    // Keep "nobody reported this" distinguishable from a real total of 0.
    return undefined;
  }
  const left = a ?? 0;
  const right = b ?? 0;
  return left + right;
}
|
|
173
|
+
/**
 * Combine the token usage of several attempts into one usage record.
 *
 * Every counter is added with `sumOptional`, so a counter that no attempt
 * reported stays `undefined` instead of turning into 0.
 *
 * @param {Array<object>} usages - One usage record per attempt. Each record may
 *   carry `inputTokens`, `outputTokens`, `totalTokens` and the nested
 *   `inputTokenDetails` / `outputTokenDetails` objects.
 * @returns {object} A usage record with the same fields. When `usages` is empty
 *   every counter is `undefined`.
 */
function aggregateUsage(usages) {
  const zero = {
    inputTokens: undefined,
    inputTokenDetails: {
      noCacheTokens: undefined,
      cacheReadTokens: undefined,
      cacheWriteTokens: undefined,
    },
    outputTokens: undefined,
    outputTokenDetails: {
      textTokens: undefined,
      reasoningTokens: undefined,
    },
    totalTokens: undefined,
  };
  return usages.reduce((acc, u) => {
    // A usage record may lack the nested detail objects; treat that as
    // "not reported" rather than throwing and discarding the solved attempts.
    // NOTE(review): which providers/SDK versions omit them is not visible here.
    const inputDetails = u.inputTokenDetails ?? {};
    const outputDetails = u.outputTokenDetails ?? {};
    return {
      inputTokens: sumOptional(acc.inputTokens, u.inputTokens),
      inputTokenDetails: {
        noCacheTokens: sumOptional(
          acc.inputTokenDetails.noCacheTokens,
          inputDetails.noCacheTokens
        ),
        cacheReadTokens: sumOptional(
          acc.inputTokenDetails.cacheReadTokens,
          inputDetails.cacheReadTokens
        ),
        cacheWriteTokens: sumOptional(
          acc.inputTokenDetails.cacheWriteTokens,
          inputDetails.cacheWriteTokens
        ),
      },
      outputTokens: sumOptional(acc.outputTokens, u.outputTokens),
      outputTokenDetails: {
        textTokens: sumOptional(acc.outputTokenDetails.textTokens, outputDetails.textTokens),
        reasoningTokens: sumOptional(
          acc.outputTokenDetails.reasoningTokens,
          outputDetails.reasoningTokens
        ),
      },
      totalTokens: sumOptional(acc.totalTokens, u.totalTokens),
    };
  }, zero);
}
|
|
169
218
|
var Solver = class {
|
|
170
219
|
_model = null;
|
|
171
220
|
_pendingModel = null;
|
|
@@ -206,7 +255,7 @@ var Solver = class {
|
|
|
206
255
|
*
|
|
207
256
|
* @param input - File path (string) or raw image Buffer
|
|
208
257
|
* @param options - Solve options (attempts, expected length, etc.)
|
|
209
|
-
* @returns
|
|
258
|
+
* @returns Solved text, per-attempt answers, and token usage
|
|
210
259
|
*/
|
|
211
260
|
async solve(input, options = {}) {
|
|
212
261
|
const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;
|
|
@@ -215,15 +264,18 @@ var Solver = class {
|
|
|
215
264
|
const results = await Promise.all(
|
|
216
265
|
Array.from({ length: numAttempts }, () => this.singleAttempt(model, imageBuffer, maxRetries))
|
|
217
266
|
);
|
|
218
|
-
const
|
|
267
|
+
const valid = results.filter((r) => r !== null);
|
|
219
268
|
if (verbose) {
|
|
220
|
-
|
|
269
|
+
valid.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r.text}`));
|
|
221
270
|
}
|
|
271
|
+
const attempts = valid.map((r) => r.text);
|
|
272
|
+
const attemptUsages = valid.map((r) => r.usage);
|
|
273
|
+
const usage = aggregateUsage(attemptUsages);
|
|
222
274
|
if (attempts.length === 0) {
|
|
223
275
|
if (verbose) console.log(" All attempts failed!");
|
|
224
|
-
return "";
|
|
276
|
+
return { text: "", attempts, usage, attemptUsages };
|
|
225
277
|
}
|
|
226
|
-
return majorityVote(attempts, expectedLength);
|
|
278
|
+
return { text: majorityVote(attempts, expectedLength), attempts, usage, attemptUsages };
|
|
227
279
|
}
|
|
228
280
|
/**
|
|
229
281
|
* Make a single API call to read the captcha.
|
|
@@ -232,7 +284,7 @@ var Solver = class {
|
|
|
232
284
|
async singleAttempt(model, imageBuffer, maxRetries) {
|
|
233
285
|
for (let retry = 0; retry <= maxRetries; retry++) {
|
|
234
286
|
try {
|
|
235
|
-
const { text } = await (0, import_ai.generateText)({
|
|
287
|
+
const { text, usage } = await (0, import_ai.generateText)({
|
|
236
288
|
model,
|
|
237
289
|
messages: [
|
|
238
290
|
{
|
|
@@ -252,7 +304,7 @@ var Solver = class {
|
|
|
252
304
|
return null;
|
|
253
305
|
}
|
|
254
306
|
const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
|
|
255
|
-
return cleaned
|
|
307
|
+
return cleaned ? { text: cleaned, usage } : null;
|
|
256
308
|
} catch (_err) {
|
|
257
309
|
if (retry < maxRetries) {
|
|
258
310
|
await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["export { Solver } from './solver.js';\nexport type { SolverOptions, SolveOptions, Provider } from './solver.js';\nexport { preprocessCaptcha, preprocessCaptchaToBuffer, imageToBase64 } from './preprocess.js';\n","import type { LanguageModel } from 'ai';\nimport { generateText } from 'ai';\nimport { preprocessCaptchaToBuffer } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\n// ── Types ────────────────────────────────────────────────────────────\n\nexport type Provider = 'openai' | 'anthropic' | 'google';\n\nexport interface SolverOptions {\n /** AI provider to use when constructing the model from an API key (default: \"openai\") */\n provider?: Provider;\n /** Model ID passed to the provider (default: \"gpt-4o\") */\n model?: string;\n}\n\nexport interface SolveOptions {\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n// ── Provider resolution ──────────────────────────────────────────────\n\nconst DEFAULT_MODELS: Record<Provider, string> = {\n openai: 'gpt-4o',\n anthropic: 'claude-sonnet-4-20250514',\n google: 'gemini-2.0-flash',\n};\n\nasync function resolveModel(\n apiKey: string,\n provider: Provider,\n modelId: string\n): Promise<LanguageModel> {\n switch (provider) {\n case 'openai': {\n const { createOpenAI } = await 
import('@ai-sdk/openai');\n return createOpenAI({ apiKey })(modelId);\n }\n case 'anthropic': {\n // @ts-expect-error — optional peer dependency\n const { createAnthropic } = await import('@ai-sdk/anthropic');\n return createAnthropic({ apiKey })(modelId);\n }\n case 'google': {\n // @ts-expect-error — optional peer dependency\n const { createGoogleGenerativeAI } = await import('@ai-sdk/google');\n return createGoogleGenerativeAI({ apiKey })(modelId);\n }\n default:\n throw new Error(\n `Unknown provider \"${provider}\". Install the matching @ai-sdk/* package and pass the model directly.`\n );\n }\n}\n\n// ── Confusion groups ─────────────────────────────────────────────────\n\n/**\n * Characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n '1': '1',\n I: '1',\n L: '1',\n O: 'O',\n D: 'O',\n '0': 'O',\n S: 'S',\n '5': 'S',\n Z: 'Z',\n '2': 'Z',\n};\n\n// ── Majority voting ──────────────────────────────────────────────────\n\n/**\n * Character-level majority vote across multiple attempts.\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n if (filtered.length === 0) {\n filtered = attempts;\n }\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 
0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n// ── Solver class ─────────────────────────────────────────────────────\n\nexport class Solver {\n private _model: LanguageModel | null = null;\n private _pendingModel: Promise<LanguageModel> | null = null;\n\n /**\n * Create a captcha solver.\n *\n * @example\n * // Simple — defaults to OpenAI gpt-4o\n * const solver = new Solver('sk-...');\n *\n * @example\n * // Specify provider and model\n * const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });\n *\n * @example\n * // Pass an AI SDK model directly\n * import { createOpenAI } from '@ai-sdk/openai';\n * const openai = createOpenAI({ apiKey: 'sk-...' });\n * const solver = new Solver(openai('gpt-4o'));\n */\n constructor(keyOrModel: string | LanguageModel, options?: SolverOptions) {\n if (typeof keyOrModel === 'string') {\n const provider = options?.provider ?? 'openai';\n const modelId = options?.model ?? 
DEFAULT_MODELS[provider];\n // Lazily resolve the model on first use\n this._pendingModel = resolveModel(keyOrModel, provider, modelId);\n } else {\n this._model = keyOrModel;\n }\n }\n\n private async getModel(): Promise<LanguageModel> {\n if (this._model) return this._model;\n this._model = await this._pendingModel!;\n this._pendingModel = null;\n return this._model;\n }\n\n /**\n * Solve a captcha image.\n *\n * @param input - File path (string) or raw image Buffer\n * @param options - Solve options (attempts, expected length, etc.)\n * @returns The captcha text\n */\n async solve(input: string | Buffer, options: SolveOptions = {}): Promise<string> {\n const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const model = await this.getModel();\n const imageBuffer = await preprocessCaptchaToBuffer(input);\n\n // Fire all attempts in parallel for speed\n const results = await Promise.all(\n Array.from({ length: numAttempts }, () => this.singleAttempt(model, imageBuffer, maxRetries))\n );\n const attempts = results.filter((r): r is string => r !== null);\n if (verbose) {\n attempts.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r}`));\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n return majorityVote(attempts, expectedLength);\n }\n\n /**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\n private async singleAttempt(\n model: LanguageModel,\n imageBuffer: Buffer,\n maxRetries: number\n ): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n const { text } = await generateText({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n { type: 'image', image: imageBuffer },\n ],\n },\n ],\n temperature: 1,\n maxOutputTokens: 256,\n });\n\n const raw = text.trim();\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n 
lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null;\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n }\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. Add white padding\n *\n * Accepts a file path or a raw image Buffer.\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(input: string | Buffer): Promise<string> {\n const buf = await preprocessCaptchaToBuffer(input);\n return buf.toString('base64');\n}\n\n/**\n * Same preprocessing pipeline as `preprocessCaptcha`, but returns the\n * resulting PNG as a raw Buffer (useful for AI SDK image content parts).\n */\nexport async function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer> {\n const source = typeof input === 'string' ? 
path.resolve(input) : input;\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(source).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(source).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 5-6: Crop → add white padding → output PNG\n return sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return 
buffer.toString('base64');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACCA,gBAA6B;;;ACD7B,gBAAe;AACf,kBAAiB;AACjB,mBAAkB;AAgBlB,eAAsB,kBAAkB,OAAyC;AAC/E,QAAM,MAAM,MAAM,0BAA0B,KAAK;AACjD,SAAO,IAAI,SAAS,QAAQ;AAC9B;AAMA,eAAsB,0BAA0B,OAAyC;AACvF,QAAM,SAAS,OAAO,UAAU,WAAW,YAAAA,QAAK,QAAQ,KAAK,IAAI;AAGjE,QAAM,WAAW,UAAM,aAAAC,SAAM,MAAM,EAAE,SAAS;AAC9C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,UAAM,aAAAA,SAAM,MAAM,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGpE,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,UAAM,aAAAA,SAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,aAAO,aAAAA,SAAM,QAAQ,EAClB,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AACd;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,UAAAC,QAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;ADjFA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AA8Bf,IAAM,iBAA2C;AAAA,EAC/C,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ;AACV;AAEA,eAAe,aACb,QACA,UACA,SACwB;AACxB,UAAQ,UAAU;AAAA,IAChB,KAAK,UAAU;AACb,YAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,aAAO,aAAa,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACzC;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,mBAAmB;AAC5D,aAAO,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IAC5C;AAAA,IACA,KAAK,UAAU;AAEb,YAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,aAAO,yBAAyB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACrD;AAAA,IACA;AACE,YAAM,IAAI;AAAA,QACR,qBAAqB,QAAQ;AAAA,MAC/B;AAAA,EACJ;AACF;AAQA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EAC
L,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AACzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAEtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAEA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAEA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAIO,IAAM,SAAN,MAAa;AAAA,EACV,SAA+B;AAAA,EAC/B,gBAA+C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBvD,YAAY,YAAoC,SAAyB;AACvE,QAAI,OAAO,eAAe,UAAU;AAClC,YAAM,WAAW,SAAS,YAAY;AACtC,YAAM,UAAU,SAAS,SAAS,eAAe,QAAQ;AAEzD,WAAK,gBAAgB,aAAa,YAAY,UAAU,OAAO;AAAA,IACjE,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAc,WAAmC;AAC/C,QAAI,KAAK,OAAQ,QAAO,KAAK;AAC7B,SAAK,SAAS,MAAM,KAAK;AACzB,SAAK,gBAAgB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAM,OAAwB,UAAwB,CAAC,GAAoB;AAC/E,UAAM,EAAE,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE5E,UAAM,QAAQ,MAAM,KAAK,SAAS;AAClC,UAAM,cAAc,MAAM,0BAA0B,KAAK;AAGzD,UAAM,UAAU,MAAM,QAAQ;AAAA,MAC5B,MAAM,KAAK,EAAE,QAAQ,YAAY,GAAG,MAAM,KAAK,cAAc,OAAO,aAAa,UAAU,CAAC;AAAA,IAC9F;AACA,UAAM,WAAW,QA
AQ,OAAO,CAAC,MAAmB,MAAM,IAAI;AAC9D,QAAI,SAAS;AACX,eAAS,QAAQ,CAAC,GAAG,MAAM,QAAQ,IAAI,aAAa,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;AAAA,IACpE;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,UAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,aAAO;AAAA,IACT;AAEA,WAAO,aAAa,UAAU,cAAc;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,cACZ,OACA,aACA,YACwB;AACxB,aAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,UAAI;AACF,cAAM,EAAE,KAAK,IAAI,UAAM,wBAAa;AAAA,UAClC;AAAA,UACA,UAAU;AAAA,YACR;AAAA,cACE,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,gBAC7B,EAAE,MAAM,SAAS,OAAO,YAAY;AAAA,cACtC;AAAA,YACF;AAAA,UACF;AAAA,UACA,aAAa;AAAA,UACb,iBAAiB;AAAA,QACnB,CAAC;AAED,cAAM,MAAM,KAAK,KAAK;AAGtB,cAAM,QAAQ,IAAI,YAAY;AAC9B,YACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,iBAAO;AAAA,QACT;AAGA,cAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,eAAO,WAAW;AAAA,MACpB,SAAS,MAAM;AACb,YAAI,QAAQ,YAAY;AACtB,gBAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;","names":["path","sharp","fs"]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["export { Solver } from './solver.js';\nexport type { SolverOptions, SolveOptions, SolveResult, Provider } from './solver.js';\nexport type { LanguageModelUsage } from 'ai';\nexport { preprocessCaptcha, preprocessCaptchaToBuffer, imageToBase64 } from './preprocess.js';\n","import type { LanguageModel, LanguageModelUsage } from 'ai';\nimport { generateText } from 'ai';\nimport { preprocessCaptchaToBuffer } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\n// ── Types ────────────────────────────────────────────────────────────\n\nexport type Provider = 'openai' | 'anthropic' | 'google';\n\nexport interface SolverOptions {\n /** AI provider to use when constructing the model from an API key (default: \"openai\") */\n provider?: Provider;\n /** Model ID passed to the provider (default: \"gpt-4o\") */\n model?: string;\n}\n\nexport interface SolveOptions {\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\nexport interface SolveResult {\n /** The solved captcha text (majority-voted) */\n text: string;\n /** Per-attempt raw answers (before voting) */\n attempts: string[];\n /** Aggregated token usage across all parallel attempts */\n usage: LanguageModelUsage;\n /** Per-attempt usage breakdown */\n attemptUsages: LanguageModelUsage[];\n}\n\ninterface 
AttemptResult {\n text: string;\n usage: LanguageModelUsage;\n}\n\n// ── Provider resolution ──────────────────────────────────────────────\n\nconst DEFAULT_MODELS: Record<Provider, string> = {\n openai: 'gpt-4o',\n anthropic: 'claude-sonnet-4-20250514',\n google: 'gemini-2.0-flash',\n};\n\nasync function resolveModel(\n apiKey: string,\n provider: Provider,\n modelId: string\n): Promise<LanguageModel> {\n switch (provider) {\n case 'openai': {\n const { createOpenAI } = await import('@ai-sdk/openai');\n return createOpenAI({ apiKey })(modelId);\n }\n case 'anthropic': {\n // @ts-expect-error — optional peer dependency\n const { createAnthropic } = await import('@ai-sdk/anthropic');\n return createAnthropic({ apiKey })(modelId);\n }\n case 'google': {\n // @ts-expect-error — optional peer dependency\n const { createGoogleGenerativeAI } = await import('@ai-sdk/google');\n return createGoogleGenerativeAI({ apiKey })(modelId);\n }\n default:\n throw new Error(\n `Unknown provider \"${provider}\". Install the matching @ai-sdk/* package and pass the model directly.`\n );\n }\n}\n\n// ── Confusion groups ─────────────────────────────────────────────────\n\n/**\n * Characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n '1': '1',\n I: '1',\n L: '1',\n O: 'O',\n D: 'O',\n '0': 'O',\n S: 'S',\n '5': 'S',\n Z: 'Z',\n '2': 'Z',\n};\n\n// ── Majority voting ──────────────────────────────────────────────────\n\n/**\n * Character-level majority vote across multiple attempts.\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n let filtered = expectedLength ? 
attempts.filter((a) => a.length === expectedLength) : attempts;\n\n if (filtered.length === 0) {\n filtered = attempts;\n }\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n// ── Usage aggregation ────────────────────────────────────────────────\n\nfunction sumOptional(a: number | undefined, b: number | undefined): number | undefined {\n if (a === undefined && b === undefined) return undefined;\n return (a ?? 0) + (b ?? 
0);\n}\n\nfunction aggregateUsage(usages: LanguageModelUsage[]): LanguageModelUsage {\n const zero: LanguageModelUsage = {\n inputTokens: undefined,\n inputTokenDetails: {\n noCacheTokens: undefined,\n cacheReadTokens: undefined,\n cacheWriteTokens: undefined,\n },\n outputTokens: undefined,\n outputTokenDetails: {\n textTokens: undefined,\n reasoningTokens: undefined,\n },\n totalTokens: undefined,\n };\n return usages.reduce<LanguageModelUsage>(\n (acc, u) => ({\n inputTokens: sumOptional(acc.inputTokens, u.inputTokens),\n inputTokenDetails: {\n noCacheTokens: sumOptional(\n acc.inputTokenDetails.noCacheTokens,\n u.inputTokenDetails.noCacheTokens\n ),\n cacheReadTokens: sumOptional(\n acc.inputTokenDetails.cacheReadTokens,\n u.inputTokenDetails.cacheReadTokens\n ),\n cacheWriteTokens: sumOptional(\n acc.inputTokenDetails.cacheWriteTokens,\n u.inputTokenDetails.cacheWriteTokens\n ),\n },\n outputTokens: sumOptional(acc.outputTokens, u.outputTokens),\n outputTokenDetails: {\n textTokens: sumOptional(acc.outputTokenDetails.textTokens, u.outputTokenDetails.textTokens),\n reasoningTokens: sumOptional(\n acc.outputTokenDetails.reasoningTokens,\n u.outputTokenDetails.reasoningTokens\n ),\n },\n totalTokens: sumOptional(acc.totalTokens, u.totalTokens),\n }),\n zero\n );\n}\n\n// ── Solver class ─────────────────────────────────────────────────────\n\nexport class Solver {\n private _model: LanguageModel | null = null;\n private _pendingModel: Promise<LanguageModel> | null = null;\n\n /**\n * Create a captcha solver.\n *\n * @example\n * // Simple — defaults to OpenAI gpt-4o\n * const solver = new Solver('sk-...');\n *\n * @example\n * // Specify provider and model\n * const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });\n *\n * @example\n * // Pass an AI SDK model directly\n * import { createOpenAI } from '@ai-sdk/openai';\n * const openai = createOpenAI({ apiKey: 'sk-...' 
});\n * const solver = new Solver(openai('gpt-4o'));\n */\n constructor(keyOrModel: string | LanguageModel, options?: SolverOptions) {\n if (typeof keyOrModel === 'string') {\n const provider = options?.provider ?? 'openai';\n const modelId = options?.model ?? DEFAULT_MODELS[provider];\n // Lazily resolve the model on first use\n this._pendingModel = resolveModel(keyOrModel, provider, modelId);\n } else {\n this._model = keyOrModel;\n }\n }\n\n private async getModel(): Promise<LanguageModel> {\n if (this._model) return this._model;\n this._model = await this._pendingModel!;\n this._pendingModel = null;\n return this._model;\n }\n\n /**\n * Solve a captcha image.\n *\n * @param input - File path (string) or raw image Buffer\n * @param options - Solve options (attempts, expected length, etc.)\n * @returns Solved text, per-attempt answers, and token usage\n */\n async solve(input: string | Buffer, options: SolveOptions = {}): Promise<SolveResult> {\n const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const model = await this.getModel();\n const imageBuffer = await preprocessCaptchaToBuffer(input);\n\n // Fire all attempts in parallel for speed\n const results = await Promise.all(\n Array.from({ length: numAttempts }, () => this.singleAttempt(model, imageBuffer, maxRetries))\n );\n const valid = results.filter((r): r is AttemptResult => r !== null);\n if (verbose) {\n valid.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r.text}`));\n }\n\n const attempts = valid.map((r) => r.text);\n const attemptUsages = valid.map((r) => r.usage);\n const usage = aggregateUsage(attemptUsages);\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return { text: '', attempts, usage, attemptUsages };\n }\n\n return { text: majorityVote(attempts, expectedLength), attempts, usage, attemptUsages };\n }\n\n /**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\n 
private async singleAttempt(\n model: LanguageModel,\n imageBuffer: Buffer,\n maxRetries: number\n ): Promise<AttemptResult | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n const { text, usage } = await generateText({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n { type: 'image', image: imageBuffer },\n ],\n },\n ],\n temperature: 1,\n maxOutputTokens: 256,\n });\n\n const raw = text.trim();\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null;\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned ? { text: cleaned, usage } : null;\n } catch (_err) {\n if (retry < maxRetries) {\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n }\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. 
Add white padding\n *\n * Accepts a file path or a raw image Buffer.\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(input: string | Buffer): Promise<string> {\n const buf = await preprocessCaptchaToBuffer(input);\n return buf.toString('base64');\n}\n\n/**\n * Same preprocessing pipeline as `preprocessCaptcha`, but returns the\n * resulting PNG as a raw Buffer (useful for AI SDK image content parts).\n */\nexport async function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer> {\n const source = typeof input === 'string' ? path.resolve(input) : input;\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(source).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(source).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 
5-6: Crop → add white padding → output PNG\n return sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return buffer.toString('base64');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACCA,gBAA6B;;;ACD7B,gBAAe;AACf,kBAAiB;AACjB,mBAAkB;AAgBlB,eAAsB,kBAAkB,OAAyC;AAC/E,QAAM,MAAM,MAAM,0BAA0B,KAAK;AACjD,SAAO,IAAI,SAAS,QAAQ;AAC9B;AAMA,eAAsB,0BAA0B,OAAyC;AACvF,QAAM,SAAS,OAAO,UAAU,WAAW,YAAAA,QAAK,QAAQ,KAAK,IAAI;AAGjE,QAAM,WAAW,UAAM,aAAAC,SAAM,MAAM,EAAE,SAAS;AAC9C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,UAAM,aAAAA,SAAM,MAAM,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGpE,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,UAAM,aAAAA,SAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,UAAM,aAAAA,SAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,aAAO,aAAAA,SAAM,QAAQ,EAClB,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AACd;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,UAAAC,QAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;ADjFA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AA8Cf,IAAM,iBAA2C;AAAA,EAC/C,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ;AACV;AAEA,eAAe,aACb,QACA,UACA,SACwB;AACxB,UAAQ,UAAU;
AAAA,IAChB,KAAK,UAAU;AACb,YAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,aAAO,aAAa,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACzC;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,mBAAmB;AAC5D,aAAO,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IAC5C;AAAA,IACA,KAAK,UAAU;AAEb,YAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,aAAO,yBAAyB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACrD;AAAA,IACA;AACE,YAAM,IAAI;AAAA,QACR,qBAAqB,QAAQ;AAAA,MAC/B;AAAA,EACJ;AACF;AAQA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AACzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAEtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAEA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAEA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAIA,SAAS,YAAY,GAAuB,GAA2C;AACrF,MAAI,MAAM,UAAa,MAAM,OAAW,QAAO;AAC/C,UAAQ,KAAK,MAAM,KAAK;AAC1B;AAEA,SAAS,eAAe,QAAkD;AACxE,QAAM,OAA2B;AAAA,IAC/B,aAAa;AAAA,IACb,mBAAmB;AAAA,MACjB,eAAe;AAAA,MACf,iBAAiB;AAAA,MACjB,kBAAkB;AAAA,IACpB;AAAA,IACA,cAAc;AAAA,IACd,oBAAoB;AAAA,MAClB,YAAY;AAAA,MACZ,iBAAiB;AAAA,IACnB;AAAA,IACA,aAAa;AAAA,EACf;AAC
A,SAAO,OAAO;AAAA,IACZ,CAAC,KAAK,OAAO;AAAA,MACX,aAAa,YAAY,IAAI,aAAa,EAAE,WAAW;AAAA,MACvD,mBAAmB;AAAA,QACjB,eAAe;AAAA,UACb,IAAI,kBAAkB;AAAA,UACtB,EAAE,kBAAkB;AAAA,QACtB;AAAA,QACA,iBAAiB;AAAA,UACf,IAAI,kBAAkB;AAAA,UACtB,EAAE,kBAAkB;AAAA,QACtB;AAAA,QACA,kBAAkB;AAAA,UAChB,IAAI,kBAAkB;AAAA,UACtB,EAAE,kBAAkB;AAAA,QACtB;AAAA,MACF;AAAA,MACA,cAAc,YAAY,IAAI,cAAc,EAAE,YAAY;AAAA,MAC1D,oBAAoB;AAAA,QAClB,YAAY,YAAY,IAAI,mBAAmB,YAAY,EAAE,mBAAmB,UAAU;AAAA,QAC1F,iBAAiB;AAAA,UACf,IAAI,mBAAmB;AAAA,UACvB,EAAE,mBAAmB;AAAA,QACvB;AAAA,MACF;AAAA,MACA,aAAa,YAAY,IAAI,aAAa,EAAE,WAAW;AAAA,IACzD;AAAA,IACA;AAAA,EACF;AACF;AAIO,IAAM,SAAN,MAAa;AAAA,EACV,SAA+B;AAAA,EAC/B,gBAA+C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBvD,YAAY,YAAoC,SAAyB;AACvE,QAAI,OAAO,eAAe,UAAU;AAClC,YAAM,WAAW,SAAS,YAAY;AACtC,YAAM,UAAU,SAAS,SAAS,eAAe,QAAQ;AAEzD,WAAK,gBAAgB,aAAa,YAAY,UAAU,OAAO;AAAA,IACjE,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAc,WAAmC;AAC/C,QAAI,KAAK,OAAQ,QAAO,KAAK;AAC7B,SAAK,SAAS,MAAM,KAAK;AACzB,SAAK,gBAAgB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAM,OAAwB,UAAwB,CAAC,GAAyB;AACpF,UAAM,EAAE,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE5E,UAAM,QAAQ,MAAM,KAAK,SAAS;AAClC,UAAM,cAAc,MAAM,0BAA0B,KAAK;AAGzD,UAAM,UAAU,MAAM,QAAQ;AAAA,MAC5B,MAAM,KAAK,EAAE,QAAQ,YAAY,GAAG,MAAM,KAAK,cAAc,OAAO,aAAa,UAAU,CAAC;AAAA,IAC9F;AACA,UAAM,QAAQ,QAAQ,OAAO,CAAC,MAA0B,MAAM,IAAI;AAClE,QAAI,SAAS;AACX,YAAM,QAAQ,CAAC,GAAG,MAAM,QAAQ,IAAI,aAAa,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;AAAA,IACtE;AAEA,UAAM,WAAW,MAAM,IAAI,CAAC,MAAM,EAAE,IAAI;AACxC,UAAM,gBAAgB,MAAM,IAAI,CAAC,MAAM,EAAE,KAAK;AAC9C,UAAM,QAAQ,eAAe,aAAa;AAE1C,QAAI,SAAS,WAAW,GAAG;AACzB,UAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,aAAO,EAAE,MAAM,IAAI,UAAU,OAAO,cAAc;AAAA,IACpD;AAEA,WAAO,EAAE,MAAM,aAAa,UAAU,cAAc,GAAG,UAAU,OAAO,cAAc;AAAA,EACxF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,cACZ,OACA,aACA,YAC+B;AAC/B,aAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,UAAI;AACF,cAAM,EAAE,MAAM,MAAM,IAAI,UAAM,wBAAa;AAAA,UACzC;AAAA,UACA,UAAU;AAA
A,YACR;AAAA,cACE,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,gBAC7B,EAAE,MAAM,SAAS,OAAO,YAAY;AAAA,cACtC;AAAA,YACF;AAAA,UACF;AAAA,UACA,aAAa;AAAA,UACb,iBAAiB;AAAA,QACnB,CAAC;AAED,cAAM,MAAM,KAAK,KAAK;AAGtB,cAAM,QAAQ,IAAI,YAAY;AAC9B,YACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,iBAAO;AAAA,QACT;AAGA,cAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,eAAO,UAAU,EAAE,MAAM,SAAS,MAAM,IAAI;AAAA,MAC9C,SAAS,MAAM;AACb,YAAI,QAAQ,YAAY;AACtB,gBAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;","names":["path","sharp","fs"]}
|
package/dist/index.d.cts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { LanguageModel } from 'ai';
|
|
1
|
+
import { LanguageModelUsage, LanguageModel } from 'ai';
|
|
2
|
+
export { LanguageModelUsage } from 'ai';
|
|
2
3
|
|
|
3
4
|
type Provider = 'openai' | 'anthropic' | 'google';
|
|
4
5
|
interface SolverOptions {
|
|
@@ -17,6 +18,16 @@ interface SolveOptions {
|
|
|
17
18
|
/** Whether to log attempt details (default: true) */
|
|
18
19
|
verbose?: boolean;
|
|
19
20
|
}
|
|
21
|
+
interface SolveResult {
|
|
22
|
+
/** The solved captcha text (majority-voted) */
|
|
23
|
+
text: string;
|
|
24
|
+
/** Per-attempt raw answers (before voting) */
|
|
25
|
+
attempts: string[];
|
|
26
|
+
/** Aggregated token usage across all parallel attempts */
|
|
27
|
+
usage: LanguageModelUsage;
|
|
28
|
+
/** Per-attempt usage breakdown */
|
|
29
|
+
attemptUsages: LanguageModelUsage[];
|
|
30
|
+
}
|
|
20
31
|
declare class Solver {
|
|
21
32
|
private _model;
|
|
22
33
|
private _pendingModel;
|
|
@@ -44,9 +55,9 @@ declare class Solver {
|
|
|
44
55
|
*
|
|
45
56
|
* @param input - File path (string) or raw image Buffer
|
|
46
57
|
* @param options - Solve options (attempts, expected length, etc.)
|
|
47
|
-
* @returns
|
|
58
|
+
* @returns Solved text, per-attempt answers, and token usage
|
|
48
59
|
*/
|
|
49
|
-
solve(input: string | Buffer, options?: SolveOptions): Promise<
|
|
60
|
+
solve(input: string | Buffer, options?: SolveOptions): Promise<SolveResult>;
|
|
50
61
|
/**
|
|
51
62
|
* Make a single API call to read the captcha.
|
|
52
63
|
* Retries up to `maxRetries` times on failure.
|
|
@@ -79,4 +90,4 @@ declare function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buff
|
|
|
79
90
|
*/
|
|
80
91
|
declare function imageToBase64(imagePath: string): string;
|
|
81
92
|
|
|
82
|
-
export { type Provider, type SolveOptions, Solver, type SolverOptions, imageToBase64, preprocessCaptcha, preprocessCaptchaToBuffer };
|
|
93
|
+
export { type Provider, type SolveOptions, type SolveResult, Solver, type SolverOptions, imageToBase64, preprocessCaptcha, preprocessCaptchaToBuffer };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { LanguageModel } from 'ai';
|
|
1
|
+
import { LanguageModelUsage, LanguageModel } from 'ai';
|
|
2
|
+
export { LanguageModelUsage } from 'ai';
|
|
2
3
|
|
|
3
4
|
type Provider = 'openai' | 'anthropic' | 'google';
|
|
4
5
|
interface SolverOptions {
|
|
@@ -17,6 +18,16 @@ interface SolveOptions {
|
|
|
17
18
|
/** Whether to log attempt details (default: true) */
|
|
18
19
|
verbose?: boolean;
|
|
19
20
|
}
|
|
21
|
+
interface SolveResult {
|
|
22
|
+
/** The solved captcha text (majority-voted) */
|
|
23
|
+
text: string;
|
|
24
|
+
/** Per-attempt raw answers (before voting) */
|
|
25
|
+
attempts: string[];
|
|
26
|
+
/** Aggregated token usage across all parallel attempts */
|
|
27
|
+
usage: LanguageModelUsage;
|
|
28
|
+
/** Per-attempt usage breakdown */
|
|
29
|
+
attemptUsages: LanguageModelUsage[];
|
|
30
|
+
}
|
|
20
31
|
declare class Solver {
|
|
21
32
|
private _model;
|
|
22
33
|
private _pendingModel;
|
|
@@ -44,9 +55,9 @@ declare class Solver {
|
|
|
44
55
|
*
|
|
45
56
|
* @param input - File path (string) or raw image Buffer
|
|
46
57
|
* @param options - Solve options (attempts, expected length, etc.)
|
|
47
|
-
* @returns
|
|
58
|
+
* @returns Solved text, per-attempt answers, and token usage
|
|
48
59
|
*/
|
|
49
|
-
solve(input: string | Buffer, options?: SolveOptions): Promise<
|
|
60
|
+
solve(input: string | Buffer, options?: SolveOptions): Promise<SolveResult>;
|
|
50
61
|
/**
|
|
51
62
|
* Make a single API call to read the captcha.
|
|
52
63
|
* Retries up to `maxRetries` times on failure.
|
|
@@ -79,4 +90,4 @@ declare function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buff
|
|
|
79
90
|
*/
|
|
80
91
|
declare function imageToBase64(imagePath: string): string;
|
|
81
92
|
|
|
82
|
-
export { type Provider, type SolveOptions, Solver, type SolverOptions, imageToBase64, preprocessCaptcha, preprocessCaptchaToBuffer };
|
|
93
|
+
export { type Provider, type SolveOptions, type SolveResult, Solver, type SolverOptions, imageToBase64, preprocessCaptcha, preprocessCaptchaToBuffer };
|
package/dist/index.js
CHANGED
|
@@ -127,6 +127,55 @@ function majorityVote(attempts, expectedLength) {
|
|
|
127
127
|
}
|
|
128
128
|
return result.join("");
|
|
129
129
|
}
|
|
130
|
+
function sumOptional(a, b) {
|
|
131
|
+
if (a === void 0 && b === void 0) return void 0;
|
|
132
|
+
return (a ?? 0) + (b ?? 0);
|
|
133
|
+
}
|
|
134
|
+
function aggregateUsage(usages) {
|
|
135
|
+
const zero = {
|
|
136
|
+
inputTokens: void 0,
|
|
137
|
+
inputTokenDetails: {
|
|
138
|
+
noCacheTokens: void 0,
|
|
139
|
+
cacheReadTokens: void 0,
|
|
140
|
+
cacheWriteTokens: void 0
|
|
141
|
+
},
|
|
142
|
+
outputTokens: void 0,
|
|
143
|
+
outputTokenDetails: {
|
|
144
|
+
textTokens: void 0,
|
|
145
|
+
reasoningTokens: void 0
|
|
146
|
+
},
|
|
147
|
+
totalTokens: void 0
|
|
148
|
+
};
|
|
149
|
+
return usages.reduce(
|
|
150
|
+
(acc, u) => ({
|
|
151
|
+
inputTokens: sumOptional(acc.inputTokens, u.inputTokens),
|
|
152
|
+
inputTokenDetails: {
|
|
153
|
+
noCacheTokens: sumOptional(
|
|
154
|
+
acc.inputTokenDetails.noCacheTokens,
|
|
155
|
+
u.inputTokenDetails.noCacheTokens
|
|
156
|
+
),
|
|
157
|
+
cacheReadTokens: sumOptional(
|
|
158
|
+
acc.inputTokenDetails.cacheReadTokens,
|
|
159
|
+
u.inputTokenDetails.cacheReadTokens
|
|
160
|
+
),
|
|
161
|
+
cacheWriteTokens: sumOptional(
|
|
162
|
+
acc.inputTokenDetails.cacheWriteTokens,
|
|
163
|
+
u.inputTokenDetails.cacheWriteTokens
|
|
164
|
+
)
|
|
165
|
+
},
|
|
166
|
+
outputTokens: sumOptional(acc.outputTokens, u.outputTokens),
|
|
167
|
+
outputTokenDetails: {
|
|
168
|
+
textTokens: sumOptional(acc.outputTokenDetails.textTokens, u.outputTokenDetails.textTokens),
|
|
169
|
+
reasoningTokens: sumOptional(
|
|
170
|
+
acc.outputTokenDetails.reasoningTokens,
|
|
171
|
+
u.outputTokenDetails.reasoningTokens
|
|
172
|
+
)
|
|
173
|
+
},
|
|
174
|
+
totalTokens: sumOptional(acc.totalTokens, u.totalTokens)
|
|
175
|
+
}),
|
|
176
|
+
zero
|
|
177
|
+
);
|
|
178
|
+
}
|
|
130
179
|
var Solver = class {
|
|
131
180
|
_model = null;
|
|
132
181
|
_pendingModel = null;
|
|
@@ -167,7 +216,7 @@ var Solver = class {
|
|
|
167
216
|
*
|
|
168
217
|
* @param input - File path (string) or raw image Buffer
|
|
169
218
|
* @param options - Solve options (attempts, expected length, etc.)
|
|
170
|
-
* @returns
|
|
219
|
+
* @returns Solved text, per-attempt answers, and token usage
|
|
171
220
|
*/
|
|
172
221
|
async solve(input, options = {}) {
|
|
173
222
|
const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;
|
|
@@ -176,15 +225,18 @@ var Solver = class {
|
|
|
176
225
|
const results = await Promise.all(
|
|
177
226
|
Array.from({ length: numAttempts }, () => this.singleAttempt(model, imageBuffer, maxRetries))
|
|
178
227
|
);
|
|
179
|
-
const
|
|
228
|
+
const valid = results.filter((r) => r !== null);
|
|
180
229
|
if (verbose) {
|
|
181
|
-
|
|
230
|
+
valid.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r.text}`));
|
|
182
231
|
}
|
|
232
|
+
const attempts = valid.map((r) => r.text);
|
|
233
|
+
const attemptUsages = valid.map((r) => r.usage);
|
|
234
|
+
const usage = aggregateUsage(attemptUsages);
|
|
183
235
|
if (attempts.length === 0) {
|
|
184
236
|
if (verbose) console.log(" All attempts failed!");
|
|
185
|
-
return "";
|
|
237
|
+
return { text: "", attempts, usage, attemptUsages };
|
|
186
238
|
}
|
|
187
|
-
return majorityVote(attempts, expectedLength);
|
|
239
|
+
return { text: majorityVote(attempts, expectedLength), attempts, usage, attemptUsages };
|
|
188
240
|
}
|
|
189
241
|
/**
|
|
190
242
|
* Make a single API call to read the captcha.
|
|
@@ -193,7 +245,7 @@ var Solver = class {
|
|
|
193
245
|
async singleAttempt(model, imageBuffer, maxRetries) {
|
|
194
246
|
for (let retry = 0; retry <= maxRetries; retry++) {
|
|
195
247
|
try {
|
|
196
|
-
const { text } = await generateText({
|
|
248
|
+
const { text, usage } = await generateText({
|
|
197
249
|
model,
|
|
198
250
|
messages: [
|
|
199
251
|
{
|
|
@@ -213,7 +265,7 @@ var Solver = class {
|
|
|
213
265
|
return null;
|
|
214
266
|
}
|
|
215
267
|
const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, "");
|
|
216
|
-
return cleaned
|
|
268
|
+
return cleaned ? { text: cleaned, usage } : null;
|
|
217
269
|
} catch (_err) {
|
|
218
270
|
if (retry < maxRetries) {
|
|
219
271
|
await new Promise((r) => setTimeout(r, 1e3 * (retry + 1)));
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["import type { LanguageModel } from 'ai';\nimport { generateText } from 'ai';\nimport { preprocessCaptchaToBuffer } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\n// ── Types ────────────────────────────────────────────────────────────\n\nexport type Provider = 'openai' | 'anthropic' | 'google';\n\nexport interface SolverOptions {\n /** AI provider to use when constructing the model from an API key (default: \"openai\") */\n provider?: Provider;\n /** Model ID passed to the provider (default: \"gpt-4o\") */\n model?: string;\n}\n\nexport interface SolveOptions {\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\n// ── Provider resolution ──────────────────────────────────────────────\n\nconst DEFAULT_MODELS: Record<Provider, string> = {\n openai: 'gpt-4o',\n anthropic: 'claude-sonnet-4-20250514',\n google: 'gemini-2.0-flash',\n};\n\nasync function resolveModel(\n apiKey: string,\n provider: Provider,\n modelId: string\n): Promise<LanguageModel> {\n switch (provider) {\n case 'openai': {\n const { createOpenAI } = await import('@ai-sdk/openai');\n return createOpenAI({ apiKey })(modelId);\n }\n case 'anthropic': {\n // @ts-expect-error — optional peer dependency\n const { createAnthropic } = await import('@ai-sdk/anthropic');\n return createAnthropic({ apiKey 
})(modelId);\n }\n case 'google': {\n // @ts-expect-error — optional peer dependency\n const { createGoogleGenerativeAI } = await import('@ai-sdk/google');\n return createGoogleGenerativeAI({ apiKey })(modelId);\n }\n default:\n throw new Error(\n `Unknown provider \"${provider}\". Install the matching @ai-sdk/* package and pass the model directly.`\n );\n }\n}\n\n// ── Confusion groups ─────────────────────────────────────────────────\n\n/**\n * Characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n '1': '1',\n I: '1',\n L: '1',\n O: 'O',\n D: 'O',\n '0': 'O',\n S: 'S',\n '5': 'S',\n Z: 'Z',\n '2': 'Z',\n};\n\n// ── Majority voting ──────────────────────────────────────────────────\n\n/**\n * Character-level majority vote across multiple attempts.\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n if (filtered.length === 0) {\n filtered = attempts;\n }\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 
0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n// ── Solver class ─────────────────────────────────────────────────────\n\nexport class Solver {\n private _model: LanguageModel | null = null;\n private _pendingModel: Promise<LanguageModel> | null = null;\n\n /**\n * Create a captcha solver.\n *\n * @example\n * // Simple — defaults to OpenAI gpt-4o\n * const solver = new Solver('sk-...');\n *\n * @example\n * // Specify provider and model\n * const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });\n *\n * @example\n * // Pass an AI SDK model directly\n * import { createOpenAI } from '@ai-sdk/openai';\n * const openai = createOpenAI({ apiKey: 'sk-...' });\n * const solver = new Solver(openai('gpt-4o'));\n */\n constructor(keyOrModel: string | LanguageModel, options?: SolverOptions) {\n if (typeof keyOrModel === 'string') {\n const provider = options?.provider ?? 'openai';\n const modelId = options?.model ?? 
DEFAULT_MODELS[provider];\n // Lazily resolve the model on first use\n this._pendingModel = resolveModel(keyOrModel, provider, modelId);\n } else {\n this._model = keyOrModel;\n }\n }\n\n private async getModel(): Promise<LanguageModel> {\n if (this._model) return this._model;\n this._model = await this._pendingModel!;\n this._pendingModel = null;\n return this._model;\n }\n\n /**\n * Solve a captcha image.\n *\n * @param input - File path (string) or raw image Buffer\n * @param options - Solve options (attempts, expected length, etc.)\n * @returns The captcha text\n */\n async solve(input: string | Buffer, options: SolveOptions = {}): Promise<string> {\n const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const model = await this.getModel();\n const imageBuffer = await preprocessCaptchaToBuffer(input);\n\n // Fire all attempts in parallel for speed\n const results = await Promise.all(\n Array.from({ length: numAttempts }, () => this.singleAttempt(model, imageBuffer, maxRetries))\n );\n const attempts = results.filter((r): r is string => r !== null);\n if (verbose) {\n attempts.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r}`));\n }\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return '';\n }\n\n return majorityVote(attempts, expectedLength);\n }\n\n /**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\n private async singleAttempt(\n model: LanguageModel,\n imageBuffer: Buffer,\n maxRetries: number\n ): Promise<string | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n const { text } = await generateText({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n { type: 'image', image: imageBuffer },\n ],\n },\n ],\n temperature: 1,\n maxOutputTokens: 256,\n });\n\n const raw = text.trim();\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n 
lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null;\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned || null;\n } catch (_err) {\n if (retry < maxRetries) {\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n }\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. Add white padding\n *\n * Accepts a file path or a raw image Buffer.\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(input: string | Buffer): Promise<string> {\n const buf = await preprocessCaptchaToBuffer(input);\n return buf.toString('base64');\n}\n\n/**\n * Same preprocessing pipeline as `preprocessCaptcha`, but returns the\n * resulting PNG as a raw Buffer (useful for AI SDK image content parts).\n */\nexport async function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer> {\n const source = typeof input === 'string' ? 
path.resolve(input) : input;\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(source).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(source).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 5-6: Crop → add white padding → output PNG\n return sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return 
buffer.toString('base64');\n}\n"],"mappings":";AACA,SAAS,oBAAoB;;;ACD7B,OAAO,QAAQ;AACf,OAAO,UAAU;AACjB,OAAO,WAAW;AAgBlB,eAAsB,kBAAkB,OAAyC;AAC/E,QAAM,MAAM,MAAM,0BAA0B,KAAK;AACjD,SAAO,IAAI,SAAS,QAAQ;AAC9B;AAMA,eAAsB,0BAA0B,OAAyC;AACvF,QAAM,SAAS,OAAO,UAAU,WAAW,KAAK,QAAQ,KAAK,IAAI;AAGjE,QAAM,WAAW,MAAM,MAAM,MAAM,EAAE,SAAS;AAC9C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,MAAM,MAAM,MAAM,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGpE,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,MAAM,MAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,SAAO,MAAM,QAAQ,EAClB,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AACd;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;ADjFA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AA8Bf,IAAM,iBAA2C;AAAA,EAC/C,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ;AACV;AAEA,eAAe,aACb,QACA,UACA,SACwB;AACxB,UAAQ,UAAU;AAAA,IAChB,KAAK,UAAU;AACb,YAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,aAAO,aAAa,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACzC;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,mBAAmB;AAC5D,aAAO,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IAC5C;AAAA,IACA,KAAK,UAAU;AAEb,YAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,aAAO,yBAAyB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACrD;AAAA,IACA;AACE,YAAM,IAAI;AAAA,QACR,qBAAqB,QAAQ;AAAA,MAC/B;AAAA,EACJ;AACF;AAQA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AAAA,EA
CL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AACzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAEtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAEA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAEA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAIO,IAAM,SAAN,MAAa;AAAA,EACV,SAA+B;AAAA,EAC/B,gBAA+C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBvD,YAAY,YAAoC,SAAyB;AACvE,QAAI,OAAO,eAAe,UAAU;AAClC,YAAM,WAAW,SAAS,YAAY;AACtC,YAAM,UAAU,SAAS,SAAS,eAAe,QAAQ;AAEzD,WAAK,gBAAgB,aAAa,YAAY,UAAU,OAAO;AAAA,IACjE,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAc,WAAmC;AAC/C,QAAI,KAAK,OAAQ,QAAO,KAAK;AAC7B,SAAK,SAAS,MAAM,KAAK;AACzB,SAAK,gBAAgB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAM,OAAwB,UAAwB,CAAC,GAAoB;AAC/E,UAAM,EAAE,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE5E,UAAM,QAAQ,MAAM,KAAK,SAAS;AAClC,UAAM,cAAc,MAAM,0BAA0B,KAAK;AAGzD,UAAM,UAAU,MAAM,QAAQ;AAAA,MAC5B,MAAM,KAAK,EAAE,QAAQ,YAAY,GAAG,MAAM,KAAK,cAAc,OAAO,aAAa,UAAU,CAAC;AAAA,IAC9F;AACA,UAAM,WAAW,QAAQ,OAAO,CAAC,MAAmB,MAAM,IAAI;AAC9D,QAAI,SAAS;AACX,eAAS,QAAQ,CAAC,GAAG,MAAM,QAAQ,IAAI,aAAa,IAAI,CAAC,KAAK
,CAAC,EAAE,CAAC;AAAA,IACpE;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,UAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,aAAO;AAAA,IACT;AAEA,WAAO,aAAa,UAAU,cAAc;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,cACZ,OACA,aACA,YACwB;AACxB,aAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,UAAI;AACF,cAAM,EAAE,KAAK,IAAI,MAAM,aAAa;AAAA,UAClC;AAAA,UACA,UAAU;AAAA,YACR;AAAA,cACE,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,gBAC7B,EAAE,MAAM,SAAS,OAAO,YAAY;AAAA,cACtC;AAAA,YACF;AAAA,UACF;AAAA,UACA,aAAa;AAAA,UACb,iBAAiB;AAAA,QACnB,CAAC;AAED,cAAM,MAAM,KAAK,KAAK;AAGtB,cAAM,QAAQ,IAAI,YAAY;AAC9B,YACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,iBAAO;AAAA,QACT;AAGA,cAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,eAAO,WAAW;AAAA,MACpB,SAAS,MAAM;AACb,YAAI,QAAQ,YAAY;AACtB,gBAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/solver.ts","../src/preprocess.ts"],"sourcesContent":["import type { LanguageModel, LanguageModelUsage } from 'ai';\nimport { generateText } from 'ai';\nimport { preprocessCaptchaToBuffer } from './preprocess.js';\n\nconst PROMPT = `You are an assistant helping a visually impaired person read distorted text from an image.\nThe text contains uppercase letters A-Z and/or digits 0-9.\nA thin vertical stroke is the digit 1. Never read it as the letter I or L.\nA round closed shape is the letter O, not the letter D.\nOutput ONLY the exact characters you read, nothing else.`;\n\n// ── Types ────────────────────────────────────────────────────────────\n\nexport type Provider = 'openai' | 'anthropic' | 'google';\n\nexport interface SolverOptions {\n /** AI provider to use when constructing the model from an API key (default: \"openai\") */\n provider?: Provider;\n /** Model ID passed to the provider (default: \"gpt-4o\") */\n model?: string;\n}\n\nexport interface SolveOptions {\n /** Number of voting attempts (default: 5) */\n numAttempts?: number;\n /** Expected captcha length — results of other lengths are discarded */\n expectedLength?: number;\n /** Max retries per attempt on API failure (default: 2) */\n maxRetries?: number;\n /** Whether to log attempt details (default: true) */\n verbose?: boolean;\n}\n\nexport interface SolveResult {\n /** The solved captcha text (majority-voted) */\n text: string;\n /** Per-attempt raw answers (before voting) */\n attempts: string[];\n /** Aggregated token usage across all parallel attempts */\n usage: LanguageModelUsage;\n /** Per-attempt usage breakdown */\n attemptUsages: LanguageModelUsage[];\n}\n\ninterface AttemptResult {\n text: string;\n usage: LanguageModelUsage;\n}\n\n// ── Provider resolution ──────────────────────────────────────────────\n\nconst DEFAULT_MODELS: Record<Provider, string> = {\n openai: 'gpt-4o',\n anthropic: 'claude-sonnet-4-20250514',\n google: 
'gemini-2.0-flash',\n};\n\nasync function resolveModel(\n apiKey: string,\n provider: Provider,\n modelId: string\n): Promise<LanguageModel> {\n switch (provider) {\n case 'openai': {\n const { createOpenAI } = await import('@ai-sdk/openai');\n return createOpenAI({ apiKey })(modelId);\n }\n case 'anthropic': {\n // @ts-expect-error — optional peer dependency\n const { createAnthropic } = await import('@ai-sdk/anthropic');\n return createAnthropic({ apiKey })(modelId);\n }\n case 'google': {\n // @ts-expect-error — optional peer dependency\n const { createGoogleGenerativeAI } = await import('@ai-sdk/google');\n return createGoogleGenerativeAI({ apiKey })(modelId);\n }\n default:\n throw new Error(\n `Unknown provider \"${provider}\". Install the matching @ai-sdk/* package and pass the model directly.`\n );\n }\n}\n\n// ── Confusion groups ─────────────────────────────────────────────────\n\n/**\n * Characters the model commonly misreads as each other.\n * Each group maps to its canonical (most likely correct) character.\n */\nconst CONFUSION_GROUPS: Record<string, string> = {\n '1': '1',\n I: '1',\n L: '1',\n O: 'O',\n D: 'O',\n '0': 'O',\n S: 'S',\n '5': 'S',\n Z: 'Z',\n '2': 'Z',\n};\n\n// ── Majority voting ──────────────────────────────────────────────────\n\n/**\n * Character-level majority vote across multiple attempts.\n * Uses confusion-aware voting: characters that the model commonly\n * confuses (e.g. 1/I/L, O/D/0) are grouped together during counting.\n */\nfunction majorityVote(attempts: string[], expectedLength?: number): string {\n let filtered = expectedLength ? attempts.filter((a) => a.length === expectedLength) : attempts;\n\n if (filtered.length === 0) {\n filtered = attempts;\n }\n if (filtered.length === 0) return '';\n\n // Find most common length\n const lenCounts = new Map<number, number>();\n for (const a of filtered) {\n lenCounts.set(a.length, (lenCounts.get(a.length) ?? 
0) + 1);\n }\n let bestLen = 0;\n let bestCount = 0;\n for (const [len, count] of lenCounts) {\n if (count > bestCount) {\n bestLen = len;\n bestCount = count;\n }\n }\n\n const sameLenAttempts = filtered.filter((a) => a.length === bestLen);\n if (sameLenAttempts.length === 0) return filtered[0];\n\n // Vote per character position with confusion-aware grouping\n const result: string[] = [];\n for (let pos = 0; pos < bestLen; pos++) {\n const charCounts = new Map<string, number>();\n for (const a of sameLenAttempts) {\n const ch = a[pos];\n charCounts.set(ch, (charCounts.get(ch) ?? 0) + 1);\n }\n\n const groupCounts = new Map<string, number>();\n for (const [ch, count] of charCounts) {\n const canonical = CONFUSION_GROUPS[ch] ?? ch;\n groupCounts.set(canonical, (groupCounts.get(canonical) ?? 0) + count);\n }\n\n let bestGroup = '';\n let bestGroupCount = 0;\n for (const [canonical, count] of groupCounts) {\n if (count > bestGroupCount) {\n bestGroup = canonical;\n bestGroupCount = count;\n }\n }\n\n result.push(bestGroup);\n }\n\n return result.join('');\n}\n\n// ── Usage aggregation ────────────────────────────────────────────────\n\nfunction sumOptional(a: number | undefined, b: number | undefined): number | undefined {\n if (a === undefined && b === undefined) return undefined;\n return (a ?? 0) + (b ?? 
0);\n}\n\nfunction aggregateUsage(usages: LanguageModelUsage[]): LanguageModelUsage {\n const zero: LanguageModelUsage = {\n inputTokens: undefined,\n inputTokenDetails: {\n noCacheTokens: undefined,\n cacheReadTokens: undefined,\n cacheWriteTokens: undefined,\n },\n outputTokens: undefined,\n outputTokenDetails: {\n textTokens: undefined,\n reasoningTokens: undefined,\n },\n totalTokens: undefined,\n };\n return usages.reduce<LanguageModelUsage>(\n (acc, u) => ({\n inputTokens: sumOptional(acc.inputTokens, u.inputTokens),\n inputTokenDetails: {\n noCacheTokens: sumOptional(\n acc.inputTokenDetails.noCacheTokens,\n u.inputTokenDetails.noCacheTokens\n ),\n cacheReadTokens: sumOptional(\n acc.inputTokenDetails.cacheReadTokens,\n u.inputTokenDetails.cacheReadTokens\n ),\n cacheWriteTokens: sumOptional(\n acc.inputTokenDetails.cacheWriteTokens,\n u.inputTokenDetails.cacheWriteTokens\n ),\n },\n outputTokens: sumOptional(acc.outputTokens, u.outputTokens),\n outputTokenDetails: {\n textTokens: sumOptional(acc.outputTokenDetails.textTokens, u.outputTokenDetails.textTokens),\n reasoningTokens: sumOptional(\n acc.outputTokenDetails.reasoningTokens,\n u.outputTokenDetails.reasoningTokens\n ),\n },\n totalTokens: sumOptional(acc.totalTokens, u.totalTokens),\n }),\n zero\n );\n}\n\n// ── Solver class ─────────────────────────────────────────────────────\n\nexport class Solver {\n private _model: LanguageModel | null = null;\n private _pendingModel: Promise<LanguageModel> | null = null;\n\n /**\n * Create a captcha solver.\n *\n * @example\n * // Simple — defaults to OpenAI gpt-4o\n * const solver = new Solver('sk-...');\n *\n * @example\n * // Specify provider and model\n * const solver = new Solver('sk-ant-...', { provider: 'anthropic', model: 'claude-sonnet-4-20250514' });\n *\n * @example\n * // Pass an AI SDK model directly\n * import { createOpenAI } from '@ai-sdk/openai';\n * const openai = createOpenAI({ apiKey: 'sk-...' 
});\n * const solver = new Solver(openai('gpt-4o'));\n */\n constructor(keyOrModel: string | LanguageModel, options?: SolverOptions) {\n if (typeof keyOrModel === 'string') {\n const provider = options?.provider ?? 'openai';\n const modelId = options?.model ?? DEFAULT_MODELS[provider];\n // Lazily resolve the model on first use\n this._pendingModel = resolveModel(keyOrModel, provider, modelId);\n } else {\n this._model = keyOrModel;\n }\n }\n\n private async getModel(): Promise<LanguageModel> {\n if (this._model) return this._model;\n this._model = await this._pendingModel!;\n this._pendingModel = null;\n return this._model;\n }\n\n /**\n * Solve a captcha image.\n *\n * @param input - File path (string) or raw image Buffer\n * @param options - Solve options (attempts, expected length, etc.)\n * @returns Solved text, per-attempt answers, and token usage\n */\n async solve(input: string | Buffer, options: SolveOptions = {}): Promise<SolveResult> {\n const { numAttempts = 5, expectedLength, maxRetries = 2, verbose = true } = options;\n\n const model = await this.getModel();\n const imageBuffer = await preprocessCaptchaToBuffer(input);\n\n // Fire all attempts in parallel for speed\n const results = await Promise.all(\n Array.from({ length: numAttempts }, () => this.singleAttempt(model, imageBuffer, maxRetries))\n );\n const valid = results.filter((r): r is AttemptResult => r !== null);\n if (verbose) {\n valid.forEach((r, i) => console.log(` Attempt ${i + 1}: ${r.text}`));\n }\n\n const attempts = valid.map((r) => r.text);\n const attemptUsages = valid.map((r) => r.usage);\n const usage = aggregateUsage(attemptUsages);\n\n if (attempts.length === 0) {\n if (verbose) console.log(' All attempts failed!');\n return { text: '', attempts, usage, attemptUsages };\n }\n\n return { text: majorityVote(attempts, expectedLength), attempts, usage, attemptUsages };\n }\n\n /**\n * Make a single API call to read the captcha.\n * Retries up to `maxRetries` times on failure.\n */\n 
private async singleAttempt(\n model: LanguageModel,\n imageBuffer: Buffer,\n maxRetries: number\n ): Promise<AttemptResult | null> {\n for (let retry = 0; retry <= maxRetries; retry++) {\n try {\n const { text, usage } = await generateText({\n model,\n messages: [\n {\n role: 'user',\n content: [\n { type: 'text', text: PROMPT },\n { type: 'image', image: imageBuffer },\n ],\n },\n ],\n temperature: 1,\n maxOutputTokens: 256,\n });\n\n const raw = text.trim();\n\n // Detect refusals\n const lower = raw.toLowerCase();\n if (\n lower.includes('sorry') ||\n lower.includes(\"can't help\") ||\n lower.includes('cannot help') ||\n lower.includes('unable to') ||\n lower.includes(\"i can't\") ||\n raw.length > 20\n ) {\n return null;\n }\n\n // Clean: keep only uppercase letters and digits\n const cleaned = raw.toUpperCase().replace(/[^A-Z0-9]/g, '');\n return cleaned ? { text: cleaned, usage } : null;\n } catch (_err) {\n if (retry < maxRetries) {\n await new Promise((r) => setTimeout(r, 1000 * (retry + 1)));\n continue;\n }\n return null;\n }\n }\n return null;\n }\n}\n","import fs from 'fs';\nimport path from 'path';\nimport sharp from 'sharp';\n\n/**\n * Preprocess a captcha image using sharp (libvips).\n *\n * Pipeline:\n * 1. Gaussian blur in color space (smooths dither pattern)\n * 2. Grayscale conversion\n * 3. Upscale 4× with Lanczos\n * 4. Contrast boost (3× around image mean) + sharpen\n * 5. Crop decorative borders\n * 6. 
Add white padding\n *\n * Accepts a file path or a raw image Buffer.\n * Returns a base64-encoded PNG string.\n */\nexport async function preprocessCaptcha(input: string | Buffer): Promise<string> {\n const buf = await preprocessCaptchaToBuffer(input);\n return buf.toString('base64');\n}\n\n/**\n * Same preprocessing pipeline as `preprocessCaptcha`, but returns the\n * resulting PNG as a raw Buffer (useful for AI SDK image content parts).\n */\nexport async function preprocessCaptchaToBuffer(input: string | Buffer): Promise<Buffer> {\n const source = typeof input === 'string' ? path.resolve(input) : input;\n\n // Read original dimensions for crop/resize calculations\n const metadata = await sharp(source).metadata();\n const origW = metadata.width!;\n const origH = metadata.height!;\n\n // Step 1-2: Blur in color space (smooths dither pattern) → greyscale\n // Separate from resize to prevent pipeline reordering\n const smoothed = await sharp(source).blur(1.5).greyscale().toBuffer();\n\n // Step 3: Upscale 4× with Lanczos\n const upscaled = await sharp(smoothed)\n .resize(origW * 4, origH * 4, { kernel: 'lanczos3' })\n .toBuffer();\n\n // Step 4: Contrast 3× around actual image mean + sharpen\n // Matches PIL's ImageEnhance.Contrast: output = factor*input + mean*(1-factor)\n const stats = await sharp(upscaled).stats();\n const mean = stats.channels[0].mean;\n const enhanced = await sharp(upscaled)\n .linear(3.0, mean * (1 - 3.0))\n .sharpen({ sigma: 1.0, m1: 2.0, m2: 1.0 })\n .toBuffer();\n\n // Step 5: Crop decorative borders\n // Remove 10% left/right, 2% top, 40% bottom (keep top 60%)\n // Math.floor matches Python's int() truncation\n const scaledW = origW * 4;\n const scaledH = origH * 4;\n const cropLeft = Math.floor(scaledW * 0.1);\n const cropTop = Math.floor(scaledH * 0.02);\n const cropRight = Math.floor(scaledW * 0.9);\n const cropBottom = Math.floor(scaledH * 0.6);\n const cropW = cropRight - cropLeft;\n const cropH = cropBottom - cropTop;\n\n // Step 
5-6: Crop → add white padding → output PNG\n return sharp(enhanced)\n .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })\n .extend({\n top: 20,\n bottom: 20,\n left: 30,\n right: 30,\n background: { r: 255, g: 255, b: 255 },\n })\n .png()\n .toBuffer();\n}\n\n/**\n * Read an image file and return its base64-encoded content.\n */\nexport function imageToBase64(imagePath: string): string {\n const buffer = fs.readFileSync(imagePath);\n return buffer.toString('base64');\n}\n"],"mappings":";AACA,SAAS,oBAAoB;;;ACD7B,OAAO,QAAQ;AACf,OAAO,UAAU;AACjB,OAAO,WAAW;AAgBlB,eAAsB,kBAAkB,OAAyC;AAC/E,QAAM,MAAM,MAAM,0BAA0B,KAAK;AACjD,SAAO,IAAI,SAAS,QAAQ;AAC9B;AAMA,eAAsB,0BAA0B,OAAyC;AACvF,QAAM,SAAS,OAAO,UAAU,WAAW,KAAK,QAAQ,KAAK,IAAI;AAGjE,QAAM,WAAW,MAAM,MAAM,MAAM,EAAE,SAAS;AAC9C,QAAM,QAAQ,SAAS;AACvB,QAAM,QAAQ,SAAS;AAIvB,QAAM,WAAW,MAAM,MAAM,MAAM,EAAE,KAAK,GAAG,EAAE,UAAU,EAAE,SAAS;AAGpE,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,QAAQ,GAAG,QAAQ,GAAG,EAAE,QAAQ,WAAW,CAAC,EACnD,SAAS;AAIZ,QAAM,QAAQ,MAAM,MAAM,QAAQ,EAAE,MAAM;AAC1C,QAAM,OAAO,MAAM,SAAS,CAAC,EAAE;AAC/B,QAAM,WAAW,MAAM,MAAM,QAAQ,EAClC,OAAO,GAAK,QAAQ,IAAI,EAAI,EAC5B,QAAQ,EAAE,OAAO,GAAK,IAAI,GAAK,IAAI,EAAI,CAAC,EACxC,SAAS;AAKZ,QAAM,UAAU,QAAQ;AACxB,QAAM,UAAU,QAAQ;AACxB,QAAM,WAAW,KAAK,MAAM,UAAU,GAAG;AACzC,QAAM,UAAU,KAAK,MAAM,UAAU,IAAI;AACzC,QAAM,YAAY,KAAK,MAAM,UAAU,GAAG;AAC1C,QAAM,aAAa,KAAK,MAAM,UAAU,GAAG;AAC3C,QAAM,QAAQ,YAAY;AAC1B,QAAM,QAAQ,aAAa;AAG3B,SAAO,MAAM,QAAQ,EAClB,QAAQ,EAAE,MAAM,UAAU,KAAK,SAAS,OAAO,OAAO,QAAQ,MAAM,CAAC,EACrE,OAAO;AAAA,IACN,KAAK;AAAA,IACL,QAAQ;AAAA,IACR,MAAM;AAAA,IACN,OAAO;AAAA,IACP,YAAY,EAAE,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI;AAAA,EACvC,CAAC,EACA,IAAI,EACJ,SAAS;AACd;AAKO,SAAS,cAAc,WAA2B;AACvD,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,SAAO,OAAO,SAAS,QAAQ;AACjC;;;ADjFA,IAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AA8Cf,IAAM,iBAA2C;AAAA,EAC/C,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ;AACV;AAEA,eAAe,aACb,QACA,UACA,SACwB;AACxB,UAAQ,UAAU;AAAA,IAChB,KAAK,UAAU;AACb,YAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,aAAO,aAAa,EAAE,OAAO,CAAC,EAAE,OAAO;
AAAA,IACzC;AAAA,IACA,KAAK,aAAa;AAEhB,YAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,mBAAmB;AAC5D,aAAO,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IAC5C;AAAA,IACA,KAAK,UAAU;AAEb,YAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,aAAO,yBAAyB,EAAE,OAAO,CAAC,EAAE,OAAO;AAAA,IACrD;AAAA,IACA;AACE,YAAM,IAAI;AAAA,QACR,qBAAqB,QAAQ;AAAA,MAC/B;AAAA,EACJ;AACF;AAQA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AAAA,EACL,GAAG;AAAA,EACH,KAAK;AACP;AASA,SAAS,aAAa,UAAoB,gBAAiC;AACzE,MAAI,WAAW,iBAAiB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,cAAc,IAAI;AAEtF,MAAI,SAAS,WAAW,GAAG;AACzB,eAAW;AAAA,EACb;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAGlC,QAAM,YAAY,oBAAI,IAAoB;AAC1C,aAAW,KAAK,UAAU;AACxB,cAAU,IAAI,EAAE,SAAS,UAAU,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;AAAA,EAC5D;AACA,MAAI,UAAU;AACd,MAAI,YAAY;AAChB,aAAW,CAAC,KAAK,KAAK,KAAK,WAAW;AACpC,QAAI,QAAQ,WAAW;AACrB,gBAAU;AACV,kBAAY;AAAA,IACd;AAAA,EACF;AAEA,QAAM,kBAAkB,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnE,MAAI,gBAAgB,WAAW,EAAG,QAAO,SAAS,CAAC;AAGnD,QAAM,SAAmB,CAAC;AAC1B,WAAS,MAAM,GAAG,MAAM,SAAS,OAAO;AACtC,UAAM,aAAa,oBAAI,IAAoB;AAC3C,eAAW,KAAK,iBAAiB;AAC/B,YAAM,KAAK,EAAE,GAAG;AAChB,iBAAW,IAAI,KAAK,WAAW,IAAI,EAAE,KAAK,KAAK,CAAC;AAAA,IAClD;AAEA,UAAM,cAAc,oBAAI,IAAoB;AAC5C,eAAW,CAAC,IAAI,KAAK,KAAK,YAAY;AACpC,YAAM,YAAY,iBAAiB,EAAE,KAAK;AAC1C,kBAAY,IAAI,YAAY,YAAY,IAAI,SAAS,KAAK,KAAK,KAAK;AAAA,IACtE;AAEA,QAAI,YAAY;AAChB,QAAI,iBAAiB;AACrB,eAAW,CAAC,WAAW,KAAK,KAAK,aAAa;AAC5C,UAAI,QAAQ,gBAAgB;AAC1B,oBAAY;AACZ,yBAAiB;AAAA,MACnB;AAAA,IACF;AAEA,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,SAAO,OAAO,KAAK,EAAE;AACvB;AAIA,SAAS,YAAY,GAAuB,GAA2C;AACrF,MAAI,MAAM,UAAa,MAAM,OAAW,QAAO;AAC/C,UAAQ,KAAK,MAAM,KAAK;AAC1B;AAEA,SAAS,eAAe,QAAkD;AACxE,QAAM,OAA2B;AAAA,IAC/B,aAAa;AAAA,IACb,mBAAmB;AAAA,MACjB,eAAe;AAAA,MACf,iBAAiB;AAAA,MACjB,kBAAkB;AAAA,IACpB;AAAA,IACA,cAAc;AAAA,IACd,oBAAoB;AAAA,MAClB,YAAY;AAAA,MACZ,iBAAiB;AAAA,IACnB;AAAA,IACA,aAAa;AAAA,EACf;AACA,SAAO,OAAO;AAAA,IACZ,CAAC,KAAK,OAAO;AAAA,MACX,aAAa,YAAY,IAAI,aAAa,EAAE,WAAW;AAAA,MACvD,mBAAmB;AAAA,QACj
B,eAAe;AAAA,UACb,IAAI,kBAAkB;AAAA,UACtB,EAAE,kBAAkB;AAAA,QACtB;AAAA,QACA,iBAAiB;AAAA,UACf,IAAI,kBAAkB;AAAA,UACtB,EAAE,kBAAkB;AAAA,QACtB;AAAA,QACA,kBAAkB;AAAA,UAChB,IAAI,kBAAkB;AAAA,UACtB,EAAE,kBAAkB;AAAA,QACtB;AAAA,MACF;AAAA,MACA,cAAc,YAAY,IAAI,cAAc,EAAE,YAAY;AAAA,MAC1D,oBAAoB;AAAA,QAClB,YAAY,YAAY,IAAI,mBAAmB,YAAY,EAAE,mBAAmB,UAAU;AAAA,QAC1F,iBAAiB;AAAA,UACf,IAAI,mBAAmB;AAAA,UACvB,EAAE,mBAAmB;AAAA,QACvB;AAAA,MACF;AAAA,MACA,aAAa,YAAY,IAAI,aAAa,EAAE,WAAW;AAAA,IACzD;AAAA,IACA;AAAA,EACF;AACF;AAIO,IAAM,SAAN,MAAa;AAAA,EACV,SAA+B;AAAA,EAC/B,gBAA+C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBvD,YAAY,YAAoC,SAAyB;AACvE,QAAI,OAAO,eAAe,UAAU;AAClC,YAAM,WAAW,SAAS,YAAY;AACtC,YAAM,UAAU,SAAS,SAAS,eAAe,QAAQ;AAEzD,WAAK,gBAAgB,aAAa,YAAY,UAAU,OAAO;AAAA,IACjE,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAc,WAAmC;AAC/C,QAAI,KAAK,OAAQ,QAAO,KAAK;AAC7B,SAAK,SAAS,MAAM,KAAK;AACzB,SAAK,gBAAgB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAM,OAAwB,UAAwB,CAAC,GAAyB;AACpF,UAAM,EAAE,cAAc,GAAG,gBAAgB,aAAa,GAAG,UAAU,KAAK,IAAI;AAE5E,UAAM,QAAQ,MAAM,KAAK,SAAS;AAClC,UAAM,cAAc,MAAM,0BAA0B,KAAK;AAGzD,UAAM,UAAU,MAAM,QAAQ;AAAA,MAC5B,MAAM,KAAK,EAAE,QAAQ,YAAY,GAAG,MAAM,KAAK,cAAc,OAAO,aAAa,UAAU,CAAC;AAAA,IAC9F;AACA,UAAM,QAAQ,QAAQ,OAAO,CAAC,MAA0B,MAAM,IAAI;AAClE,QAAI,SAAS;AACX,YAAM,QAAQ,CAAC,GAAG,MAAM,QAAQ,IAAI,aAAa,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;AAAA,IACtE;AAEA,UAAM,WAAW,MAAM,IAAI,CAAC,MAAM,EAAE,IAAI;AACxC,UAAM,gBAAgB,MAAM,IAAI,CAAC,MAAM,EAAE,KAAK;AAC9C,UAAM,QAAQ,eAAe,aAAa;AAE1C,QAAI,SAAS,WAAW,GAAG;AACzB,UAAI,QAAS,SAAQ,IAAI,wBAAwB;AACjD,aAAO,EAAE,MAAM,IAAI,UAAU,OAAO,cAAc;AAAA,IACpD;AAEA,WAAO,EAAE,MAAM,aAAa,UAAU,cAAc,GAAG,UAAU,OAAO,cAAc;AAAA,EACxF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,cACZ,OACA,aACA,YAC+B;AAC/B,aAAS,QAAQ,GAAG,SAAS,YAAY,SAAS;AAChD,UAAI;AACF,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,aAAa;AAAA,UACzC;AAAA,UACA,UAAU;AAAA,YACR;AAAA,cACE,MAAM;AAAA,cACN,SAAS;AAAA,gBACP,EAAE,MAAM,QAAQ,MAAM,OAAO;AAAA,gBAC7B,EAAE,MAAM,SAAS,OAAO,
YAAY;AAAA,cACtC;AAAA,YACF;AAAA,UACF;AAAA,UACA,aAAa;AAAA,UACb,iBAAiB;AAAA,QACnB,CAAC;AAED,cAAM,MAAM,KAAK,KAAK;AAGtB,cAAM,QAAQ,IAAI,YAAY;AAC9B,YACE,MAAM,SAAS,OAAO,KACtB,MAAM,SAAS,YAAY,KAC3B,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,WAAW,KAC1B,MAAM,SAAS,SAAS,KACxB,IAAI,SAAS,IACb;AACA,iBAAO;AAAA,QACT;AAGA,cAAM,UAAU,IAAI,YAAY,EAAE,QAAQ,cAAc,EAAE;AAC1D,eAAO,UAAU,EAAE,MAAM,SAAS,MAAM,IAAI;AAAA,MAC9C,SAAS,MAAM;AACb,YAAI,QAAQ,YAAY;AACtB,gBAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,OAAQ,QAAQ,EAAE,CAAC;AAC1D;AAAA,QACF;AACA,eAAO;AAAA,MACT;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yigitahmetsahin/captcha-solver",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.1",
|
|
4
4
|
"description": "AI-powered captcha solver using image preprocessing and multi-provider vision models (Vercel AI SDK)",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|