@painitehq/structured-llm 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,75 +1,161 @@
1
- # React + TypeScript + Vite
1
+ # @painitehq/structured-llm
2
2
 
3
- This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
3
+ Force LLM output into structured, type-safe JSON. Stop your app from crashing on malformed AI responses.
4
4
 
5
- Currently, two official plugins are available:
5
+ ## Install
6
6
 
7
- - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs)
8
- - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/)
7
+ ```bash
8
+ npm install @painitehq/structured-llm
9
+ # or
10
+ bun add @painitehq/structured-llm
11
+ ```
9
12
 
10
- ## React Compiler
13
+ ## Quick Start
11
14
 
12
- The React Compiler is enabled on this template. See [this documentation](https://react.dev/learn/react-compiler) for more information.
15
+ ```ts
16
+ import { extract, defineSchema } from "@painitehq/structured-llm";
13
17
 
14
- Note: This will impact Vite dev & build performances.
18
+ const schema = defineSchema("invoice", {
19
+ invoiceNumber: { type: "string" },
20
+ totalAmount: { type: "number" },
21
+ vendor: { type: "string" },
22
+ items: { type: "array", items: { type: "string" } },
23
+ });
15
24
 
16
- ## Expanding the ESLint configuration
25
+ const result = await extract(messyText, schema, {
26
+ apiKey: "your-openrouter-api-key",
27
+ });
17
28
 
18
- If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
29
+ console.log(result.data);
30
+ // { invoiceNumber: "INV-2024-042", totalAmount: 500, vendor: "Acme Corp", items: [...] }
19
31
 
20
- ```js
21
- export default defineConfig([
22
- globalIgnores(['dist']),
23
- {
24
- files: ['**/*.{ts,tsx}'],
25
- extends: [
26
- // Other configs...
32
+ console.log(result.confidence); // 100
33
+ console.log(result.attempts); // 1
34
+ ```
27
35
 
28
- // Remove tseslint.configs.recommended and replace with this
29
- tseslint.configs.recommendedTypeChecked,
30
- // Alternatively, use this for stricter rules
31
- tseslint.configs.strictTypeChecked,
32
- // Optionally, add this for stylistic rules
33
- tseslint.configs.stylisticTypeChecked,
36
+ ## What It Does
34
37
 
35
- // Other configs...
36
- ],
37
- languageOptions: {
38
- parserOptions: {
39
- project: ['./tsconfig.node.json', './tsconfig.app.json'],
40
- tsconfigRootDir: import.meta.dirname,
41
- },
42
- // other options...
43
- },
44
- },
45
- ])
38
+ LLMs return unstructured text. Sometimes it's valid JSON. Sometimes it's wrapped in markdown. Sometimes it's completely broken. This SDK:
39
+
40
+ 1. **Forces** the model to output valid JSON via strict prompt engineering
41
+ 2. **Repairs** malformed JSON (trailing commas, missing brackets, broken quotes)
42
+ 3. **Unwraps** named wrappers like `{"invoice": {...}}` → `{...}`
43
+ 4. **Validates** the output against your schema
44
+ 5. **Coerces** wrong types (`"42"` → `42`, `"true"` → `true`)
45
+ 6. **Fills** missing fields with sensible defaults
46
+ 7. **Retries** with escalating instructions if the model fails
47
+
48
+ ## Features
49
+
50
+ ### Forced Structured Output
51
+
52
+ The SDK doesn't ask the model to "give JSON". It forces it:
53
+
54
+ ```
55
+ CRITICAL RULES:
56
+ - Output ONLY valid JSON. No text before or after.
57
+ - No markdown. No code blocks. No explanations.
58
+ - Every field MUST be present with the correct type.
46
59
  ```
47
60
 
48
- You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
49
-
50
- ```js
51
- // eslint.config.js
52
- import reactX from 'eslint-plugin-react-x'
53
- import reactDom from 'eslint-plugin-react-dom'
54
-
55
- export default defineConfig([
56
- globalIgnores(['dist']),
57
- {
58
- files: ['**/*.{ts,tsx}'],
59
- extends: [
60
- // Other configs...
61
- // Enable lint rules for React
62
- reactX.configs['recommended-typescript'],
63
- // Enable lint rules for React DOM
64
- reactDom.configs.recommended,
65
- ],
66
- languageOptions: {
67
- parserOptions: {
68
- project: ['./tsconfig.node.json', './tsconfig.app.json'],
69
- tsconfigRootDir: import.meta.dirname,
70
- },
71
- // other options...
61
+ ### Escalating Retries
62
+
63
+ If the model fails, the SDK retries with increasingly strict instructions:
64
+
65
+ - **Attempt 1**: Clean forced prompt
66
+ - **Attempt 2**: "Your response was invalid. Here's the error. Fix it."
67
+ - **Attempt 3**: "FINAL ATTEMPT. THIS IS YOUR LAST CHANCE."
68
+
69
+ ### Post-Validation Repair
70
+
71
+ Even if the JSON parses, the SDK fixes type mismatches:
72
+
73
+ | Model returns | Schema expects | SDK does |
74
+ |---------------|----------------|----------|
75
+ | `"42"` | `number` | coerces to `42` |
76
+ | `"true"` | `boolean` | coerces to `true` |
77
+ | `42` | `string` | coerces to `"42"` |
78
+ | missing field | any type | fills with default |
79
+
80
+ ### Confidence Scoring
81
+
82
+ Every extraction returns a confidence score (0-100):
83
+
84
+ ```ts
85
+ const result = await extract(text, schema, { apiKey });
86
+
87
+ result.confidence; // 85
88
+ result.repairLog; // [{ type: "type_coercion", detail: "Coerced \"price\" to number" }]
89
+ result.attempts; // 2
90
+ ```
91
+
92
+ Confidence deductions:
93
+ - JSON not valid first try: -15
94
+ - Each retry: -10
95
+ - Type coercion per field: -5
96
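+ - Missing field filled with a type fallback (`""`, `0`, `false`, `[]`, `{}`): -8
+ - Missing field filled from the schema's `default`: -3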
97
+
98
+ ## Schema Definition
99
+
100
+ ```ts
101
+ import { defineSchema } from "@painitehq/structured-llm";
102
+
103
+ const schema = defineSchema("person", {
104
+ name: { type: "string", description: "Full name" },
105
+ age: { type: "number" },
106
+ isStudent: { type: "boolean" },
107
+ hobbies: { type: "array", items: { type: "string" } },
108
+ address: {
109
+ type: "object",
110
+ properties: {
111
+ city: { type: "string" },
112
+ zip: { type: "string" },
72
113
  },
73
114
  },
74
- ])
115
+ });
116
+ ```
117
+
118
+ Supported types: `string`, `number`, `boolean`, `array`, `object`
119
+
120
+ ## API
121
+
122
+ ### `extract<T>(input, schema, options)`
123
+
124
+ Returns `Promise<ExtractionResult<T>>`:
125
+
126
+ ```ts
127
+ interface ExtractionResult<T> {
128
+ data: T; // typed structured data
129
+ raw: string; // raw LLM response
130
+ model: string; // model used
131
+ confidence: number; // 0-100 score
132
+ repairLog: RepairAction[]; // what was fixed
133
+ attempts: number; // how many tries
134
+ usage?: {
135
+ promptTokens: number;
136
+ completionTokens: number;
137
+ totalTokens: number;
138
+ };
139
+ }
140
+ ```
141
+
142
+ ### Options
143
+
144
+ ```ts
145
+ {
146
+ apiKey?: string; // OpenRouter API key (or set OPENROUTER_API_KEY env var)
147
+ model?: string; // model to use (default: "openrouter/free")
148
+ temperature?: number; // 0-1 (default: 0)
149
+ maxRetries?: number; // max retry attempts (default: 3)
150
+ timeout?: number; // request timeout in ms (default: 60000)
151
+ }
75
152
  ```
153
+
154
+ ## Requirements
155
+
156
+ - OpenRouter API key (get one at https://openrouter.ai)
157
+ - Any runtime: Node.js, Bun, Deno, browsers
158
+
159
+ ## License
160
+
161
+ MIT
package/dist/index.cjs CHANGED
@@ -111,12 +111,6 @@ function repairJSON(raw) {
111
111
  candidate = candidate.replace(/,\s*([}\]])/g, "$1");
112
112
  candidate = candidate.replace(/'/g, '"');
113
113
  candidate = candidate.replace(/(\w+)\s*:/g, '"$1":');
114
- candidate = candidate.replace(/:\s*"([^"]*?)"/g, (match, content) => {
115
- if (content.includes('"')) {
116
- return match;
117
- }
118
- return match;
119
- });
120
114
  if (!candidate.startsWith("{") && !candidate.startsWith("[")) {
121
115
  const firstBrace = candidate.indexOf("{");
122
116
  const firstBracket = candidate.indexOf("[");
@@ -141,6 +135,128 @@ function repairJSON(raw) {
141
135
  }
142
136
  return null;
143
137
  }
138
/**
 * Best-effort conversion of a parsed value to the type declared by a schema field.
 *
 * @param {*} value - Value taken from the model's JSON output.
 * @param {string} targetType - Declared field type ("string", "number", "boolean", "array", ...).
 * @returns {{ value: *, coerced: boolean }} The (possibly converted) value and whether a
 *   conversion happened. When no safe conversion exists the input is returned untouched
 *   with `coerced: false`.
 */
function coerceType(value, targetType) {
  const unchanged = { value, coerced: false };
  if (value === undefined || value === null) {
    return unchanged;
  }
  switch (targetType) {
    case "string": {
      if (typeof value === "string") return unchanged;
      let text;
      if (typeof value === "object") {
        // String() would yield "[object Object]" (or a comma-joined array) and lose the
        // content, so serialize structured values instead.
        try {
          text = JSON.stringify(value);
        } catch (err) {
          text = undefined; // e.g. cyclic input: fall back to String() below
        }
      }
      if (typeof text !== "string") {
        text = String(value);
      }
      return { value: text, coerced: true };
    }
    case "number": {
      if (typeof value === "number") return unchanged;
      // Number("") and Number("   ") are 0, which would silently invent a value,
      // so blank strings are left alone.
      if (typeof value === "string" && value.trim() !== "") {
        const num = Number(value);
        // Reject NaN and +/-Infinity: neither can be represented in JSON.
        if (Number.isFinite(num)) return { value: num, coerced: true };
      }
      return unchanged;
    }
    case "boolean": {
      if (typeof value === "boolean") return unchanged;
      if (typeof value === "string") {
        const word = value.trim().toLowerCase();
        if (word === "true" || word === "yes" || word === "1") return { value: true, coerced: true };
        if (word === "false" || word === "no" || word === "0") return { value: false, coerced: true };
        return unchanged;
      }
      if (typeof value === "number") {
        return { value: value !== 0, coerced: true };
      }
      return unchanged;
    }
    case "array":
      if (Array.isArray(value)) return unchanged;
      // A lone value where a list was expected becomes a one-element list.
      return { value: [value], coerced: true };
    default:
      return unchanged;
  }
}
171
/**
 * Repairs a parsed model response in place so it lines up with the schema fields.
 *
 * Steps, in order:
 *  1. If the response is wrapped as `{ [schemaName]: {...} }`, use the inner object.
 *  2. Fill each missing field from `field.default`, or from a type fallback
 *     ("" / 0 / false / [] / {}).
 *  3. Coerce each present value to the declared type via `coerceType`.
 *  4. Repair array elements via `repairItem` and nested objects via a recursive call.
 *
 * @param {*} data - Parsed JSON value; anything that is not a plain object is returned as-is.
 * @param {Object} fields - Map of field name to field definition (`type`, optional
 *   `default`, `items`, `properties`).
 * @param {string} schemaName - Schema name used to detect a named wrapper. Pass "" to
 *   disable unwrapping (nested calls do this).
 * @returns {{ data: *, actions: Array<{attempt: number, type: string, detail: string}> }}
 *   The repaired object (the same object that was passed in, or its unwrapped inner
 *   object) and a log of every repair that was applied.
 */
function repairData(data, fields, schemaName) {
  const actions = [];
  if (typeof data !== "object" || data === null || Array.isArray(data)) {
    return { data, actions };
  }

  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  const isRecord = (v) => typeof v === "object" && v !== null && !Array.isArray(v);
  // Defaults belong to the schema; hand out a copy so later repairs or callers
  // cannot mutate the schema definition through the result.
  const cloneDefault = (d) => (typeof d === "object" && d !== null ? JSON.parse(JSON.stringify(d)) : d);
  const emptyValueFor = (type) => {
    switch (type) {
      case "string":
        return { value: "", label: "empty string" };
      case "number":
        return { value: 0, label: "0" };
      case "boolean":
        return { value: false, label: "false" };
      case "array":
        return { value: [], label: "empty array" };
      case "object":
        return { value: {}, label: "empty object" };
      default:
        return null;
    }
  };

  let obj = data;
  // Only an own, plain-object property can be a wrapper: `in` would also match inherited
  // keys such as "__proto__", and an empty name (nested calls) must never unwrap.
  if (schemaName && hasOwn(obj, schemaName) && isRecord(obj[schemaName])) {
    const inner = obj[schemaName];
    // When the schema name is also one of its own fields, the key alone is ambiguous.
    // Treat it as a wrapper only if it is the sole key and the inner object carries
    // at least one declared field.
    const nameIsField = hasOwn(fields, schemaName);
    const soleKey = Object.keys(obj).length === 1;
    const innerHasField = Object.keys(fields).some((k) => hasOwn(inner, k));
    if (!nameIsField || (soleKey && innerHasField)) {
      actions.push({ attempt: 0, type: "unwrap", detail: `Unwrapped "${schemaName}" wrapper` });
      obj = inner;
    }
  }

  for (const [key, field] of Object.entries(fields)) {
    // Own-property check so inherited members (toString, constructor, ...) do not
    // count as values supplied by the model.
    if (!hasOwn(obj, key) || obj[key] === undefined) {
      if (field.default !== undefined) {
        obj[key] = cloneDefault(field.default);
        actions.push({ attempt: 0, type: "default_fill", detail: `Set "${key}" to default: ${JSON.stringify(field.default)}` });
      } else {
        const fallback = emptyValueFor(field.type);
        if (fallback === null) {
          continue; // unknown type: nothing sensible to add, leave the field absent
        }
        obj[key] = fallback.value;
        actions.push({ attempt: 0, type: "missing_field", detail: `Added ${fallback.label} for "${key}"` });
      }
    }

    const coerced = coerceType(obj[key], field.type);
    if (coerced.coerced) {
      obj[key] = coerced.value;
      actions.push({ attempt: 0, type: "type_coercion", detail: `Coerced "${key}" to ${field.type}: ${JSON.stringify(coerced.value)}` });
    }

    if (field.type === "array" && Array.isArray(obj[key]) && field.items) {
      obj[key] = obj[key].map((item, i) => {
        const itemResult = repairItem(item, field.items);
        // Prefix element repairs with their index so the log shows which element changed.
        actions.push(...itemResult.actions.map((a) => ({ ...a, detail: `[${i}] ${a.detail}` })));
        return itemResult.data;
      });
    }

    if (field.type === "object" && typeof obj[key] === "object" && obj[key] !== null && field.properties) {
      const nested = repairData(obj[key], field.properties, "");
      actions.push(...nested.actions);
      obj[key] = nested.data;
    }
  }
  return { data: obj, actions };
}
229
/**
 * Repairs a single array element against the schema's `items` definition.
 *
 * - Non-object item types: the element itself is coerced via `coerceType`.
 * - Object item type with `properties`: missing properties are filled (from `default`
 *   or a type fallback), present ones are coerced, and nested objects/arrays are
 *   repaired recursively. The element object is modified in place.
 *
 * @param {*} item - One element of an array field.
 * @param {Object} field - The `items` definition (`type`, optional `properties`, `items`).
 * @returns {{ data: *, actions: Array<{attempt: number, type: string, detail: string}> }}
 *   The repaired element and a log of the repairs applied to it.
 */
function repairItem(item, field) {
  const actions = [];

  if (field.type !== "object") {
    // Primitive (or array) elements: coerce the element itself to the declared type.
    const coercedItem = coerceType(item, field.type);
    if (!coercedItem.coerced) {
      return { data: item, actions };
    }
    actions.push({ attempt: 0, type: "type_coercion", detail: `Coerced item to ${field.type}` });
    return { data: coercedItem.value, actions };
  }

  const isRecord = typeof item === "object" && item !== null && !Array.isArray(item);
  if (!isRecord || !field.properties) {
    return { data: item, actions };
  }

  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  // Copy defaults so the schema definition is never shared with (or mutated via) results.
  const cloneDefault = (d) => (typeof d === "object" && d !== null ? JSON.parse(JSON.stringify(d)) : d);
  const emptyValueFor = (type) => {
    switch (type) {
      case "string":
        return { value: "", label: "empty string" };
      case "number":
        return { value: 0, label: "0" };
      case "boolean":
        return { value: false, label: "false" };
      case "array":
        return { value: [], label: "empty array" };
      case "object":
        return { value: {}, label: "empty object" };
      default:
        return null;
    }
  };

  for (const [key, prop] of Object.entries(field.properties)) {
    // Own-property check so inherited members are not mistaken for model output.
    if (!hasOwn(item, key) || item[key] === undefined) {
      if (prop.default !== undefined) {
        item[key] = cloneDefault(prop.default);
        actions.push({ attempt: 0, type: "default_fill", detail: `Set "${key}" to default: ${JSON.stringify(prop.default)}` });
      } else {
        const fallback = emptyValueFor(prop.type);
        if (fallback === null) {
          continue; // unknown type: leave the property absent
        }
        item[key] = fallback.value;
        actions.push({ attempt: 0, type: "missing_field", detail: `Added ${fallback.label} for "${key}"` });
      }
    }

    const coerced = coerceType(item[key], prop.type);
    if (coerced.coerced) {
      item[key] = coerced.value;
      actions.push({ attempt: 0, type: "type_coercion", detail: `Coerced "${key}" to ${prop.type}` });
    }

    if (prop.type === "object" && prop.properties) {
      const nested = repairItem(item[key], prop);
      item[key] = nested.data;
      actions.push(...nested.actions);
    } else if (prop.type === "array" && prop.items && Array.isArray(item[key])) {
      item[key] = item[key].map((element, i) => {
        const nested = repairItem(element, prop.items);
        actions.push(...nested.actions.map((a) => ({ ...a, detail: `[${i}] ${a.detail}` })));
        return nested.data;
      });
    }
  }
  return { data: item, actions };
}
144
260
  function validateAgainstSchema(data, schema) {
145
261
  if (typeof data !== "object" || data === null || Array.isArray(data)) {
146
262
  return { valid: false, errors: ["Root value must be an object"] };
@@ -194,40 +310,120 @@ function validateAgainstSchema(data, schema) {
194
310
  function defineSchema(name, fields) {
195
311
  return { name, fields };
196
312
  }
197
- function schemaToPrompt(schema) {
198
- const fieldDescriptions = Object.entries(schema.fields).map(([key, field]) => {
199
- const parts = [`"${key}": ${fieldTypeToExample(field)}`];
200
- if (field.description) {
201
- parts.push(`// ${field.description}`);
313
/**
 * Builds an example object that mirrors the schema, for embedding in a prompt.
 *
 * Examples by type: string -> the field description (or "..."), number -> 0,
 * boolean -> true, array -> a one-element array holding the example for `items`
 * (or [] when `items` is absent), object -> the example for `properties` (or {}).
 * Fields with an unrecognized type are omitted.
 *
 * @param {Object} fields - Map of field name to field definition.
 * @returns {Object} Example object keyed like `fields`.
 */
function schemaToJSONExample(fields) {
  const exampleFor = (field) => {
    switch (field.type) {
      case "string":
        return field.description || "...";
      case "number":
        return 0;
      case "boolean":
        return true;
      case "array": {
        if (!field.items) return [];
        // Derive the element from the item definition itself: an array of strings must
        // be shown as ["..."], not as [{}] (which recursing into `properties` would give).
        const element = exampleFor(field.items);
        return element === undefined ? [] : [element];
      }
      case "object":
        return field.properties ? schemaToJSONExample(field.properties) : {};
      default:
        return undefined;
    }
  };

  const result = {};
  for (const [key, field] of Object.entries(fields)) {
    const example = exampleFor(field);
    if (example !== undefined) {
      result[key] = example;
    }
  }
  return result;
}
211
- function fieldTypeToExample(field) {
212
- switch (field.type) {
213
- case "string":
214
- return `"..."`;
215
- case "number":
216
- return "0";
217
- case "boolean":
218
- return "true";
219
- case "array":
220
- if (field.items) {
221
- return `[${fieldTypeToExample(field.items)}]`;
222
- }
223
- return "[]";
224
- case "object":
225
- if (field.properties) {
226
- const inner = Object.entries(field.properties).map(([k, v]) => `"${k}": ${fieldTypeToExample(v)}`).join(", ");
227
- return `{ ${inner} }`;
228
- }
229
- return "{}";
230
- }
344
/**
 * Builds the first-attempt prompt (extract() uses it when `attempt === 0`).
 *
 * The example structure is produced by schemaToJSONExample(schema.fields), wrapped
 * under the schema name, and pretty-printed with 2-space indentation. The returned
 * text contains the output rules, that structure, and `input` placed between
 * triple-quote fences.
 *
 * NOTE(review): `input` is interpolated verbatim, so text that itself contains
 * `"""` ends the fence early — confirm whether callers need escaping.
 *
 * @param {string} input - Text to extract data from.
 * @param {{ name: string, fields: Object }} schema - Schema describing the target structure.
 * @returns {string} The prompt text.
 */
+ function buildForcedPrompt(input, schema) {
345
+ const example = schemaToJSONExample(schema.fields);
346
+ const exampleStr = JSON.stringify({ [schema.name]: example }, null, 2);
347
+ return `TASK: Extract structured data from the text below into EXACTLY this JSON format.
348
+
349
+ CRITICAL RULES:
350
+ - Output ONLY valid JSON. No text before or after.
351
+ - No markdown. No code blocks. No explanations.
352
+ - Every field MUST be present with the correct type.
353
+ - String fields: use "" if unknown.
354
+ - Number fields: use 0 if unknown.
355
+ - Boolean fields: use true or false, never null.
356
+ - Array fields: use [] if empty, never null.
357
+ - Object fields: use {} if empty, never null.
358
+
359
+ REQUIRED JSON STRUCTURE:
360
+ ${exampleStr}
361
+
362
+ TEXT TO EXTRACT FROM:
363
+ """
364
+ ${input}
365
+ """
366
+
367
+ OUTPUT ONLY THE JSON OBJECT. NOTHING ELSE.`;
368
+ }
369
/**
 * Builds the second-attempt prompt (extract() uses it when `attempt === 1`).
 *
 * The text reports `error`, echoes at most the first 1000 characters of
 * `previousResponse` (`slice(0, 1e3)`), restates the output rules, and repeats the
 * example structure (schemaToJSONExample output wrapped under the schema name,
 * 2-space indented) followed by `input` between triple-quote fences.
 *
 * @param {string} input - Text to extract data from.
 * @param {{ name: string, fields: Object }} schema - Schema describing the target structure.
 * @param {string} previousResponse - Raw text of the failed response; must support `.slice`.
 * @param {string} error - Description of why the previous response was rejected.
 * @returns {string} The prompt text.
 */
+ function buildRetryPrompt(input, schema, previousResponse, error) {
370
+ const example = schemaToJSONExample(schema.fields);
371
+ const exampleStr = JSON.stringify({ [schema.name]: example }, null, 2);
372
+ return `YOUR PREVIOUS RESPONSE WAS INVALID. You MUST fix it.
373
+
374
+ ERROR: ${error}
375
+
376
+ YOUR PREVIOUS RESPONSE:
377
+ ${previousResponse.slice(0, 1e3)}
378
+
379
+ WHAT YOU MUST DO NOW:
380
+ 1. Output ONLY valid JSON matching this EXACT structure
381
+ 2. No text before or after the JSON
382
+ 3. No markdown, no code blocks, no explanations
383
+ 4. Fix the errors listed above
384
+ 5. Every field MUST be present
385
+
386
+ REQUIRED JSON STRUCTURE:
387
+ ${exampleStr}
388
+
389
+ TEXT TO EXTRACT FROM:
390
+ """
391
+ ${input}
392
+ """
393
+
394
+ OUTPUT ONLY THE JSON OBJECT. NOTHING ELSE.`;
395
+ }
396
/**
 * Builds the strictest prompt (extract() uses it for every attempt after the second,
 * i.e. the `else` branch once `attempt` is neither 0 nor 1).
 *
 * The text reports `error`, echoes at most the first 800 characters of
 * `previousResponse`, lists the prohibitions, and repeats the example structure
 * (schemaToJSONExample output wrapped under the schema name, 2-space indented)
 * followed by `input` between triple-quote fences. The returned string ends with
 * "OUTPUT ONLY:" and a trailing newline.
 *
 * @param {string} input - Text to extract data from.
 * @param {{ name: string, fields: Object }} schema - Schema describing the target structure.
 * @param {string} previousResponse - Raw text of the failed response; must support `.slice`.
 * @param {string} error - Description of why the previous response was rejected.
 * @returns {string} The prompt text.
 */
+ function buildFinalPrompt(input, schema, previousResponse, error) {
397
+ const example = schemaToJSONExample(schema.fields);
398
+ const exampleStr = JSON.stringify({ [schema.name]: example }, null, 2);
399
+ return `FINAL ATTEMPT. THIS IS YOUR LAST CHANCE.
400
+
401
+ YOUR PREVIOUS RESPONSE FAILED VALIDATION:
402
+ ${error}
403
+
404
+ YOUR PREVIOUS RESPONSE:
405
+ ${previousResponse.slice(0, 800)}
406
+
407
+ YOU MUST OUTPUT EXACTLY THIS STRUCTURE. NOTHING MORE. NOTHING LESS.
408
+ DO NOT ADD FIELDS THAT ARE NOT IN THE SCHEMA.
409
+ DO NOT OMIT ANY FIELDS.
410
+ DO NOT WRAP IN markdown OR code blocks.
411
+ DO NOT ADD ANY TEXT BEFORE OR AFTER THE JSON.
412
+
413
+ STRUCTURE:
414
+ ${exampleStr}
415
+
416
+ INPUT TEXT:
417
+ """
418
+ ${input}
419
+ """
420
+
421
+ OUTPUT ONLY:
422
+ `;
423
+ }
424
/**
 * Renders the schema as a pretty-printed JSON example wrapped under the schema name.
 *
 * @param {{ name: string, fields: Object }} schema - Schema to render.
 * @returns {string} JSON text (2-space indentation) of `{ [schema.name]: <example> }`.
 */
function schemaToPrompt(schema) {
  const wrapped = { [schema.name]: schemaToJSONExample(schema.fields) };
  return JSON.stringify(wrapped, null, 2);
}
232
428
  function schemaToZodishString(schema) {
233
429
  const lines = [];
@@ -241,11 +437,16 @@ function schemaToZodishString(schema) {
241
437
 
242
438
  // src/extractor.ts
243
439
  var MAX_REPAIR_ATTEMPTS = 3;
244
- function unwrapNamedResponse(data, schemaName) {
245
- if (typeof data === "object" && data !== null && !Array.isArray(data) && schemaName in data && typeof data[schemaName] === "object") {
246
- return data[schemaName];
440
/**
 * Scores an extraction from 0 to 100 by subtracting penalties from 100.
 *
 * Penalties: 15 when the first response was not valid JSON, 10 per attempt beyond
 * the first, 5 per "type_coercion" action, 8 per "missing_field" action and 3 per
 * "default_fill" action. Other action types cost nothing. The result is clamped
 * to the range [0, 100].
 *
 * @param {number} attempts - Number of attempts used (1 for first-try success).
 * @param {Array<{type: string}>} repairActions - Repair log entries.
 * @param {boolean} jsonValidFirstTry - Whether the first response parsed as JSON.
 * @returns {number} Confidence score between 0 and 100.
 */
function calculateConfidence(attempts, repairActions, jsonValidFirstTry) {
  let penalty = jsonValidFirstTry ? 0 : 15;
  penalty += (attempts - 1) * 10;
  for (const action of repairActions) {
    switch (action.type) {
      case "type_coercion":
        penalty += 5;
        break;
      case "missing_field":
        penalty += 8;
        break;
      case "default_fill":
        penalty += 3;
        break;
      default:
        break;
    }
  }
  const raw = 100 - penalty;
  const capped = Math.min(100, raw);
  return Math.max(0, capped);
}
250
451
  async function extract(input, schema, options = {}) {
251
452
  const config = {
@@ -259,59 +460,62 @@ async function extract(input, schema, options = {}) {
259
460
  "OpenRouter API key required. Pass it in options or set OPENROUTER_API_KEY environment variable."
260
461
  );
261
462
  }
262
- const schemaDescription = schemaToZodishString(schema);
263
- const exampleJSON = schemaToPrompt(schema);
264
- const userPrompt = `Extract structured data from the following text.
265
-
266
- ${schemaDescription}
267
-
268
- Return ONLY valid JSON matching this structure:
269
- ${exampleJSON}
270
-
271
- Text to extract from:
272
- """
273
- ${input}
274
- """`;
275
- const schemaFields = {};
276
- for (const [key, field] of Object.entries(schema.fields)) {
277
- schemaFields[key] = { type: field.type, required: field.required };
278
- }
463
+ const allRepairActions = [];
279
464
  let lastRaw = "";
280
465
  let lastError = "";
466
+ let jsonValidFirstTry = false;
467
+ let successAttempt = 0;
281
468
  for (let attempt = 0; attempt <= MAX_REPAIR_ATTEMPTS; attempt++) {
282
- const prompt = attempt === 0 ? userPrompt : `${userPrompt}
283
-
284
- IMPORTANT: Your previous response was invalid JSON. Here was the error:
285
- ${lastError}
286
-
287
- Previous raw response:
288
- ${lastRaw}
289
-
290
- Fix the JSON and return ONLY valid JSON. No explanations, no markdown, just the raw JSON object.`;
469
+ let prompt;
470
+ if (attempt === 0) {
471
+ prompt = buildForcedPrompt(input, schema);
472
+ } else if (attempt === 1) {
473
+ prompt = buildRetryPrompt(input, schema, lastRaw, lastError);
474
+ } else {
475
+ prompt = buildFinalPrompt(input, schema, lastRaw, lastError);
476
+ }
291
477
  const response = await callLLM(prompt, config);
292
478
  lastRaw = response.content;
293
479
  const extracted = extractJSON(response.content);
294
480
  if (!extracted) {
295
- lastError = "Could not extract JSON from response";
481
+ lastError = "Could not extract JSON from response. Your response contained no valid JSON.";
296
482
  continue;
297
483
  }
298
484
  const parsed = tryParseJSON(extracted);
299
485
  if (!parsed.success) {
300
- lastError = parsed.error;
486
+ lastError = `Invalid JSON: ${parsed.error}`;
301
487
  continue;
302
488
  }
489
+ if (attempt === 0) {
490
+ jsonValidFirstTry = true;
491
+ }
303
492
  const repaired = repairJSON(extracted);
304
- const finalData = repaired ? tryParseJSON(repaired).success ? JSON.parse(repaired) : parsed.data : parsed.data;
305
- const unwrapped = unwrapNamedResponse(finalData, schema.name);
306
- const validation = validateAgainstSchema(unwrapped, schemaFields);
493
+ let finalData = repaired ? tryParseJSON(repaired).success ? JSON.parse(repaired) : parsed.data : parsed.data;
494
+ const repairResult = repairData(finalData, schema.fields, schema.name);
495
+ finalData = repairResult.data;
496
+ allRepairActions.push(...repairResult.actions);
497
+ const schemaFields = {};
498
+ for (const [key, field] of Object.entries(schema.fields)) {
499
+ schemaFields[key] = field;
500
+ }
501
+ const validation = validateAgainstSchema(finalData, schemaFields);
307
502
  if (!validation.valid) {
308
- lastError = `Schema validation failed: ${validation.errors.join(", ")}`;
503
+ lastError = `Schema validation failed: ${validation.errors.join("; ")}`;
504
+ allRepairActions.push({
505
+ attempt,
506
+ type: "type_coercion",
507
+ detail: `Validation failed: ${validation.errors.join("; ")}`
508
+ });
309
509
  continue;
310
510
  }
511
+ successAttempt = attempt + 1;
311
512
  return {
312
- data: unwrapped,
513
+ data: finalData,
313
514
  raw: response.content,
314
515
  model: response.model,
516
+ confidence: calculateConfidence(successAttempt, allRepairActions, jsonValidFirstTry),
517
+ repairLog: allRepairActions,
518
+ attempts: successAttempt,
315
519
  usage: response.usage
316
520
  };
317
521
  }
@@ -320,9 +524,11 @@ Fix the JSON and return ONLY valid JSON. No explanations, no markdown, just the
320
524
  );
321
525
  }
322
526
 
527
+ exports.coerceType = coerceType;
323
528
  exports.defineSchema = defineSchema;
324
529
  exports.extract = extract;
325
530
  exports.extractJSON = extractJSON;
531
+ exports.repairData = repairData;
326
532
  exports.repairJSON = repairJSON;
327
533
  exports.schemaToPrompt = schemaToPrompt;
328
534
  exports.schemaToZodishString = schemaToZodishString;