parse-llm-json 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -0
- package/dist/index.cjs +1 -0
- package/dist/index.d.cts +276 -0
- package/dist/index.d.ts +276 -0
- package/dist/index.js +1 -0
- package/package.json +49 -0
package/README.md
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# llm-json
|
|
2
|
+
|
|
3
|
+
Extract structured data from LLM output. Handles malformed JSON, streaming responses, and partial output — never throws.
|
|
4
|
+
|
|
5
|
+
## The Problem
|
|
6
|
+
|
|
7
|
+
LLMs return broken JSON constantly:
|
|
8
|
+
|
|
9
|
+
```javascript
|
|
10
|
+
// What GPT-4 returns:
|
|
11
|
+
{name: 'John', age: 30, "bio": "User said "hello"",}
|
|
12
|
+
|
|
13
|
+
// What JSON.parse sees:
|
|
14
|
+
SyntaxError: Expected double-quoted property name
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
**Common failures:**
|
|
18
|
+
- Single quotes instead of double quotes
|
|
19
|
+
- Unquoted keys (`{name: ...}` instead of `{"name": ...}`)
|
|
20
|
+
- Trailing commas
|
|
21
|
+
- Apostrophes inside strings (`"user's name"`)
|
|
22
|
+
- Markdown code blocks wrapping the JSON
|
|
23
|
+
- Prose before/after the JSON
|
|
24
|
+
- Python literals (`None`, `True`, `False`)
|
|
25
|
+
- Incomplete/truncated JSON from token limits
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npm install llm-json
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
### Basic Parsing
|
|
36
|
+
|
|
37
|
+
```typescript
|
|
38
|
+
import { parse } from 'llm-json';
|
|
39
|
+
|
|
40
|
+
// Handles all the broken JSON patterns
|
|
41
|
+
const result = parse(`
|
|
42
|
+
Here's the data you requested:
|
|
43
|
+
\`\`\`json
|
|
44
|
+
{name: 'John', age: 30, interests: ["ai", "llm's"]}
|
|
45
|
+
\`\`\`
|
|
46
|
+
`);
|
|
47
|
+
|
|
48
|
+
if (result.ok) {
|
|
49
|
+
console.log(result.data.name); // "John"
|
|
50
|
+
console.log(result.data.interests); // ["ai", "llm's"]
|
|
51
|
+
} else {
|
|
52
|
+
console.log(result.error.code); // "no_json_found" | "invalid_json" | ...
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### With Schema Validation
|
|
57
|
+
|
|
58
|
+
```typescript
|
|
59
|
+
import { parse } from 'llm-json';
|
|
60
|
+
|
|
61
|
+
const schema = {
|
|
62
|
+
type: 'object',
|
|
63
|
+
properties: {
|
|
64
|
+
name: { type: 'string' },
|
|
65
|
+
age: { type: 'number' }
|
|
66
|
+
},
|
|
67
|
+
required: ['name']
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
const result = parse('{name: "Alice", age: "wrong"}', schema);
|
|
71
|
+
// result.ok === false
|
|
72
|
+
// result.error.code === 'schema_mismatch'
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Streaming (SSE / Token-by-Token)
|
|
76
|
+
|
|
77
|
+
```typescript
|
|
78
|
+
import { createStreamingParser } from 'llm-json';
|
|
79
|
+
|
|
80
|
+
const parser = createStreamingParser({
|
|
81
|
+
schema: { type: 'object', properties: { name: { type: 'string' } } }
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
// Feed chunks as they arrive from OpenAI, Claude, etc.
|
|
85
|
+
for await (const chunk of llmStream) {
|
|
86
|
+
const result = parser.write(chunk);
|
|
87
|
+
if (result.ok) {
|
|
88
|
+
updateUI(result.data); // Show partial results in real-time
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Get final result
|
|
93
|
+
const final = parser.finish();
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Extract Multiple JSON Objects
|
|
97
|
+
|
|
98
|
+
```typescript
|
|
99
|
+
import { extractAll } from 'llm-json';
|
|
100
|
+
|
|
101
|
+
const text = `
|
|
102
|
+
First user: {"id": 1, "name": "Alice"}
|
|
103
|
+
Second user: {"id": 2, "name": "Bob"}
|
|
104
|
+
`;
|
|
105
|
+
|
|
106
|
+
const { multiple } = extractAll(text);
|
|
107
|
+
// multiple === ['{"id": 1, "name": "Alice"}', '{"id": 2, "name": "Bob"}'] (raw JSON strings)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## API
|
|
111
|
+
|
|
112
|
+
### `parse<T>(input: string, schema?: Schema): Result<T>`
|
|
113
|
+
|
|
114
|
+
Main entry point. Extracts JSON from LLM output, repairs common issues, validates against schema.
|
|
115
|
+
|
|
116
|
+
```typescript
|
|
117
|
+
const result = parse('{"name": "test"}');
|
|
118
|
+
if (result.ok) {
|
|
119
|
+
result.data; // { name: "test" }
|
|
120
|
+
result.warnings; // Repair warnings, if any
|
|
121
|
+
} else {
|
|
122
|
+
result.error; // { code, message, position?, context? }
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### `createStreamingParser<T>(options?): StreamingParser<T>`
|
|
127
|
+
|
|
128
|
+
Stateful parser for streaming responses. Call `write(chunk)` for each chunk, `finish()` when done.
|
|
129
|
+
|
|
130
|
+
```typescript
|
|
131
|
+
const parser = createStreamingParser({ schema });
|
|
132
|
+
parser.write(chunk1);
|
|
133
|
+
parser.write(chunk2);
|
|
134
|
+
const result = parser.finish();
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### `repair(input: string): RepairResult`
|
|
138
|
+
|
|
139
|
+
Low-level repair function. Returns repaired JSON string plus warnings.
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
const { output, warnings, valid } = repair("{name: 'test'}");
|
|
143
|
+
// output === '{"name": "test"}'
|
|
144
|
+
// valid === true
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### `extract(input: string): ExtractResult`
|
|
148
|
+
|
|
149
|
+
Extract first JSON object from text. Strips markdown, prose, etc.
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
const { json, start, end } = extract('prefix {"a": 1} suffix');
|
|
153
|
+
// json === '{"a": 1}'
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### `extractAll(input: string): ExtractResult`
|
|
157
|
+
|
|
158
|
+
Extract all JSON objects from text.
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
const { multiple } = extractAll('{"a": 1} text {"b": 2}');
|
|
162
|
+
// multiple === ['{"a": 1}', '{"b": 2}']
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### `parsePartial<T>(input: string, options?): Result<T>`
|
|
166
|
+
|
|
167
|
+
Parse potentially incomplete JSON. Useful for streaming when you want manual control.
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
const result = parsePartial('{"users": [{"name": "Al');
|
|
171
|
+
// result.ok === true
|
|
172
|
+
// result.data === { users: [{ name: "Al" }] }
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### `validate<T>(data: unknown, schema: Schema): ValidationResult<T>`
|
|
176
|
+
|
|
177
|
+
Validate parsed data against schema. Separate from parsing for when you already have data.
|
|
178
|
+
|
|
179
|
+
```typescript
|
|
180
|
+
const result = validate({ name: "test" }, { type: 'object', properties: { name: { type: 'string' } } });
|
|
181
|
+
// result.ok === true
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### `parseStream<T>(chunks, schema?, options?): Promise<Result<T>>`
|
|
185
|
+
|
|
186
|
+
Parse an async iterable or ReadableStream.
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
const result = await parseStream(openaiStream, schema);
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Schema Format
|
|
193
|
+
|
|
194
|
+
Minimal schema format (not JSON Schema — kept small for bundle size):
|
|
195
|
+
|
|
196
|
+
```typescript
|
|
197
|
+
type Schema =
|
|
198
|
+
| { type: 'string' | 'number' | 'boolean' | 'null', enum?: string[] }
|
|
199
|
+
| { type: 'array', items: Schema, minItems?: number, maxItems?: number }
|
|
200
|
+
| { type: 'object', properties: Record<string, Schema>, required?: string[], additionalProperties?: boolean }
|
|
201
|
+
| { type: 'union', variants: Schema[] }
|
|
202
|
+
| { type: 'literal', value: string | number | boolean | null }
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
For complex validation, pipe output through Zod.
|
|
206
|
+
|
|
207
|
+
## Known Limitations
|
|
208
|
+
|
|
209
|
+
**repair() edge cases:**
|
|
210
|
+
- Cannot fix structural errors (mismatched brackets, completely malformed syntax)
|
|
211
|
+
- May produce incorrect output for deeply nested quote escaping (`"a\"b'c\"d"`)
|
|
212
|
+
- Doesn't handle JavaScript-style template literals
|
|
213
|
+
- Numbers with leading zeros or multiple decimal points not repaired
|
|
214
|
+
|
|
215
|
+
**Schema limitations:**
|
|
216
|
+
- No regex patterns, custom validators, or conditional schemas
|
|
217
|
+
- No recursive schema references
|
|
218
|
+
- No `$ref` or JSON Schema standard support
|
|
219
|
+
|
|
220
|
+
**Streaming:**
|
|
221
|
+
- Requires explicit `finish()` call — no auto-detection of complete JSON
|
|
222
|
+
- Very large strings (>64KB) may cause issues in some environments
|
|
223
|
+
|
|
224
|
+
## Bundle Size
|
|
225
|
+
|
|
226
|
+
- ESM: 9.00 KB minified
|
|
227
|
+
- CJS: 9.61 KB minified
|
|
228
|
+
- Zero dependencies
|
|
229
|
+
|
|
230
|
+
## License
|
|
231
|
+
|
|
232
|
+
MIT
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var R=Object.defineProperty;var I=Object.getOwnPropertyDescriptor;var A=Object.getOwnPropertyNames;var J=Object.prototype.hasOwnProperty;var N=(e,r)=>{for(var t in r)R(e,t,{get:r[t],enumerable:!0})},W=(e,r,t,n)=>{if(r&&typeof r=="object"||typeof r=="function")for(let a of A(r))!J.call(e,a)&&a!==t&&R(e,a,{get:()=>r[a],enumerable:!(n=I(r,a))||n.enumerable});return e};var $=e=>W(R({},"__esModule",{value:!0}),e);var K={};N(K,{configure:()=>j,createInstance:()=>O,createStreamingParser:()=>x,extract:()=>d,extractAll:()=>y,getConfig:()=>v,parse:()=>k,parsePartial:()=>S,parseStream:()=>w,parseWithSchema:()=>_,repair:()=>h,validate:()=>f});module.exports=$(K);var P={maxBufferSize:1048576,maxRepairs:10,collectWarnings:!0};function j(e){P={...P,...e}}function v(){return P}function E(e,r){let t=0,n=!1,a=!1,i=-1,o=[],u=[];for(let l=0;l<e.length;l++){let m=e[l];if(a){a=!1;continue}if(m==="\\"&&n){a=!0;continue}if(m==='"'){n=!n;continue}if(!n){if(m==="{"||m==="[")t===0&&(i=l),t++;else if((m==="}"||m==="]")&&(t--,t===0&&i>=0)){let p=e.slice(i,l+1);if(o.push(p),u.push({start:i,end:l+1}),!r)break}}}return o.length===0?{json:null,start:0,end:0,multiple:[]}:{json:o[0],start:u[0].start,end:u[0].end,multiple:o}}function T(e){return e.replace(/```(?:json)?\s*\n?([\s\S]*?)\n?```/g,"$1").trim()}function d(e){if(!e)return{json:null,start:0,end:0};let r=T(e);return E(r,!1)}function y(e){if(!e)return{json:null,start:0,end:0,multiple:[]};let r=T(e);return E(r,!0)}function h(e){if(!e)return{output:"",warnings:[],valid:!1};let r=[],t=e.trim();t=t.replace(/```json?\s*\n?/gi,"").replace(/```\s*$/g,""),t=t.replace(/\/\/[^\n]*/g,"").replace(/\/\*[\s\S]*?\*\//g,""),t=C(t,r);{let a=t;t=t.replace(/,\s*([}\]])/g,"$1"),a!==t&&r.push({code:"trailing_comma_removed",message:""})}t=t.replace(/,\s*,/g,",");let n=!1;try{JSON.parse(t),n=!0}catch{}return{output:t,warnings:r,valid:n}}function C(e,r){let t="",n=0,a=!1,i="",o=!1,u=!1;for(;n<e.length;){let 
l=e[n];if(o){if(o=!1,a&&i==="'"){if(l==="'"){t+="'",n++;continue}else if(l==='"'){t+='\\"',n++;continue}}t+=l,n++;continue}if(l==="\\"&&a){if(i==="'"){o=!0,n++;continue}o=!0,t+=l,n++;continue}if(l==='"'){if(a){if(i==='"')a=!1,i="";else if(i==="'"){t+='\\"',n++;continue}}else a=!0,i='"';t+='"',n++;continue}if(l==="'"){if(a)if(i==='"'){t+="'",n++;continue}else i==="'"&&(a=!1,i="",u=!0);else a=!0,i="'",u=!0;t+='"',n++;continue}if(a){t+=l,n++;continue}if(l==="{"||l===","){for(t+=l,n++;n<e.length&&/\s/.test(e[n]);)t+=e[n++];if(n>=e.length)break;if(e[n]==='"'){a=!0,i='"',t+='"',n++;continue}if(e[n]==="'"){a=!0,i="'",u=!0,t+='"',n++;continue}let m=n;for(;n<e.length&&/[\w$_]/.test(e[n]);)n++;n>m&&!/^(true|false|null|undefined|None|True|False)$/.test(e.slice(m,n))&&(t+='"'+e.slice(m,n)+'"',r.push({code:"unquoted_key_fixed",message:""}));continue}if(e.slice(n,n+4)==="None"){t+="null",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(e.slice(n,n+4)==="True"){t+="true",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(e.slice(n,n+5)==="False"){t+="false",r.push({code:"python_literal_converted",message:""}),n+=5;continue}t+=l,n++}return u&&r.push({code:"single_quotes_replaced",message:""}),t}function S(e,r){if(!e)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};try{return{ok:!0,data:JSON.parse(e)}}catch{}let t={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1,...r};try{return{ok:!0,data:V(e,t)}}catch(n){return{ok:!1,error:{code:"truncated",message:n.message},partial:{confidence:"medium",complete:{},pending:[]}}}}function V(e,r){let t=0,n=()=>{for(;t<e.length&&/\s/.test(e[t]);)t++},a=()=>e[t],i=()=>e[t++],o=()=>{n();let s=a();if(s==="{")return p();if(s==="[")return m();if(s==='"')return u();if(s==="-"||/[0-9]/.test(s))return l();if(e.slice(t,t+4)==="true")return t+=4,!0;if(e.slice(t,t+5)==="false")return t+=5,!1;if(e.slice(t,t+4)==="null")return t+=4,null;throw new Error("Unexpected 
token at "+t)},u=()=>{i();let s="",c=!1;for(;t<e.length;){let g=i();if(c){c=!1,s+=g;continue}if(g==="\\"){c=!0;continue}if(g==='"')return s;s+=g}if(r.allowPartialStrings)return s;throw new Error("Unterminated string")},l=()=>{let s=t;for(a()==="-"&&i();t<e.length&&/[0-9]/.test(e[t]);)i();if(a()===".")for(i();t<e.length&&/[0-9]/.test(e[t]);)i();if(a()==="e"||a()==="E")for(i(),(a()==="+"||a()==="-")&&i();t<e.length&&/[0-9]/.test(e[t]);)i();let c=e.slice(s,t);return r.allowPartialNumbers&&c.endsWith(".")&&(c=c.slice(0,-1)),parseFloat(c)},m=()=>{i();let s=[];if(n(),a()==="]")return i(),s;for(;t<e.length;){if(n(),a()==="]")return i(),s;if(a()===","){i();continue}s.push(o())}if(r.allowPartialArrays)return s;throw new Error("Unterminated array")},p=()=>{i();let s={};if(n(),a()==="}")return i(),s;for(;t<e.length;){if(n(),a()==="}")return i(),s;if(a()===","){i();continue}let c=u();if(n(),a()!==":"){if(r.allowPartialObjects)return s;throw new Error("Expected colon")}i(),n(),s[c]=o()}if(r.allowPartialObjects)return s;throw new Error("Unterminated object")};return o()}function f(e,r){let t=b(e,r,"");return t.length===0?{ok:!0,data:e}:{ok:!1,errors:t}}function b(e,r,t){switch(r.type){case"null":return e===null?[]:[{path:t,code:"type_error",message:"Expected null",expected:"null",actual:typeof e}];case"string":return typeof e=="string"?L(e,r.enum,t):[{path:t,code:"type_error",message:"Expected string",expected:"string",actual:typeof e}];case"number":return typeof e=="number"?[]:[{path:t,code:"type_error",message:"Expected number",expected:"number",actual:typeof e}];case"boolean":return typeof e=="boolean"?[]:[{path:t,code:"type_error",message:"Expected boolean",expected:"boolean",actual:typeof e}];case"array":return U(e,r,t);case"object":return q(e,r,t);case"union":return F(e,r,t);case"literal":return e===r.value?[]:[{path:t,code:"type_error",message:`Expected ${r.value}`,expected:String(r.value),actual:String(e)}];default:return[]}}function L(e,r,t){return 
r&&!r.includes(e)?[{path:t,code:"type_error",message:"Not in enum",expected:r.join("|"),actual:e}]:[]}function U(e,r,t){if(!Array.isArray(e))return[{path:t,code:"type_error",message:"Expected array",expected:"array",actual:typeof e}];let n=[];return r.minItems!==void 0&&e.length<r.minItems&&n.push({path:t,code:"type_error",message:`Min ${r.minItems} items`,expected:`>=${r.minItems}`,actual:String(e.length)}),r.maxItems!==void 0&&e.length>r.maxItems&&n.push({path:t,code:"type_error",message:`Max ${r.maxItems} items`,expected:`<=${r.maxItems}`,actual:String(e.length)}),e.forEach((a,i)=>n.push(...b(a,r.items,`${t}/${i}`))),n}function q(e,r,t){if(typeof e!="object"||e===null||Array.isArray(e))return[{path:t,code:"type_error",message:"Expected object",expected:"object",actual:e===null?"null":Array.isArray(e)?"array":typeof e}];let n=[],a=e,i=r.required||[];for(let o of i)o in a||n.push({path:`${t}/${o}`,code:"missing_required",message:`Missing ${o}`,expected:o});for(let[o,u]of Object.entries(a))o in r.properties?n.push(...b(u,r.properties[o],`${t}/${o}`)):r.additionalProperties===!1&&n.push({path:`${t}/${o}`,code:"type_error",message:`Unknown property ${o}`});return n}function F(e,r,t){for(let n of r.variants)if(b(e,n,t).length===0)return[];return[{path:t,code:"type_error",message:"No union variant matched",expected:"union",actual:String(e)}]}function x(e){let r=e||{},t={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1},n="",a=0,i=!1,o=!1,u=!1,l=p=>{n+=p;for(let s=n.length-p.length;s<n.length;s++){let c=n[s];if(o){o=!1;continue}if(c==="\\"&&i){o=!0;continue}if(c==='"'){i=!i;continue}i||(c==="{"||c==="["?(a===0&&(u=!0),a++):(c==="}"||c==="]")&&a--)}},m=()=>{let p=d(n);if(!p.json)return{ok:!1,error:{code:"truncated",message:"No JSON found"}};let s=h(p.json),c=S(s.output,t);if(c.ok&&r.schema){let g=f(c.data,r.schema);if(!g.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema 
mismatch",context:JSON.stringify(g.errors)}}}return c};return{get buffer(){return n},get inJson(){return u},get depth(){return a},write(p){return l(p),m()},finish(){return u?a>0||i?{ok:!1,error:{code:"truncated",message:"Incomplete JSON"}}:m():{ok:!1,error:{code:"no_json_found",message:"No JSON found"}}},reset(){n="",a=0,i=!1,o=!1,u=!1}}}function M(e){return e!=null&&typeof e[Symbol.asyncIterator]=="function"}function z(e){return typeof ReadableStream<"u"&&e instanceof ReadableStream}async function*B(e){let r=e.getReader(),t=new TextDecoder;try{for(;;){let{done:n,value:a}=await r.read();if(n)break;yield t.decode(a,{stream:!0})}}finally{r.releaseLock()}}async function w(e,r,t){let n=x({...t,schema:r}),a=z(e)?B(e):M(e)?e:null;if(!a)return{ok:!1,error:{code:"invalid_json",message:"Invalid input"}};for await(let i of a)n.write(i);return n.finish()}function k(e,r){if(!e)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};let t=d(e);if(!t.json)return{ok:!1,error:{code:"no_json_found",message:"No JSON found"}};try{let a=JSON.parse(t.json);if(r){let i=f(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a}}catch{}let n=h(t.json);try{let a=JSON.parse(n.output);if(r){let i=f(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a,warnings:n.warnings.length?n.warnings:void 0}}catch(a){return{ok:!1,error:{code:"invalid_json",message:a.message}}}}function _(e,r){return k(e,r)}function O(e){return{parse:(r,t)=>k(r,t),parseWithSchema:(r,t)=>_(r,t),createStreamingParser:r=>x(r),parseStream:(r,t,n)=>w(r,t,n),extract:r=>d(r),extractAll:r=>y(r),repair:r=>h(r),parsePartial:(r,t)=>S(r,t),validate:(r,t)=>f(r,t)}}0&&(module.exports={configure,createInstance,createStreamingParser,extract,extractAll,getConfig,parse,parsePartial,parseStream,parseWithSchema,repair,validate});
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/** Successful outcome of a parse: carries the data and any repair warnings. */
interface Success<T> {
    ok: true;
    data: T;
    /** Warnings emitted while repairing malformed input, if any. */
    warnings?: Warning[];
}
/** Failed outcome of a parse: carries the error and, for truncated input,
 *  a best-effort partial result. */
interface Failure<T> {
    ok: false;
    error: ParseError;
    /** Best-effort partial data recovered from truncated input, if any. */
    partial?: PartialResult<T>;
}
/** Discriminated union returned by all parse functions; narrow on `ok`. */
type Result<T> = Success<T> | Failure<T>;
/** A non-fatal repair applied to malformed input. */
interface Warning {
    code: WarningCode;
    message: string;
    /** Character offset in the repaired text, when known. */
    position?: number;
}
/** Codes identifying each class of repair the library can apply. */
type WarningCode = 'trailing_comma_removed' | 'single_quotes_replaced' | 'unquoted_key_fixed' | 'missing_comma_added' | 'markdown_fence_stripped' | 'prose_stripped' | 'python_literal_converted' | 'truncated_string_closed' | 'unescaped_quote_fixed';
/** Error describing why a parse failed. */
interface ParseError {
    code: ErrorCode;
    message: string;
    /** Character offset of the failure, when known. */
    position?: number;
    /** Extra detail (e.g. serialized validation errors for schema_mismatch). */
    context?: string;
}
/** Codes identifying each class of parse/validation failure. */
type ErrorCode = 'no_json_found' | 'invalid_json' | 'schema_mismatch' | 'truncated' | 'type_error' | 'missing_required';
/** Partial data recovered from truncated input. */
interface PartialResult<T> {
    /** How trustworthy the recovered data is. */
    confidence: 'high' | 'medium' | 'low';
    /** Fields that were fully parsed before the cut-off. */
    complete: Partial<T>;
    /** Names of fields that appeared incomplete. */
    pending: string[];
}
/** Minimal validation schema (intentionally not JSON Schema). */
type Schema = PrimitiveSchema | ArraySchema | ObjectSchema | UnionSchema | LiteralSchema;
/** Schema for primitive values, with an optional string enum. */
interface PrimitiveSchema {
    type: 'string' | 'number' | 'boolean' | 'null';
    /** Allowed values; applies to string schemas. */
    enum?: string[];
}
/** Schema for arrays, with optional length bounds. */
interface ArraySchema {
    type: 'array';
    items: Schema;
    minItems?: number;
    maxItems?: number;
}
/** Schema for objects with named properties. */
interface ObjectSchema {
    type: 'object';
    properties: Record<string, Schema>;
    /** Keys that must be present. */
    required?: string[];
    /** When false, unknown keys are validation errors. */
    additionalProperties?: boolean;
}
/** Schema matching any one of several variants. */
interface UnionSchema {
    type: 'union';
    variants: Schema[];
}
/** Schema matching exactly one literal value. */
interface LiteralSchema {
    type: 'literal';
    value: string | number | boolean | null;
}
/** Map a Schema (declared `as const`) to the TypeScript type it validates. */
type Infer<S extends Schema> = S extends PrimitiveSchema ? S['type'] extends 'string' ? string : S['type'] extends 'number' ? number : S['type'] extends 'boolean' ? boolean : null : S extends ArraySchema ? Infer<S['items']>[] : S extends ObjectSchema ? {
    [K in keyof S['properties']]: Infer<S['properties'][K]>;
} : S extends UnionSchema ? Infer<S['variants'][number]> : S extends LiteralSchema ? S['value'] : unknown;
/** Options for streaming parsers: schema plus lifecycle callbacks. */
interface StreamingOptions {
    schema?: Schema;
    /** Called with each intermediate result. */
    onUpdate?: (result: Result<unknown>) => void;
    onJsonStart?: () => void;
    onJsonComplete?: (data: unknown) => void;
    onWarning?: (warning: Warning) => void;
}
/** Stateful parser for token-by-token input. */
interface StreamingParser<T = unknown> {
    /** Feed a chunk; returns a best-effort parse of the buffer so far. */
    write(chunk: string): Result<T>;
    /** Produce the final result; must be called when the stream ends. */
    finish(): Result<T>;
    /** Clear all buffered state for reuse. */
    reset(): void;
    /** Raw text accumulated so far. */
    readonly buffer: string;
    /** Whether a top-level '{' or '[' has been seen. */
    readonly inJson: boolean;
    /** Current brace/bracket nesting depth. */
    readonly depth: number;
}
/** Result of extract()/extractAll(). */
interface ExtractResult {
    /** First JSON candidate found, or null if none. */
    json: string | null;
    start: number;
    end: number;
    /** All candidates (populated by extractAll). */
    multiple?: string[];
}
/** Result of repair(). */
interface RepairResult {
    /** The repaired JSON text. */
    output: string;
    warnings: Warning[];
    /** True when `output` parses with JSON.parse. */
    valid: boolean;
}
/** Controls which value kinds parsePartial() may leave incomplete. */
interface PartialParseOptions {
    allowPartialStrings?: boolean;
    allowPartialObjects?: boolean;
    allowPartialArrays?: boolean;
    allowPartialNumbers?: boolean;
    /** Transform applied to a string that was cut off mid-way. */
    onIncompleteString?: (str: string) => string;
}
/** Result of validate(): data on success, error list on failure. */
interface ValidationResult<T> {
    ok: boolean;
    data?: T;
    errors?: ValidationError[];
}
/** One validation failure, addressed by a '/'-separated path. */
interface ValidationError {
    path: string;
    code: ErrorCode;
    message: string;
    expected?: string;
    actual?: string;
}
/** Library-wide configuration accepted by configure()/createInstance(). */
interface LlmJsonConfig {
    maxBufferSize?: number;
    maxRepairs?: number;
    customRepairs?: RepairRule[];
    collectWarnings?: boolean;
}
/** A user-supplied regex-based repair rule. */
interface RepairRule {
    name: string;
    pattern: RegExp;
    replace: string | ((match: string) => string);
}
/** The full public API surface, as returned by createInstance(). */
interface LlmJsonInstance {
    parse: <T = unknown>(input: string, schema?: Schema) => Result<T>;
    parseWithSchema: <S extends Schema>(input: string, schema: S) => Result<Infer<S>>;
    createStreamingParser: <T = unknown>(options?: StreamingOptions) => StreamingParser<T>;
    parseStream: <T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>) => Promise<Result<T>>;
    extract: (input: string) => ExtractResult;
    extractAll: (input: string) => ExtractResult;
    repair: (input: string) => RepairResult;
    parsePartial: <T = unknown>(input: string, options?: PartialParseOptions) => Result<T>;
    validate: <T = unknown>(data: unknown, schema: Schema) => ValidationResult<T>;
}
/** Merge options into the module-wide configuration. */
declare function configure(config: LlmJsonConfig): void;
/** Return the current module-wide configuration. */
declare function getConfig(): LlmJsonConfig;

/**
 * Extract the first JSON object from text. Strips markdown fences
 * and returns the position of the JSON in the original string.
 *
 * @param input - Text containing JSON (possibly with prose/markdown)
 * @returns Extracted JSON string and position info
 *
 * @example
 * const { json } = extract('Result: {"a": 1}');
 * // json === '{"a": 1}'
 */
declare function extract(input: string): ExtractResult;
/**
 * Extract all JSON objects from text.
 *
 * @param input - Text containing multiple JSON objects
 * @returns All extracted JSON strings
 *
 * @example
 * const { multiple } = extractAll('{"a": 1} and {"b": 2}');
 * // multiple === ['{"a": 1}', '{"b": 2}']
 */
declare function extractAll(input: string): ExtractResult;

/**
 * Repair common JSON issues from LLM output. Handles:
 * - Single quotes (converts to double, preserves apostrophes in strings)
 * - Unquoted keys
 * - Trailing commas
 * - Python literals (None, True, False)
 * - Comments
 * - Markdown fences
 *
 * @param input - Potentially malformed JSON string
 * @returns Repaired JSON, warnings, and validity flag
 *
 * @example
 * const { output, valid } = repair("{name: 'John',}");
 * // output === '{"name": "John"}'
 * // valid === true
 */
declare function repair(input: string): RepairResult;

/**
 * Parse potentially incomplete JSON. Useful for streaming when you
 * want manual control over the parsing process.
 *
 * @param input - Possibly incomplete JSON string
 * @param options - Control which types can be partial
 * @returns Best-effort parsed result
 *
 * @example
 * const result = parsePartial('{"users": [{"name": "Al');
 * // result.ok === true
 * // result.data === { users: [{ name: "Al" }] }
 */
declare function parsePartial<T = unknown>(input: string, options?: PartialParseOptions): Result<T>;

/**
 * Validate data against a schema. Separate from parsing for when
 * you already have parsed data and want to check it.
 *
 * @param data - Parsed data to validate
 * @param schema - Schema to validate against
 * @returns Validation result with errors if invalid
 *
 * @example
 * const result = validate(
 *   { name: 'test' },
 *   { type: 'object', properties: { name: { type: 'string' } } }
 * );
 * // result.ok === true
 */
declare function validate<T = unknown>(data: unknown, schema: Schema): ValidationResult<T>;

/**
 * Create a stateful streaming parser. Call `write(chunk)` for each
 * chunk from the stream, then `finish()` when done.
 *
 * @param options - Schema, callbacks for update/warning events
 * @returns StreamingParser with write(), finish(), reset() methods
 *
 * @example
 * const parser = createStreamingParser({ schema });
 * for await (const chunk of llmStream) {
 *   const result = parser.write(chunk);
 *   if (result.ok) updateUI(result.data);
 * }
 * const final = parser.finish();
 */
declare function createStreamingParser<T = unknown>(options?: StreamingOptions): StreamingParser<T>;
/**
 * Parse an async iterable or ReadableStream of chunks.
 * Convenience wrapper around createStreamingParser.
 *
 * @param chunks - AsyncIterable<string> or ReadableStream
 * @param schema - Optional schema for validation
 * @param options - Streaming callbacks
 * @returns Promise resolving to final result
 *
 * @example
 * const stream = openai.chat.completions.create({ stream: true, ... });
 * const result = await parseStream(stream, schema);
 */
declare function parseStream<T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>): Promise<Result<T>>;

/**
 * Parse LLM output into structured data. Extracts JSON from text,
 * repairs common issues (single quotes, trailing commas, etc.),
 * and optionally validates against a schema. Never throws.
 *
 * @param input - Raw LLM output (may contain prose, markdown, etc.)
 * @param schema - Optional schema for validation
 * @returns Result object with `ok` flag, data/error, and optional warnings
 *
 * @example
 * const result = parse('{name: "John", age: 30,}');
 * if (result.ok) {
 *   console.log(result.data.name); // "John"
 * }
 */
declare function parse<T = unknown>(input: string, schema?: Schema): Result<T>;
/**
 * Parse with schema, inferring the return type from the schema.
 *
 * @param input - Raw LLM output
 * @param schema - Schema to validate against
 * @returns Typed result
 *
 * @example
 * const schema = { type: 'object', properties: { name: { type: 'string' } } } as const;
 * const result = parseWithSchema('{name: "test"}', schema);
 * if (result.ok) result.data.name; // typed as string
 */
declare function parseWithSchema<S extends Schema>(input: string, schema: S): Result<Infer<S>>;
/**
 * Create a configured instance of llm-json with custom settings.
 * Useful when you need different settings for different use cases.
 *
 * @param config - Configuration options
 * @returns Object with all parse/validate/extract functions
 *
 * @example
 * const parser = createInstance({ maxRepairs: 5 });
 * const result = parser.parse(input);
 */
declare function createInstance(config?: LlmJsonConfig): LlmJsonInstance;

export { type ArraySchema, type ErrorCode, type ExtractResult, type Failure, type Infer, type LiteralSchema, type LlmJsonConfig, type LlmJsonInstance, type ObjectSchema, type ParseError, type PartialParseOptions, type PartialResult, type PrimitiveSchema, type RepairResult, type RepairRule, type Result, type Schema, type StreamingOptions, type StreamingParser, type Success, type UnionSchema, type ValidationError, type ValidationResult, type Warning, type WarningCode, configure, createInstance, createStreamingParser, extract, extractAll, getConfig, parse, parsePartial, parseStream, parseWithSchema, repair, validate };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
interface Success<T> {
|
|
2
|
+
ok: true;
|
|
3
|
+
data: T;
|
|
4
|
+
warnings?: Warning[];
|
|
5
|
+
}
|
|
6
|
+
interface Failure<T> {
|
|
7
|
+
ok: false;
|
|
8
|
+
error: ParseError;
|
|
9
|
+
partial?: PartialResult<T>;
|
|
10
|
+
}
|
|
11
|
+
type Result<T> = Success<T> | Failure<T>;
|
|
12
|
+
interface Warning {
|
|
13
|
+
code: WarningCode;
|
|
14
|
+
message: string;
|
|
15
|
+
position?: number;
|
|
16
|
+
}
|
|
17
|
+
type WarningCode = 'trailing_comma_removed' | 'single_quotes_replaced' | 'unquoted_key_fixed' | 'missing_comma_added' | 'markdown_fence_stripped' | 'prose_stripped' | 'python_literal_converted' | 'truncated_string_closed' | 'unescaped_quote_fixed';
|
|
18
|
+
interface ParseError {
|
|
19
|
+
code: ErrorCode;
|
|
20
|
+
message: string;
|
|
21
|
+
position?: number;
|
|
22
|
+
context?: string;
|
|
23
|
+
}
|
|
24
|
+
type ErrorCode = 'no_json_found' | 'invalid_json' | 'schema_mismatch' | 'truncated' | 'type_error' | 'missing_required';
|
|
25
|
+
interface PartialResult<T> {
|
|
26
|
+
confidence: 'high' | 'medium' | 'low';
|
|
27
|
+
complete: Partial<T>;
|
|
28
|
+
pending: string[];
|
|
29
|
+
}
|
|
30
|
+
type Schema = PrimitiveSchema | ArraySchema | ObjectSchema | UnionSchema | LiteralSchema;
|
|
31
|
+
interface PrimitiveSchema {
|
|
32
|
+
type: 'string' | 'number' | 'boolean' | 'null';
|
|
33
|
+
enum?: string[];
|
|
34
|
+
}
|
|
35
|
+
interface ArraySchema {
|
|
36
|
+
type: 'array';
|
|
37
|
+
items: Schema;
|
|
38
|
+
minItems?: number;
|
|
39
|
+
maxItems?: number;
|
|
40
|
+
}
|
|
41
|
+
interface ObjectSchema {
|
|
42
|
+
type: 'object';
|
|
43
|
+
properties: Record<string, Schema>;
|
|
44
|
+
required?: string[];
|
|
45
|
+
additionalProperties?: boolean;
|
|
46
|
+
}
|
|
47
|
+
interface UnionSchema {
|
|
48
|
+
type: 'union';
|
|
49
|
+
variants: Schema[];
|
|
50
|
+
}
|
|
51
|
+
interface LiteralSchema {
|
|
52
|
+
type: 'literal';
|
|
53
|
+
value: string | number | boolean | null;
|
|
54
|
+
}
|
|
55
|
+
type Infer<S extends Schema> = S extends PrimitiveSchema ? S['type'] extends 'string' ? string : S['type'] extends 'number' ? number : S['type'] extends 'boolean' ? boolean : null : S extends ArraySchema ? Infer<S['items']>[] : S extends ObjectSchema ? {
|
|
56
|
+
[K in keyof S['properties']]: Infer<S['properties'][K]>;
|
|
57
|
+
} : S extends UnionSchema ? Infer<S['variants'][number]> : S extends LiteralSchema ? S['value'] : unknown;
|
|
58
|
+
interface StreamingOptions {
|
|
59
|
+
schema?: Schema;
|
|
60
|
+
onUpdate?: (result: Result<unknown>) => void;
|
|
61
|
+
onJsonStart?: () => void;
|
|
62
|
+
onJsonComplete?: (data: unknown) => void;
|
|
63
|
+
onWarning?: (warning: Warning) => void;
|
|
64
|
+
}
|
|
65
|
+
interface StreamingParser<T = unknown> {
|
|
66
|
+
write(chunk: string): Result<T>;
|
|
67
|
+
finish(): Result<T>;
|
|
68
|
+
reset(): void;
|
|
69
|
+
readonly buffer: string;
|
|
70
|
+
readonly inJson: boolean;
|
|
71
|
+
readonly depth: number;
|
|
72
|
+
}
|
|
73
|
+
interface ExtractResult {
|
|
74
|
+
json: string | null;
|
|
75
|
+
start: number;
|
|
76
|
+
end: number;
|
|
77
|
+
multiple?: string[];
|
|
78
|
+
}
|
|
79
|
+
interface RepairResult {
|
|
80
|
+
output: string;
|
|
81
|
+
warnings: Warning[];
|
|
82
|
+
valid: boolean;
|
|
83
|
+
}
|
|
84
|
+
interface PartialParseOptions {
|
|
85
|
+
allowPartialStrings?: boolean;
|
|
86
|
+
allowPartialObjects?: boolean;
|
|
87
|
+
allowPartialArrays?: boolean;
|
|
88
|
+
allowPartialNumbers?: boolean;
|
|
89
|
+
onIncompleteString?: (str: string) => string;
|
|
90
|
+
}
|
|
91
|
+
interface ValidationResult<T> {
|
|
92
|
+
ok: boolean;
|
|
93
|
+
data?: T;
|
|
94
|
+
errors?: ValidationError[];
|
|
95
|
+
}
|
|
96
|
+
interface ValidationError {
|
|
97
|
+
path: string;
|
|
98
|
+
code: ErrorCode;
|
|
99
|
+
message: string;
|
|
100
|
+
expected?: string;
|
|
101
|
+
actual?: string;
|
|
102
|
+
}
|
|
103
|
+
interface LlmJsonConfig {
|
|
104
|
+
maxBufferSize?: number;
|
|
105
|
+
maxRepairs?: number;
|
|
106
|
+
customRepairs?: RepairRule[];
|
|
107
|
+
collectWarnings?: boolean;
|
|
108
|
+
}
|
|
109
|
+
interface RepairRule {
|
|
110
|
+
name: string;
|
|
111
|
+
pattern: RegExp;
|
|
112
|
+
replace: string | ((match: string) => string);
|
|
113
|
+
}
|
|
114
|
+
interface LlmJsonInstance {
|
|
115
|
+
parse: <T = unknown>(input: string, schema?: Schema) => Result<T>;
|
|
116
|
+
parseWithSchema: <S extends Schema>(input: string, schema: S) => Result<Infer<S>>;
|
|
117
|
+
createStreamingParser: <T = unknown>(options?: StreamingOptions) => StreamingParser<T>;
|
|
118
|
+
parseStream: <T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>) => Promise<Result<T>>;
|
|
119
|
+
extract: (input: string) => ExtractResult;
|
|
120
|
+
extractAll: (input: string) => ExtractResult;
|
|
121
|
+
repair: (input: string) => RepairResult;
|
|
122
|
+
parsePartial: <T = unknown>(input: string, options?: PartialParseOptions) => Result<T>;
|
|
123
|
+
validate: <T = unknown>(data: unknown, schema: Schema) => ValidationResult<T>;
|
|
124
|
+
}
|
|
125
|
+
declare function configure(config: LlmJsonConfig): void;
|
|
126
|
+
declare function getConfig(): LlmJsonConfig;
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Extract the first JSON object from text. Strips markdown fences
|
|
130
|
+
* and returns the position of the JSON in the original string.
|
|
131
|
+
*
|
|
132
|
+
* @param input - Text containing JSON (possibly with prose/markdown)
|
|
133
|
+
* @returns Extracted JSON string and position info
|
|
134
|
+
*
|
|
135
|
+
* @example
|
|
136
|
+
* const { json } = extract('Result: {"a": 1}');
|
|
137
|
+
* // json === '{"a": 1}'
|
|
138
|
+
*/
|
|
139
|
+
declare function extract(input: string): ExtractResult;
|
|
140
|
+
/**
|
|
141
|
+
* Extract all JSON objects from text.
|
|
142
|
+
*
|
|
143
|
+
* @param input - Text containing multiple JSON objects
|
|
144
|
+
* @returns All extracted JSON strings
|
|
145
|
+
*
|
|
146
|
+
* @example
|
|
147
|
+
* const { multiple } = extractAll('{"a": 1} and {"b": 2}');
|
|
148
|
+
* // multiple === ['{"a": 1}', '{"b": 2}']
|
|
149
|
+
*/
|
|
150
|
+
declare function extractAll(input: string): ExtractResult;
|
|
151
|
+
|
|
152
|
+
/**
|
|
153
|
+
* Repair common JSON issues from LLM output. Handles:
|
|
154
|
+
* - Single quotes (converts to double, preserves apostrophes in strings)
|
|
155
|
+
* - Unquoted keys
|
|
156
|
+
* - Trailing commas
|
|
157
|
+
* - Python literals (None, True, False)
|
|
158
|
+
* - Comments
|
|
159
|
+
* - Markdown fences
|
|
160
|
+
*
|
|
161
|
+
* @param input - Potentially malformed JSON string
|
|
162
|
+
* @returns Repaired JSON, warnings, and validity flag
|
|
163
|
+
*
|
|
164
|
+
* @example
|
|
165
|
+
* const { output, valid } = repair("{name: 'John',}");
|
|
166
|
+
* // output === '{"name": "John"}'
|
|
167
|
+
* // valid === true
|
|
168
|
+
*/
|
|
169
|
+
declare function repair(input: string): RepairResult;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Parse potentially incomplete JSON. Useful for streaming when you
|
|
173
|
+
* want manual control over the parsing process.
|
|
174
|
+
*
|
|
175
|
+
* @param input - Possibly incomplete JSON string
|
|
176
|
+
* @param options - Control which types can be partial
|
|
177
|
+
* @returns Best-effort parsed result
|
|
178
|
+
*
|
|
179
|
+
* @example
|
|
180
|
+
* const result = parsePartial('{"users": [{"name": "Al');
|
|
181
|
+
* // result.ok === true
|
|
182
|
+
* // result.data === { users: [{ name: "Al" }] }
|
|
183
|
+
*/
|
|
184
|
+
declare function parsePartial<T = unknown>(input: string, options?: PartialParseOptions): Result<T>;
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Validate data against a schema. Separate from parsing for when
|
|
188
|
+
* you already have parsed data and want to check it.
|
|
189
|
+
*
|
|
190
|
+
* @param data - Parsed data to validate
|
|
191
|
+
* @param schema - Schema to validate against
|
|
192
|
+
* @returns Validation result with errors if invalid
|
|
193
|
+
*
|
|
194
|
+
* @example
|
|
195
|
+
* const result = validate(
|
|
196
|
+
* { name: 'test' },
|
|
197
|
+
* { type: 'object', properties: { name: { type: 'string' } } }
|
|
198
|
+
* );
|
|
199
|
+
* // result.ok === true
|
|
200
|
+
*/
|
|
201
|
+
declare function validate<T = unknown>(data: unknown, schema: Schema): ValidationResult<T>;
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Create a stateful streaming parser. Call `write(chunk)` for each
|
|
205
|
+
* chunk from the stream, then `finish()` when done.
|
|
206
|
+
*
|
|
207
|
+
* @param options - Schema, callbacks for update/warning events
|
|
208
|
+
* @returns StreamingParser with write(), finish(), reset() methods
|
|
209
|
+
*
|
|
210
|
+
* @example
|
|
211
|
+
* const parser = createStreamingParser({ schema });
|
|
212
|
+
* for await (const chunk of llmStream) {
|
|
213
|
+
* const result = parser.write(chunk);
|
|
214
|
+
* if (result.ok) updateUI(result.data);
|
|
215
|
+
* }
|
|
216
|
+
* const final = parser.finish();
|
|
217
|
+
*/
|
|
218
|
+
declare function createStreamingParser<T = unknown>(options?: StreamingOptions): StreamingParser<T>;
|
|
219
|
+
/**
|
|
220
|
+
* Parse an async iterable or ReadableStream of chunks.
|
|
221
|
+
* Convenience wrapper around createStreamingParser.
|
|
222
|
+
*
|
|
223
|
+
* @param chunks - AsyncIterable<string> or ReadableStream
|
|
224
|
+
* @param schema - Optional schema for validation
|
|
225
|
+
* @param options - Streaming callbacks
|
|
226
|
+
* @returns Promise resolving to final result
|
|
227
|
+
*
|
|
228
|
+
* @example
|
|
229
|
+
* const stream = openai.chat.completions.create({ stream: true, ... }); // NOTE: must be adapted to yield text chunks (strings) — raw SDK streams emit event objects
|
|
230
|
+
* const result = await parseStream(stream, schema);
|
|
231
|
+
*/
|
|
232
|
+
declare function parseStream<T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>): Promise<Result<T>>;
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Parse LLM output into structured data. Extracts JSON from text,
|
|
236
|
+
* repairs common issues (single quotes, trailing commas, etc.),
|
|
237
|
+
* and optionally validates against a schema. Never throws.
|
|
238
|
+
*
|
|
239
|
+
* @param input - Raw LLM output (may contain prose, markdown, etc.)
|
|
240
|
+
* @param schema - Optional schema for validation
|
|
241
|
+
* @returns Result object with `ok` flag, data/error, and optional warnings
|
|
242
|
+
*
|
|
243
|
+
* @example
|
|
244
|
+
* const result = parse('{name: "John", age: 30,}');
|
|
245
|
+
* if (result.ok) {
|
|
246
|
+
* console.log(result.data.name); // "John"
|
|
247
|
+
* }
|
|
248
|
+
*/
|
|
249
|
+
declare function parse<T = unknown>(input: string, schema?: Schema): Result<T>;
|
|
250
|
+
/**
|
|
251
|
+
* Parse with schema, inferring the return type from the schema.
|
|
252
|
+
*
|
|
253
|
+
* @param input - Raw LLM output
|
|
254
|
+
* @param schema - Schema to validate against
|
|
255
|
+
* @returns Typed result
|
|
256
|
+
*
|
|
257
|
+
* @example
|
|
258
|
+
* const schema = { type: 'object', properties: { name: { type: 'string' } } } as const;
|
|
259
|
+
* const result = parseWithSchema('{name: "test"}', schema);
|
|
260
|
+
* if (result.ok) result.data.name; // typed as string
|
|
261
|
+
*/
|
|
262
|
+
declare function parseWithSchema<S extends Schema>(input: string, schema: S): Result<Infer<S>>;
|
|
263
|
+
/**
|
|
264
|
+
* Create a configured instance of llm-json with custom settings.
|
|
265
|
+
* Useful when you need different settings for different use cases.
|
|
266
|
+
*
|
|
267
|
+
* @param config - Configuration options
|
|
268
|
+
* @returns Object with all parse/validate/extract functions
|
|
269
|
+
*
|
|
270
|
+
* @example
|
|
271
|
+
* const parser = createInstance({ maxRepairs: 5 });
|
|
272
|
+
* const result = parser.parse(input);
|
|
273
|
+
*/
|
|
274
|
+
declare function createInstance(config?: LlmJsonConfig): LlmJsonInstance;
|
|
275
|
+
|
|
276
|
+
export { type ArraySchema, type ErrorCode, type ExtractResult, type Failure, type Infer, type LiteralSchema, type LlmJsonConfig, type LlmJsonInstance, type ObjectSchema, type ParseError, type PartialParseOptions, type PartialResult, type PrimitiveSchema, type RepairResult, type RepairRule, type Result, type Schema, type StreamingOptions, type StreamingParser, type Success, type UnionSchema, type ValidationError, type ValidationResult, type Warning, type WarningCode, configure, createInstance, createStreamingParser, extract, extractAll, getConfig, parse, parsePartial, parseStream, parseWithSchema, repair, validate };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
var b={maxBufferSize:1048576,maxRepairs:10,collectWarnings:!0};function v(t){b={...b,...t}}function E(){return b}function P(t,r){let e=0,n=!1,a=!1,i=-1,o=[],u=[];for(let l=0;l<t.length;l++){let m=t[l];if(a){a=!1;continue}if(m==="\\"&&n){a=!0;continue}if(m==='"'){n=!n;continue}if(!n){if(m==="{"||m==="[")e===0&&(i=l),e++;else if((m==="}"||m==="]")&&(e--,e===0&&i>=0)){let p=t.slice(i,l+1);if(o.push(p),u.push({start:i,end:l+1}),!r)break}}}return o.length===0?{json:null,start:0,end:0,multiple:[]}:{json:o[0],start:u[0].start,end:u[0].end,multiple:o}}function _(t){return t.replace(/```(?:json)?\s*\n?([\s\S]*?)\n?```/g,"$1").trim()}function d(t){if(!t)return{json:null,start:0,end:0};let r=_(t);return P(r,!1)}function w(t){if(!t)return{json:null,start:0,end:0,multiple:[]};let r=_(t);return P(r,!0)}function h(t){if(!t)return{output:"",warnings:[],valid:!1};let r=[],e=t.trim();e=e.replace(/```json?\s*\n?/gi,"").replace(/```\s*$/g,""),e=e.replace(/\/\/[^\n]*/g,"").replace(/\/\*[\s\S]*?\*\//g,""),e=T(e,r);{let a=e;e=e.replace(/,\s*([}\]])/g,"$1"),a!==e&&r.push({code:"trailing_comma_removed",message:""})}e=e.replace(/,\s*,/g,",");let n=!1;try{JSON.parse(e),n=!0}catch{}return{output:e,warnings:r,valid:n}}function T(t,r){let e="",n=0,a=!1,i="",o=!1,u=!1;for(;n<t.length;){let l=t[n];if(o){if(o=!1,a&&i==="'"){if(l==="'"){e+="'",n++;continue}else if(l==='"'){e+='\\"',n++;continue}}e+=l,n++;continue}if(l==="\\"&&a){if(i==="'"){o=!0,n++;continue}o=!0,e+=l,n++;continue}if(l==='"'){if(a){if(i==='"')a=!1,i="";else if(i==="'"){e+='\\"',n++;continue}}else a=!0,i='"';e+='"',n++;continue}if(l==="'"){if(a)if(i==='"'){e+="'",n++;continue}else i==="'"&&(a=!1,i="",u=!0);else a=!0,i="'",u=!0;e+='"',n++;continue}if(a){e+=l,n++;continue}if(l==="{"||l===","){for(e+=l,n++;n<t.length&&/\s/.test(t[n]);)e+=t[n++];if(n>=t.length)break;if(t[n]==='"'){a=!0,i='"',e+='"',n++;continue}if(t[n]==="'"){a=!0,i="'",u=!0,e+='"',n++;continue}let 
m=n;for(;n<t.length&&/[\w$_]/.test(t[n]);)n++;n>m&&!/^(true|false|null|undefined|None|True|False)$/.test(t.slice(m,n))&&(e+='"'+t.slice(m,n)+'"',r.push({code:"unquoted_key_fixed",message:""}));continue}if(t.slice(n,n+4)==="None"){e+="null",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(t.slice(n,n+4)==="True"){e+="true",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(t.slice(n,n+5)==="False"){e+="false",r.push({code:"python_literal_converted",message:""}),n+=5;continue}e+=l,n++}return u&&r.push({code:"single_quotes_replaced",message:""}),e}function S(t,r){if(!t)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};try{return{ok:!0,data:JSON.parse(t)}}catch{}let e={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1,...r};try{return{ok:!0,data:O(t,e)}}catch(n){return{ok:!1,error:{code:"truncated",message:n.message},partial:{confidence:"medium",complete:{},pending:[]}}}}function O(t,r){let e=0,n=()=>{for(;e<t.length&&/\s/.test(t[e]);)e++},a=()=>t[e],i=()=>t[e++],o=()=>{n();let s=a();if(s==="{")return p();if(s==="[")return m();if(s==='"')return u();if(s==="-"||/[0-9]/.test(s))return l();if(t.slice(e,e+4)==="true")return e+=4,!0;if(t.slice(e,e+5)==="false")return e+=5,!1;if(t.slice(e,e+4)==="null")return e+=4,null;throw new Error("Unexpected token at "+e)},u=()=>{i();let s="",c=!1;for(;e<t.length;){let f=i();if(c){c=!1,s+=f;continue}if(f==="\\"){c=!0;continue}if(f==='"')return s;s+=f}if(r.allowPartialStrings)return s;throw new Error("Unterminated string")},l=()=>{let s=e;for(a()==="-"&&i();e<t.length&&/[0-9]/.test(t[e]);)i();if(a()===".")for(i();e<t.length&&/[0-9]/.test(t[e]);)i();if(a()==="e"||a()==="E")for(i(),(a()==="+"||a()==="-")&&i();e<t.length&&/[0-9]/.test(t[e]);)i();let c=t.slice(s,e);return r.allowPartialNumbers&&c.endsWith(".")&&(c=c.slice(0,-1)),parseFloat(c)},m=()=>{i();let s=[];if(n(),a()==="]")return i(),s;for(;e<t.length;){if(n(),a()==="]")return 
i(),s;if(a()===","){i();continue}s.push(o())}if(r.allowPartialArrays)return s;throw new Error("Unterminated array")},p=()=>{i();let s={};if(n(),a()==="}")return i(),s;for(;e<t.length;){if(n(),a()==="}")return i(),s;if(a()===","){i();continue}let c=u();if(n(),a()!==":"){if(r.allowPartialObjects)return s;throw new Error("Expected colon")}i(),n(),s[c]=o()}if(r.allowPartialObjects)return s;throw new Error("Unterminated object")};return o()}function g(t,r){let e=x(t,r,"");return e.length===0?{ok:!0,data:t}:{ok:!1,errors:e}}function x(t,r,e){switch(r.type){case"null":return t===null?[]:[{path:e,code:"type_error",message:"Expected null",expected:"null",actual:typeof t}];case"string":return typeof t=="string"?I(t,r.enum,e):[{path:e,code:"type_error",message:"Expected string",expected:"string",actual:typeof t}];case"number":return typeof t=="number"?[]:[{path:e,code:"type_error",message:"Expected number",expected:"number",actual:typeof t}];case"boolean":return typeof t=="boolean"?[]:[{path:e,code:"type_error",message:"Expected boolean",expected:"boolean",actual:typeof t}];case"array":return A(t,r,e);case"object":return J(t,r,e);case"union":return N(t,r,e);case"literal":return t===r.value?[]:[{path:e,code:"type_error",message:`Expected ${r.value}`,expected:String(r.value),actual:String(t)}];default:return[]}}function I(t,r,e){return r&&!r.includes(t)?[{path:e,code:"type_error",message:"Not in enum",expected:r.join("|"),actual:t}]:[]}function A(t,r,e){if(!Array.isArray(t))return[{path:e,code:"type_error",message:"Expected array",expected:"array",actual:typeof t}];let n=[];return r.minItems!==void 0&&t.length<r.minItems&&n.push({path:e,code:"type_error",message:`Min ${r.minItems} items`,expected:`>=${r.minItems}`,actual:String(t.length)}),r.maxItems!==void 0&&t.length>r.maxItems&&n.push({path:e,code:"type_error",message:`Max ${r.maxItems} items`,expected:`<=${r.maxItems}`,actual:String(t.length)}),t.forEach((a,i)=>n.push(...x(a,r.items,`${e}/${i}`))),n}function 
J(t,r,e){if(typeof t!="object"||t===null||Array.isArray(t))return[{path:e,code:"type_error",message:"Expected object",expected:"object",actual:t===null?"null":Array.isArray(t)?"array":typeof t}];let n=[],a=t,i=r.required||[];for(let o of i)o in a||n.push({path:`${e}/${o}`,code:"missing_required",message:`Missing ${o}`,expected:o});for(let[o,u]of Object.entries(a))o in r.properties?n.push(...x(u,r.properties[o],`${e}/${o}`)):r.additionalProperties===!1&&n.push({path:`${e}/${o}`,code:"type_error",message:`Unknown property ${o}`});return n}function N(t,r,e){for(let n of r.variants)if(x(t,n,e).length===0)return[];return[{path:e,code:"type_error",message:"No union variant matched",expected:"union",actual:String(t)}]}function y(t){let r=t||{},e={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1},n="",a=0,i=!1,o=!1,u=!1,l=p=>{n+=p;for(let s=n.length-p.length;s<n.length;s++){let c=n[s];if(o){o=!1;continue}if(c==="\\"&&i){o=!0;continue}if(c==='"'){i=!i;continue}i||(c==="{"||c==="["?(a===0&&(u=!0),a++):(c==="}"||c==="]")&&a--)}},m=()=>{let p=d(n);if(!p.json)return{ok:!1,error:{code:"truncated",message:"No JSON found"}};let s=h(p.json),c=S(s.output,e);if(c.ok&&r.schema){let f=g(c.data,r.schema);if(!f.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(f.errors)}}}return c};return{get buffer(){return n},get inJson(){return u},get depth(){return a},write(p){return l(p),m()},finish(){return u?a>0||i?{ok:!1,error:{code:"truncated",message:"Incomplete JSON"}}:m():{ok:!1,error:{code:"no_json_found",message:"No JSON found"}}},reset(){n="",a=0,i=!1,o=!1,u=!1}}}function W(t){return t!=null&&typeof t[Symbol.asyncIterator]=="function"}function $(t){return typeof ReadableStream<"u"&&t instanceof ReadableStream}async function*C(t){let r=t.getReader(),e=new TextDecoder;try{for(;;){let{done:n,value:a}=await r.read();if(n)break;yield e.decode(a,{stream:!0})}}finally{r.releaseLock()}}async function 
k(t,r,e){let n=y({...e,schema:r}),a=$(t)?C(t):W(t)?t:null;if(!a)return{ok:!1,error:{code:"invalid_json",message:"Invalid input"}};for await(let i of a)n.write(i);return n.finish()}function R(t,r){if(!t)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};let e=d(t);if(!e.json)return{ok:!1,error:{code:"no_json_found",message:"No JSON found"}};try{let a=JSON.parse(e.json);if(r){let i=g(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a}}catch{}let n=h(e.json);try{let a=JSON.parse(n.output);if(r){let i=g(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a,warnings:n.warnings.length?n.warnings:void 0}}catch(a){return{ok:!1,error:{code:"invalid_json",message:a.message}}}}function j(t,r){return R(t,r)}function V(t){return{parse:(r,e)=>R(r,e),parseWithSchema:(r,e)=>j(r,e),createStreamingParser:r=>y(r),parseStream:(r,e,n)=>k(r,e,n),extract:r=>d(r),extractAll:r=>w(r),repair:r=>h(r),parsePartial:(r,e)=>S(r,e),validate:(r,e)=>g(r,e)}}export{v as configure,V as createInstance,y as createStreamingParser,d as extract,w as extractAll,E as getConfig,R as parse,S as parsePartial,k as parseStream,j as parseWithSchema,h as repair,g as validate};
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "parse-llm-json",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Extract structured data from LLM output with streaming support",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.cjs",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"require": "./dist/index.cjs"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": ["dist"],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"build": "tsup src/index.ts --format esm,cjs --dts --clean --minify",
|
|
19
|
+
"build:test": "tsup src/test.ts --format esm --clean --minify && node dist/test.js",
|
|
20
|
+
"test": "npm run build && npm run build:test",
|
|
21
|
+
"typecheck": "tsc --noEmit",
|
|
22
|
+
"prepublishOnly": "npm run build"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"llm",
|
|
26
|
+
"json",
|
|
27
|
+
"parse",
|
|
28
|
+
"streaming",
|
|
29
|
+
"ai",
|
|
30
|
+
"openai",
|
|
31
|
+
"claude",
|
|
32
|
+
"gpt",
|
|
33
|
+
"structured-output",
|
|
34
|
+
"partial-json"
|
|
35
|
+
],
|
|
36
|
+
"license": "MIT",
|
|
37
|
+
"repository": {
|
|
38
|
+
"type": "git",
|
|
39
|
+
"url": "git+https://github.com/tita-n/llm-json.git"
|
|
40
|
+
},
|
|
41
|
+
"bugs": {
|
|
42
|
+
"url": "https://github.com/tita-n/llm-json/issues"
|
|
43
|
+
},
|
|
44
|
+
"homepage": "https://github.com/tita-n/llm-json#readme",
|
|
45
|
+
"devDependencies": {
|
|
46
|
+
"tsup": "^8.0.0",
|
|
47
|
+
"typescript": "^5.3.0"
|
|
48
|
+
}
|
|
49
|
+
}
|