parse-llm-json 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,232 @@
1
+ # llm-json
2
+
3
+ Extract structured data from LLM output. Handles malformed JSON, streaming responses, and partial output — never throws.
4
+
5
+ ## The Problem
6
+
7
+ LLMs return broken JSON constantly:
8
+
9
+ ```javascript
10
+ // What GPT-4 returns:
11
+ {name: 'John', age: 30, "bio": "User said "hello"",}
12
+
13
+ // What JSON.parse sees:
14
+ SyntaxError: Expected double-quoted property name
15
+ ```
16
+
17
+ **Common failures:**
18
+ - Single quotes instead of double quotes
19
+ - Unquoted keys (`{name: ...}` instead of `{"name": ...}`)
20
+ - Trailing commas
21
+ - Apostrophes inside strings (`"user's name"`)
22
+ - Markdown code blocks wrapping the JSON
23
+ - Prose before/after the JSON
24
+ - Python literals (`None`, `True`, `False`)
25
+ - Incomplete/truncated JSON from token limits
26
+
27
+ ## Install
28
+
29
+ ```bash
30
+ npm install parse-llm-json
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ ### Basic Parsing
36
+
37
+ ```typescript
38
+ import { parse } from 'parse-llm-json';
39
+
40
+ // Handles all the broken JSON patterns
41
+ const result = parse<{ name: string; interests: string[] }>(`
42
+ Here's the data you requested:
43
+ \`\`\`json
44
+ {name: 'John', age: 30, interests: ["ai", "llm's"]}
45
+ \`\`\`
46
+ `);
47
+
48
+ if (result.ok) {
49
+ console.log(result.data.name); // "John"
50
+ console.log(result.data.interests); // ["ai", "llm's"]
51
+ } else {
52
+ console.log(result.error.code); // "no_json_found" | "invalid_json" | ...
53
+ }
54
+ ```
55
+
56
+ ### With Schema Validation
57
+
58
+ ```typescript
59
+ import { parse } from 'parse-llm-json';
60
+
61
+ const schema = {
62
+ type: 'object',
63
+ properties: {
64
+ name: { type: 'string' },
65
+ age: { type: 'number' }
66
+ },
67
+ required: ['name']
68
+ };
69
+
70
+ const result = parse('{name: "Alice", age: "wrong"}', schema);
71
+ // result.ok === false
72
+ // result.error.code === 'schema_mismatch'
73
+ ```
74
+
75
+ ### Streaming (SSE / Token-by-Token)
76
+
77
+ ```typescript
78
+ import { createStreamingParser } from 'parse-llm-json';
79
+
80
+ const parser = createStreamingParser({
81
+ schema: { type: 'object', properties: { name: { type: 'string' } } }
82
+ });
83
+
84
+ // Feed chunks as they arrive from OpenAI, Claude, etc.
85
+ for await (const chunk of llmStream) {
86
+ const result = parser.write(chunk);
87
+ if (result.ok) {
88
+ updateUI(result.data); // Show partial results in real-time
89
+ }
90
+ }
91
+
92
+ // Get final result
93
+ const final = parser.finish();
94
+ ```
95
+
96
+ ### Extract Multiple JSON Objects
97
+
98
+ ```typescript
99
+ import { extractAll } from 'parse-llm-json';
100
+
101
+ const text = `
102
+ First user: {"id": 1, "name": "Alice"}
103
+ Second user: {"id": 2, "name": "Bob"}
104
+ `;
105
+
106
+ const { multiple } = extractAll(text);
107
+ // multiple === ['{"id": 1, "name": "Alice"}', '{"id": 2, "name": "Bob"}']  (raw JSON strings, not parsed objects)
108
+ ```
109
+
110
+ ## API
111
+
112
+ ### `parse<T>(input: string, schema?: Schema): Result<T>`
113
+
114
+ Main entry point. Extracts JSON from LLM output, repairs common issues, validates against schema.
115
+
116
+ ```typescript
117
+ const result = parse('{"name": "test"}');
118
+ if (result.ok) {
119
+ result.data; // { name: "test" }
120
+ result.warnings; // Repair warnings, if any
121
+ } else {
122
+ result.error; // { code, message, position?, context? }
123
+ }
124
+ ```
125
+
126
+ ### `createStreamingParser<T>(options?): StreamingParser<T>`
127
+
128
+ Stateful parser for streaming responses. Call `write(chunk)` for each chunk, `finish()` when done.
129
+
130
+ ```typescript
131
+ const parser = createStreamingParser({ schema });
132
+ parser.write(chunk1);
133
+ parser.write(chunk2);
134
+ const result = parser.finish();
135
+ ```
136
+
137
+ ### `repair(input: string): RepairResult`
138
+
139
+ Low-level repair function. Returns repaired JSON string plus warnings.
140
+
141
+ ```typescript
142
+ const { output, warnings, valid } = repair("{name: 'test'}");
143
+ // output === '{"name": "test"}'
144
+ // valid === true
145
+ ```
146
+
147
+ ### `extract(input: string): ExtractResult`
148
+
149
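+ Extract the first top-level JSON object or array from text. Markdown fences are stripped and surrounding prose is ignored; `start`/`end` are offsets into the fence-stripped text.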
150
+
151
+ ```typescript
152
+ const { json, start, end } = extract('prefix {"a": 1} suffix');
153
+ // json === '{"a": 1}'
154
+ ```
155
+
156
+ ### `extractAll(input: string): ExtractResult`
157
+
158
+ Extract all JSON objects from text.
159
+
160
+ ```typescript
161
+ const { multiple } = extractAll('{"a": 1} text {"b": 2}');
162
+ // multiple === ['{"a": 1}', '{"b": 2}']
163
+ ```
164
+
165
+ ### `parsePartial<T>(input: string, options?): Result<T>`
166
+
167
+ Parse potentially incomplete JSON. Useful for streaming when you want manual control.
168
+
169
+ ```typescript
170
+ const result = parsePartial('{"users": [{"name": "Al');
171
+ // result.ok === true
172
+ // result.data === { users: [{ name: "Al" }] }
173
+ ```
174
+
175
+ ### `validate<T>(data: unknown, schema: Schema): ValidationResult<T>`
176
+
177
+ Validate parsed data against schema. Separate from parsing for when you already have data.
178
+
179
+ ```typescript
180
+ const result = validate({ name: "test" }, { type: 'object', properties: { name: { type: 'string' } } });
181
+ // result.ok === true
182
+ ```
183
+
184
+ ### `parseStream<T>(chunks, schema?, options?): Promise<Result<T>>`
185
+
186
+ Parse an async iterable or ReadableStream.
187
+
188
+ ```typescript
189
+ const result = await parseStream(openaiStream, schema);
190
+ ```
191
+
192
+ ## Schema Format
193
+
194
+ Minimal schema format (not JSON Schema — kept small for bundle size):
195
+
196
+ ```typescript
197
+ type Schema =
198
+ | { type: 'string' | 'number' | 'boolean' | 'null', enum?: string[] }
199
+ | { type: 'array', items: Schema, minItems?: number, maxItems?: number }
200
+ | { type: 'object', properties: Record<string, Schema>, required?: string[], additionalProperties?: boolean }
201
+ | { type: 'union', variants: Schema[] }
202
+ | { type: 'literal', value: string | number | boolean | null }
203
+ ```
204
+
205
+ For complex validation, pipe output through Zod.
206
+
207
+ ## Known Limitations
208
+
209
+ **repair() edge cases:**
210
+ - Cannot fix structural errors (mismatched brackets, completely malformed syntax)
211
+ - May produce incorrect output for deeply nested quote escaping (`"a\"b'c\"d"`)
212
+ - Doesn't handle JavaScript-style template literals
213
+ - Numbers with leading zeros or multiple decimal points not repaired
214
+
215
+ **Schema limitations:**
216
+ - No regex patterns, custom validators, or conditional schemas
217
+ - No recursive schema references
218
+ - No `$ref` or JSON Schema standard support
219
+
220
+ **Streaming:**
221
+ - Requires explicit `finish()` call — no auto-detection of complete JSON
222
+ - Very large strings (>64KB) may cause issues in some environments
223
+
224
+ ## Bundle Size
225
+
226
+ - ESM: 9.00 KB minified
227
+ - CJS: 9.61 KB minified
228
+ - Zero dependencies
229
+
230
+ ## License
231
+
232
+ MIT
package/dist/index.cjs ADDED
@@ -0,0 +1 @@
1
+ /*
+  * parse-llm-json CommonJS build (minified; identifiers are mangled by the bundler).
+  * Export map: configure=j, getConfig=v, extract=d, extractAll=y, repair=h, parsePartial=S,
+  * validate=f, createStreamingParser=x, parseStream=w, parse=k, parseWithSchema=_, createInstance=O.
+  * Helpers: P holds the module-wide config; E scans for balanced top-level brace or bracket spans
+  * while skipping double-quoted strings; T unwraps markdown code fences and trims; C rewrites single
+  * quotes, unquoted keys and Python literals; V is the recursive-descent parser behind parsePartial;
+  * b, L, U, q and F implement schema validation; M, z and B adapt async iterables and ReadableStreams.
+  * NOTE(review): O ignores its config argument, P is only touched by j and v, x reads only
+  * options.schema (no callbacks are invoked), and h removes line and block comments by regex
+  * before string handling, so a double slash inside a string value (e.g. a URL) is cut as well.
+  */ "use strict";var R=Object.defineProperty;var I=Object.getOwnPropertyDescriptor;var A=Object.getOwnPropertyNames;var J=Object.prototype.hasOwnProperty;var N=(e,r)=>{for(var t in r)R(e,t,{get:r[t],enumerable:!0})},W=(e,r,t,n)=>{if(r&&typeof r=="object"||typeof r=="function")for(let a of A(r))!J.call(e,a)&&a!==t&&R(e,a,{get:()=>r[a],enumerable:!(n=I(r,a))||n.enumerable});return e};var $=e=>W(R({},"__esModule",{value:!0}),e);var K={};N(K,{configure:()=>j,createInstance:()=>O,createStreamingParser:()=>x,extract:()=>d,extractAll:()=>y,getConfig:()=>v,parse:()=>k,parsePartial:()=>S,parseStream:()=>w,parseWithSchema:()=>_,repair:()=>h,validate:()=>f});module.exports=$(K);var P={maxBufferSize:1048576,maxRepairs:10,collectWarnings:!0};function j(e){P={...P,...e}}function v(){return P}function E(e,r){let t=0,n=!1,a=!1,i=-1,o=[],u=[];for(let l=0;l<e.length;l++){let m=e[l];if(a){a=!1;continue}if(m==="\\"&&n){a=!0;continue}if(m==='"'){n=!n;continue}if(!n){if(m==="{"||m==="[")t===0&&(i=l),t++;else if((m==="}"||m==="]")&&(t--,t===0&&i>=0)){let p=e.slice(i,l+1);if(o.push(p),u.push({start:i,end:l+1}),!r)break}}}return o.length===0?{json:null,start:0,end:0,multiple:[]}:{json:o[0],start:u[0].start,end:u[0].end,multiple:o}}function T(e){return e.replace(/```(?:json)?\s*\n?([\s\S]*?)\n?```/g,"$1").trim()}function d(e){if(!e)return{json:null,start:0,end:0};let r=T(e);return E(r,!1)}function y(e){if(!e)return{json:null,start:0,end:0,multiple:[]};let r=T(e);return E(r,!0)}function h(e){if(!e)return{output:"",warnings:[],valid:!1};let r=[],t=e.trim();t=t.replace(/```json?\s*\n?/gi,"").replace(/```\s*$/g,""),t=t.replace(/\/\/[^\n]*/g,"").replace(/\/\*[\s\S]*?\*\//g,""),t=C(t,r);{let a=t;t=t.replace(/,\s*([}\]])/g,"$1"),a!==t&&r.push({code:"trailing_comma_removed",message:""})}t=t.replace(/,\s*,/g,",");let n=!1;try{JSON.parse(t),n=!0}catch{}return{output:t,warnings:r,valid:n}}function C(e,r){let t="",n=0,a=!1,i="",o=!1,u=!1;for(;n<e.length;){let 
l=e[n];if(o){if(o=!1,a&&i==="'"){if(l==="'"){t+="'",n++;continue}else if(l==='"'){t+='\\"',n++;continue}}t+=l,n++;continue}if(l==="\\"&&a){if(i==="'"){o=!0,n++;continue}o=!0,t+=l,n++;continue}if(l==='"'){if(a){if(i==='"')a=!1,i="";else if(i==="'"){t+='\\"',n++;continue}}else a=!0,i='"';t+='"',n++;continue}if(l==="'"){if(a)if(i==='"'){t+="'",n++;continue}else i==="'"&&(a=!1,i="",u=!0);else a=!0,i="'",u=!0;t+='"',n++;continue}if(a){t+=l,n++;continue}if(l==="{"||l===","){for(t+=l,n++;n<e.length&&/\s/.test(e[n]);)t+=e[n++];if(n>=e.length)break;if(e[n]==='"'){a=!0,i='"',t+='"',n++;continue}if(e[n]==="'"){a=!0,i="'",u=!0,t+='"',n++;continue}let m=n;for(;n<e.length&&/[\w$_]/.test(e[n]);)n++;n>m&&!/^(true|false|null|undefined|None|True|False)$/.test(e.slice(m,n))&&(t+='"'+e.slice(m,n)+'"',r.push({code:"unquoted_key_fixed",message:""}));continue}if(e.slice(n,n+4)==="None"){t+="null",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(e.slice(n,n+4)==="True"){t+="true",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(e.slice(n,n+5)==="False"){t+="false",r.push({code:"python_literal_converted",message:""}),n+=5;continue}t+=l,n++}return u&&r.push({code:"single_quotes_replaced",message:""}),t}function S(e,r){if(!e)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};try{return{ok:!0,data:JSON.parse(e)}}catch{}let t={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1,...r};try{return{ok:!0,data:V(e,t)}}catch(n){return{ok:!1,error:{code:"truncated",message:n.message},partial:{confidence:"medium",complete:{},pending:[]}}}}function V(e,r){let t=0,n=()=>{for(;t<e.length&&/\s/.test(e[t]);)t++},a=()=>e[t],i=()=>e[t++],o=()=>{n();let s=a();if(s==="{")return p();if(s==="[")return m();if(s==='"')return u();if(s==="-"||/[0-9]/.test(s))return l();if(e.slice(t,t+4)==="true")return t+=4,!0;if(e.slice(t,t+5)==="false")return t+=5,!1;if(e.slice(t,t+4)==="null")return t+=4,null;throw new Error("Unexpected 
token at "+t)},u=()=>{i();let s="",c=!1;for(;t<e.length;){let g=i();if(c){c=!1,s+=g;continue}if(g==="\\"){c=!0;continue}if(g==='"')return s;s+=g}if(r.allowPartialStrings)return s;throw new Error("Unterminated string")},l=()=>{let s=t;for(a()==="-"&&i();t<e.length&&/[0-9]/.test(e[t]);)i();if(a()===".")for(i();t<e.length&&/[0-9]/.test(e[t]);)i();if(a()==="e"||a()==="E")for(i(),(a()==="+"||a()==="-")&&i();t<e.length&&/[0-9]/.test(e[t]);)i();let c=e.slice(s,t);return r.allowPartialNumbers&&c.endsWith(".")&&(c=c.slice(0,-1)),parseFloat(c)},m=()=>{i();let s=[];if(n(),a()==="]")return i(),s;for(;t<e.length;){if(n(),a()==="]")return i(),s;if(a()===","){i();continue}s.push(o())}if(r.allowPartialArrays)return s;throw new Error("Unterminated array")},p=()=>{i();let s={};if(n(),a()==="}")return i(),s;for(;t<e.length;){if(n(),a()==="}")return i(),s;if(a()===","){i();continue}let c=u();if(n(),a()!==":"){if(r.allowPartialObjects)return s;throw new Error("Expected colon")}i(),n(),s[c]=o()}if(r.allowPartialObjects)return s;throw new Error("Unterminated object")};return o()}function f(e,r){let t=b(e,r,"");return t.length===0?{ok:!0,data:e}:{ok:!1,errors:t}}function b(e,r,t){switch(r.type){case"null":return e===null?[]:[{path:t,code:"type_error",message:"Expected null",expected:"null",actual:typeof e}];case"string":return typeof e=="string"?L(e,r.enum,t):[{path:t,code:"type_error",message:"Expected string",expected:"string",actual:typeof e}];case"number":return typeof e=="number"?[]:[{path:t,code:"type_error",message:"Expected number",expected:"number",actual:typeof e}];case"boolean":return typeof e=="boolean"?[]:[{path:t,code:"type_error",message:"Expected boolean",expected:"boolean",actual:typeof e}];case"array":return U(e,r,t);case"object":return q(e,r,t);case"union":return F(e,r,t);case"literal":return e===r.value?[]:[{path:t,code:"type_error",message:`Expected ${r.value}`,expected:String(r.value),actual:String(e)}];default:return[]}}function L(e,r,t){return 
r&&!r.includes(e)?[{path:t,code:"type_error",message:"Not in enum",expected:r.join("|"),actual:e}]:[]}function U(e,r,t){if(!Array.isArray(e))return[{path:t,code:"type_error",message:"Expected array",expected:"array",actual:typeof e}];let n=[];return r.minItems!==void 0&&e.length<r.minItems&&n.push({path:t,code:"type_error",message:`Min ${r.minItems} items`,expected:`>=${r.minItems}`,actual:String(e.length)}),r.maxItems!==void 0&&e.length>r.maxItems&&n.push({path:t,code:"type_error",message:`Max ${r.maxItems} items`,expected:`<=${r.maxItems}`,actual:String(e.length)}),e.forEach((a,i)=>n.push(...b(a,r.items,`${t}/${i}`))),n}function q(e,r,t){if(typeof e!="object"||e===null||Array.isArray(e))return[{path:t,code:"type_error",message:"Expected object",expected:"object",actual:e===null?"null":Array.isArray(e)?"array":typeof e}];let n=[],a=e,i=r.required||[];for(let o of i)o in a||n.push({path:`${t}/${o}`,code:"missing_required",message:`Missing ${o}`,expected:o});for(let[o,u]of Object.entries(a))o in r.properties?n.push(...b(u,r.properties[o],`${t}/${o}`)):r.additionalProperties===!1&&n.push({path:`${t}/${o}`,code:"type_error",message:`Unknown property ${o}`});return n}function F(e,r,t){for(let n of r.variants)if(b(e,n,t).length===0)return[];return[{path:t,code:"type_error",message:"No union variant matched",expected:"union",actual:String(e)}]}function x(e){let r=e||{},t={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1},n="",a=0,i=!1,o=!1,u=!1,l=p=>{n+=p;for(let s=n.length-p.length;s<n.length;s++){let c=n[s];if(o){o=!1;continue}if(c==="\\"&&i){o=!0;continue}if(c==='"'){i=!i;continue}i||(c==="{"||c==="["?(a===0&&(u=!0),a++):(c==="}"||c==="]")&&a--)}},m=()=>{let p=d(n);if(!p.json)return{ok:!1,error:{code:"truncated",message:"No JSON found"}};let s=h(p.json),c=S(s.output,t);if(c.ok&&r.schema){let g=f(c.data,r.schema);if(!g.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema 
mismatch",context:JSON.stringify(g.errors)}}}return c};return{get buffer(){return n},get inJson(){return u},get depth(){return a},write(p){return l(p),m()},finish(){return u?a>0||i?{ok:!1,error:{code:"truncated",message:"Incomplete JSON"}}:m():{ok:!1,error:{code:"no_json_found",message:"No JSON found"}}},reset(){n="",a=0,i=!1,o=!1,u=!1}}}function M(e){return e!=null&&typeof e[Symbol.asyncIterator]=="function"}function z(e){return typeof ReadableStream<"u"&&e instanceof ReadableStream}async function*B(e){let r=e.getReader(),t=new TextDecoder;try{for(;;){let{done:n,value:a}=await r.read();if(n)break;yield t.decode(a,{stream:!0})}}finally{r.releaseLock()}}async function w(e,r,t){let n=x({...t,schema:r}),a=z(e)?B(e):M(e)?e:null;if(!a)return{ok:!1,error:{code:"invalid_json",message:"Invalid input"}};for await(let i of a)n.write(i);return n.finish()}function k(e,r){if(!e)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};let t=d(e);if(!t.json)return{ok:!1,error:{code:"no_json_found",message:"No JSON found"}};try{let a=JSON.parse(t.json);if(r){let i=f(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a}}catch{}let n=h(t.json);try{let a=JSON.parse(n.output);if(r){let i=f(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a,warnings:n.warnings.length?n.warnings:void 0}}catch(a){return{ok:!1,error:{code:"invalid_json",message:a.message}}}}function _(e,r){return k(e,r)}function O(e){return{parse:(r,t)=>k(r,t),parseWithSchema:(r,t)=>_(r,t),createStreamingParser:r=>x(r),parseStream:(r,t,n)=>w(r,t,n),extract:r=>d(r),extractAll:r=>y(r),repair:r=>h(r),parsePartial:(r,t)=>S(r,t),validate:(r,t)=>f(r,t)}}0&&(module.exports={configure,createInstance,createStreamingParser,extract,extractAll,getConfig,parse,parsePartial,parseStream,parseWithSchema,repair,validate});
@@ -0,0 +1,276 @@
1
+ interface Success<T> { // Successful branch of Result<T>.
2
+ ok: true; // Literal discriminant: narrowing on `ok` selects this branch.
3
+ data: T; // The parsed (and, when a schema was supplied, validated) value.
4
+ warnings?: Warning[]; // Repairs applied while parsing; omitted when there were none.
5
+ }
6
+ interface Failure<T> { // Failed branch of Result<T>.
7
+ ok: false; // Literal discriminant: narrowing on `ok` selects this branch.
8
+ error: ParseError; // Why parsing or validation failed.
9
+ partial?: PartialResult<T>; // Optional partial-recovery details; not set on every failure.
10
+ }
11
+ type Result<T> = Success<T> | Failure<T>; // Discriminated union on `ok`; check it before reading `data` or `error`.
12
+ interface Warning { // A non-fatal note about a repair made to the input.
13
+ code: WarningCode; // Machine-readable kind of repair.
14
+ message: string; // Human-readable text; may be empty.
15
+ position?: number; // presumably a character offset into the input — TODO confirm
16
+ }
17
+ type WarningCode = 'trailing_comma_removed' | 'single_quotes_replaced' | 'unquoted_key_fixed' | 'missing_comma_added' | 'markdown_fence_stripped' | 'prose_stripped' | 'python_literal_converted' | 'truncated_string_closed' | 'unescaped_quote_fixed'; // Kinds of repair a Warning can report.
18
+ interface ParseError { // Describes why a parse or validation attempt failed.
19
+ code: ErrorCode; // Machine-readable failure category.
20
+ message: string; // Human-readable description.
21
+ position?: number; // presumably a character offset into the input — TODO confirm
22
+ context?: string; // Extra detail; for 'schema_mismatch' the bundled parser stores the JSON-stringified validation errors here.
23
+ }
24
+ type ErrorCode = 'no_json_found' | 'invalid_json' | 'schema_mismatch' | 'truncated' | 'type_error' | 'missing_required'; // Failure categories shared by ParseError and ValidationError.
25
+ interface PartialResult<T> { // Best-effort data that a Failure may carry.
26
+ confidence: 'high' | 'medium' | 'low'; // How trustworthy the partial data is; the criteria are not visible here.
27
+ complete: Partial<T>; // presumably the fields that were fully parsed — TODO confirm
28
+ pending: string[]; // presumably names or paths of fields still incomplete — TODO confirm
29
+ }
30
+ type Schema = PrimitiveSchema | ArraySchema | ObjectSchema | UnionSchema | LiteralSchema; // Any supported schema node; the `type` field tells the variants apart.
31
+ interface PrimitiveSchema { // Schema node for a JSON primitive.
32
+ type: 'string' | 'number' | 'boolean' | 'null';
33
+ enum?: string[]; // Allowed values; NOTE(review): the bundled validator consults this only for type 'string'.
34
+ }
35
+ interface ArraySchema { // Schema node for an array whose elements share one schema.
36
+ type: 'array';
37
+ items: Schema; // Schema every element must satisfy.
38
+ minItems?: number; // Inclusive lower bound on length.
39
+ maxItems?: number; // Inclusive upper bound on length.
40
+ }
41
+ interface ObjectSchema { // Schema node for a plain object.
42
+ type: 'object';
43
+ properties: Record<string, Schema>; // Per-key schemas; keys not listed here are unchecked unless additionalProperties is false.
44
+ required?: string[]; // Keys that must be present; everything else is optional.
45
+ additionalProperties?: boolean; // Only an explicit `false` rejects keys missing from `properties`.
46
+ }
47
+ interface UnionSchema { // Schema node that accepts a value matching any one variant.
48
+ type: 'union';
49
+ variants: Schema[]; // Candidate schemas, tried in order.
50
+ }
51
+ interface LiteralSchema { // Schema node that accepts exactly one constant.
52
+ type: 'literal';
53
+ value: string | number | boolean | null; // Compared with strict equality (===).
54
+ }
55
+ type Infer<S extends Schema> = S extends PrimitiveSchema ? S['type'] extends 'string' ? string : S['type'] extends 'number' ? number : S['type'] extends 'boolean' ? boolean : null : S extends ArraySchema ? Infer<S['items']>[] : S extends ObjectSchema ? { // Maps a Schema type to the TypeScript type of the values it describes.
56
+ [K in keyof S['properties']]: Infer<S['properties'][K]>; // Every declared property is emitted as required; the schema's `required` list is not consulted.
57
+ } : S extends UnionSchema ? Infer<S['variants'][number]> : S extends LiteralSchema ? S['value'] : unknown; // Falls back to unknown for unrecognized shapes.
58
+ interface StreamingOptions { // Options accepted by createStreamingParser.
59
+ schema?: Schema; // When set, parsed data is validated against it.
60
+ onUpdate?: (result: Result<unknown>) => void; // NOTE(review): none of the four callbacks is referenced by the bundled streaming parser — confirm they fire.
61
+ onJsonStart?: () => void;
62
+ onJsonComplete?: (data: unknown) => void;
63
+ onWarning?: (warning: Warning) => void;
64
+ }
65
+ interface StreamingParser<T = unknown> { // Stateful parser fed one chunk at a time.
66
+ write(chunk: string): Result<T>; // Appends the chunk and returns a best-effort parse of everything buffered so far.
67
+ finish(): Result<T>; // Final result; call once the stream has ended.
68
+ reset(): void; // Clears the buffer and all tracking state.
69
+ readonly buffer: string; // All text written so far.
70
+ readonly inJson: boolean; // True once an opening `{` or `[` has been seen.
71
+ readonly depth: number; // Current brace/bracket nesting level.
72
+ }
73
+ interface ExtractResult { // What extract()/extractAll() found.
74
+ json: string | null; // First extracted JSON text, or null when nothing was found.
75
+ start: number; // Start offset of `json` (0 when nothing was found).
76
+ end: number; // End offset of `json`, exclusive (0 when nothing was found).
77
+ multiple?: string[]; // Every extracted JSON text, unparsed, in order of appearance.
78
+ }
79
+ interface RepairResult { // Outcome of repair().
80
+ output: string; // The repaired text (returned even when it is still not valid JSON).
81
+ warnings: Warning[]; // Fixes that were applied.
82
+ valid: boolean; // Whether JSON.parse accepts `output`.
83
+ }
84
+ interface PartialParseOptions { // Controls which unterminated constructs parsePartial tolerates.
85
+ allowPartialStrings?: boolean; // Accept a string missing its closing quote (bundled default: true).
86
+ allowPartialObjects?: boolean; // Accept an object missing its closing brace (bundled default: true).
87
+ allowPartialArrays?: boolean; // Accept an array missing its closing bracket (bundled default: true).
88
+ allowPartialNumbers?: boolean; // Drop a dangling trailing "." from a number (bundled default: false).
89
+ onIncompleteString?: (str: string) => string; // NOTE(review): not referenced by the bundled parser — confirm it is honored.
90
+ }
91
+ interface ValidationResult<T> { // Outcome of validate().
92
+ ok: boolean; // True when the data satisfied the schema.
93
+ data?: T; // The validated input; set when ok is true.
94
+ errors?: ValidationError[]; // Every violation found; set when ok is false.
95
+ }
96
+ interface ValidationError { // A single schema violation.
97
+ path: string; // Slash-separated location of the offending value, e.g. "/users/0/name"; empty for the root.
98
+ code: ErrorCode; // Failure category.
99
+ message: string; // Human-readable description.
100
+ expected?: string; // What the schema required.
101
+ actual?: string; // What was found instead.
102
+ }
103
+ interface LlmJsonConfig { // Settings passed to configure(); NOTE(review): the bundle stores these but no parsing code appears to read them.
104
+ maxBufferSize?: number; // Bundled default: 1048576.
105
+ maxRepairs?: number; // Bundled default: 10.
106
+ customRepairs?: RepairRule[]; // No bundled default.
107
+ collectWarnings?: boolean; // Bundled default: true.
108
+ }
109
+ interface RepairRule { // A user-supplied regex rewrite for LlmJsonConfig.customRepairs.
110
+ name: string; // Label for the rule.
111
+ pattern: RegExp; // What to match.
112
+ replace: string | ((match: string) => string); // Replacement text, or a function computing it from the match.
113
+ }
114
+ interface LlmJsonInstance { // Object returned by createInstance(); each member has the same signature as the module-level function of that name.
115
+ parse: <T = unknown>(input: string, schema?: Schema) => Result<T>;
116
+ parseWithSchema: <S extends Schema>(input: string, schema: S) => Result<Infer<S>>;
117
+ createStreamingParser: <T = unknown>(options?: StreamingOptions) => StreamingParser<T>;
118
+ parseStream: <T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>) => Promise<Result<T>>;
119
+ extract: (input: string) => ExtractResult;
120
+ extractAll: (input: string) => ExtractResult;
121
+ repair: (input: string) => RepairResult;
122
+ parsePartial: <T = unknown>(input: string, options?: PartialParseOptions) => Result<T>;
123
+ validate: <T = unknown>(data: unknown, schema: Schema) => ValidationResult<T>;
124
+ }
125
+ declare function configure(config: LlmJsonConfig): void; // Shallow-merges `config` into the module-wide settings.
126
+ declare function getConfig(): LlmJsonConfig; // Returns the current module-wide settings object.
127
+
128
+ /**
129
+ * Extract the first top-level JSON object or array from text. Markdown fences are
130
+ * stripped before scanning, so `start`/`end` index into the stripped, trimmed text rather than the raw input.
131
+ *
132
+ * @param input - Text containing JSON (possibly with prose/markdown)
133
+ * @returns Extracted JSON string and position info; `json` is null when nothing is found
134
+ *
135
+ * @example
136
+ * const { json } = extract('Result: {"a": 1}');
137
+ * // json === '{"a": 1}'
138
+ */
139
+ declare function extract(input: string): ExtractResult;
140
+ /**
141
+ * Extract every top-level JSON object or array from text, in order of appearance.
142
+ *
143
+ * @param input - Text containing multiple JSON objects
144
+ * @returns All extracted JSON strings in `multiple` (unparsed); `json`, `start` and `end` describe the first one
145
+ *
146
+ * @example
147
+ * const { multiple } = extractAll('{"a": 1} and {"b": 2}');
148
+ * // multiple === ['{"a": 1}', '{"b": 2}']
149
+ */
150
+ declare function extractAll(input: string): ExtractResult;
151
+
152
+ /**
153
+ * Repair common JSON issues from LLM output. Handles:
154
+ * - Single quotes (converts to double, preserves apostrophes in double-quoted strings)
155
+ * - Unquoted keys
156
+ * - Trailing commas
157
+ * - Python literals (None, True, False)
158
+ * - Comments (line and block; NOTE(review): removed by regex before string handling, so a double slash inside a string value, e.g. a URL, is cut too)
159
+ * - Markdown fences
160
+ *
161
+ * @param input - Potentially malformed JSON string
162
+ * @returns Repaired text, warnings, and a `valid` flag saying whether the result parses as JSON
163
+ *
164
+ * @example
165
+ * const { output, valid } = repair("{name: 'John',}");
166
+ * // output === '{"name": "John"}'
167
+ * // valid === true
168
+ */
169
+ declare function repair(input: string): RepairResult;
170
+
171
+ /**
172
+ * Parse potentially incomplete JSON. Complete input is handed to JSON.parse; otherwise a tolerant
173
+ * parser accepts whatever is still open. Useful for streaming when you want manual control over the parsing process.
174
+ *
175
+ * @param input - Possibly incomplete JSON string (no extraction or repair is applied to it)
176
+ * @param options - Control which types can be partial (strings, objects and arrays by default; not numbers)
177
+ * @returns Best-effort parsed result; `no_json_found` for empty input, `truncated` when recovery fails
178
+ *
179
+ * @example
180
+ * const result = parsePartial('{"users": [{"name": "Al');
181
+ * // result.ok === true
182
+ * // result.data === { users: [{ name: "Al" }] }
183
+ */
184
+ declare function parsePartial<T = unknown>(input: string, options?: PartialParseOptions): Result<T>;
185
+
186
+ /**
187
+ * Validate data against a schema. Separate from parsing for when
188
+ * you already have parsed data and want to check it. The data is returned as-is, not copied or coerced.
189
+ *
190
+ * @param data - Parsed data to validate
191
+ * @param schema - Schema to validate against
192
+ * @returns `{ ok: true, data }` when valid, otherwise `{ ok: false, errors }` listing every violation found
193
+ *
194
+ * @example
195
+ * const result = validate(
196
+ * { name: 'test' },
197
+ * { type: 'object', properties: { name: { type: 'string' } } }
198
+ * );
199
+ * // result.ok === true
200
+ */
201
+ declare function validate<T = unknown>(data: unknown, schema: Schema): ValidationResult<T>;
202
+
203
+ /**
204
+ * Create a stateful streaming parser. Call `write(chunk)` for each
205
+ * chunk from the stream, then `finish()` when done. Each `write` re-parses the whole buffer.
206
+ *
207
+ * @param options - Schema, callbacks for update/warning events (NOTE(review): the bundled build reads only `schema` — confirm the callbacks fire)
208
+ * @returns StreamingParser with write(), finish(), reset() methods
209
+ *
210
+ * @example
211
+ * const parser = createStreamingParser({ schema });
212
+ * for await (const chunk of llmStream) {
213
+ * const result = parser.write(chunk);
214
+ * if (result.ok) updateUI(result.data);
215
+ * }
216
+ * const final = parser.finish();
217
+ */
218
+ declare function createStreamingParser<T = unknown>(options?: StreamingOptions): StreamingParser<T>;
219
+ /**
220
+ * Parse an async iterable or ReadableStream of chunks.
221
+ * Convenience wrapper around createStreamingParser. Chunks from an async iterable must already be strings.
222
+ *
223
+ * @param chunks - AsyncIterable<string> or ReadableStream (stream chunks are decoded with TextDecoder); any other value yields an `invalid_json` failure
224
+ * @param schema - Optional schema for validation
225
+ * @param options - Streaming callbacks
226
+ * @returns Promise resolving to the result of `finish()` after every chunk has been written
227
+ *
228
+ * @example
229
+ * const stream = openai.chat.completions.create({ stream: true, ... });
230
+ * const result = await parseStream(stream, schema);
231
+ */
232
+ declare function parseStream<T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>): Promise<Result<T>>;
233
+
234
+ /**
235
+ * Parse LLM output into structured data. Extracts the first JSON value from text, tries it as-is,
236
+ * repairs common issues (single quotes, trailing commas, etc.) only if that fails,
237
+ * and optionally validates against a schema. Never throws.
238
+ *
239
+ * @param input - Raw LLM output (may contain prose, markdown, etc.)
240
+ * @param schema - Optional schema for validation
241
+ * @returns Result object with `ok` flag, data/error, and optional warnings; error codes are `no_json_found`, `invalid_json` or `schema_mismatch`
242
+ *
243
+ * @example
244
+ * const result = parse<{ name: string; age: number }>('{name: "John", age: 30,}');
245
+ * if (result.ok) {
246
+ * console.log(result.data.name); // "John"
247
+ * }
248
+ */
249
+ declare function parse<T = unknown>(input: string, schema?: Schema): Result<T>;
250
+ /**
251
+ * Parse with schema, inferring the return type from the schema. Same runtime behavior as `parse`.
252
+ *
253
+ * @param input - Raw LLM output
254
+ * @param schema - Schema to validate against
255
+ * @returns Typed result whose `data` is `Infer<S>`
256
+ *
257
+ * @example
258
+ * const schema = { type: 'object', properties: { name: { type: 'string' } } } as const;
259
+ * const result = parseWithSchema('{name: "test"}', schema);
260
+ * if (result.ok) result.data.name; // typed as string
261
+ */
262
+ declare function parseWithSchema<S extends Schema>(input: string, schema: S): Result<Infer<S>>;
263
+ /**
264
+ * Create a configured instance of llm-json with custom settings.
265
+ * Useful when you need different settings for different use cases.
266
+ *
267
+ * @param config - Configuration options (NOTE(review): the bundled build never reads this argument, so every instance behaves identically — confirm)
268
+ * @returns Object with all parse/validate/extract functions
269
+ *
270
+ * @example
271
+ * const parser = createInstance({ maxRepairs: 5 });
272
+ * const result = parser.parse(input);
273
+ */
274
+ declare function createInstance(config?: LlmJsonConfig): LlmJsonInstance;
275
+
276
+ export { type ArraySchema, type ErrorCode, type ExtractResult, type Failure, type Infer, type LiteralSchema, type LlmJsonConfig, type LlmJsonInstance, type ObjectSchema, type ParseError, type PartialParseOptions, type PartialResult, type PrimitiveSchema, type RepairResult, type RepairRule, type Result, type Schema, type StreamingOptions, type StreamingParser, type Success, type UnionSchema, type ValidationError, type ValidationResult, type Warning, type WarningCode, configure, createInstance, createStreamingParser, extract, extractAll, getConfig, parse, parsePartial, parseStream, parseWithSchema, repair, validate };
@@ -0,0 +1,276 @@
1
+ interface Success<T> { // Successful branch of Result<T>.
2
+ ok: true; // Literal discriminant: narrowing on `ok` selects this branch.
3
+ data: T; // The parsed (and, when a schema was supplied, validated) value.
4
+ warnings?: Warning[]; // Repairs applied while parsing; omitted when there were none.
5
+ }
6
+ interface Failure<T> { // Failed branch of Result<T>.
7
+ ok: false; // Literal discriminant: narrowing on `ok` selects this branch.
8
+ error: ParseError; // Why parsing or validation failed.
9
+ partial?: PartialResult<T>; // Optional partial-recovery details; not set on every failure.
10
+ }
11
+ type Result<T> = Success<T> | Failure<T>; // Discriminated union on `ok`; check it before reading `data` or `error`.
12
+ interface Warning { // A non-fatal note about a repair made to the input.
13
+ code: WarningCode; // Machine-readable kind of repair.
14
+ message: string; // Human-readable text; may be empty.
15
+ position?: number; // presumably a character offset into the input — TODO confirm
16
+ }
17
+ type WarningCode = 'trailing_comma_removed' | 'single_quotes_replaced' | 'unquoted_key_fixed' | 'missing_comma_added' | 'markdown_fence_stripped' | 'prose_stripped' | 'python_literal_converted' | 'truncated_string_closed' | 'unescaped_quote_fixed'; // Kinds of repair a Warning can report.
18
+ interface ParseError { // Describes why a parse or validation attempt failed.
19
+ code: ErrorCode; // Machine-readable failure category.
20
+ message: string; // Human-readable description.
21
+ position?: number; // presumably a character offset into the input — TODO confirm
22
+ context?: string; // Extra detail; for 'schema_mismatch' the bundled parser stores the JSON-stringified validation errors here.
23
+ }
24
+ type ErrorCode = 'no_json_found' | 'invalid_json' | 'schema_mismatch' | 'truncated' | 'type_error' | 'missing_required'; // Failure categories shared by ParseError and ValidationError.
25
+ interface PartialResult<T> { // Best-effort data that a Failure may carry.
26
+ confidence: 'high' | 'medium' | 'low'; // How trustworthy the partial data is; the criteria are not visible here.
27
+ complete: Partial<T>; // presumably the fields that were fully parsed — TODO confirm
28
+ pending: string[]; // presumably names or paths of fields still incomplete — TODO confirm
29
+ }
30
+ type Schema = PrimitiveSchema | ArraySchema | ObjectSchema | UnionSchema | LiteralSchema; // Any supported schema node; the `type` field tells the variants apart.
31
+ interface PrimitiveSchema { // Schema node for a JSON primitive.
32
+ type: 'string' | 'number' | 'boolean' | 'null';
33
+ enum?: string[]; // Allowed values; NOTE(review): the bundled validator consults this only for type 'string'.
34
+ }
35
+ interface ArraySchema { // Schema node for an array whose elements share one schema.
36
+ type: 'array';
37
+ items: Schema; // Schema every element must satisfy.
38
+ minItems?: number; // Inclusive lower bound on length.
39
+ maxItems?: number; // Inclusive upper bound on length.
40
+ }
41
+ interface ObjectSchema { // Schema node for a plain object.
42
+ type: 'object';
43
+ properties: Record<string, Schema>; // Per-key schemas; keys not listed here are unchecked unless additionalProperties is false.
44
+ required?: string[]; // Keys that must be present; everything else is optional.
45
+ additionalProperties?: boolean; // Only an explicit `false` rejects keys missing from `properties`.
46
+ }
47
+ interface UnionSchema { // Schema node that accepts a value matching any one variant.
48
+ type: 'union';
49
+ variants: Schema[]; // Candidate schemas, tried in order.
50
+ }
51
+ interface LiteralSchema { // Schema node that accepts exactly one constant.
52
+ type: 'literal';
53
+ value: string | number | boolean | null; // Compared with strict equality (===).
54
+ }
55
+ type Infer<S extends Schema> = S extends PrimitiveSchema ? S['type'] extends 'string' ? string : S['type'] extends 'number' ? number : S['type'] extends 'boolean' ? boolean : null : S extends ArraySchema ? Infer<S['items']>[] : S extends ObjectSchema ? { // Maps a Schema type to the TypeScript type of the values it describes.
56
+ [K in keyof S['properties']]: Infer<S['properties'][K]>; // Every declared property is emitted as required; the schema's `required` list is not consulted.
57
+ } : S extends UnionSchema ? Infer<S['variants'][number]> : S extends LiteralSchema ? S['value'] : unknown; // Falls back to unknown for unrecognized shapes.
58
+ interface StreamingOptions { // Options accepted by createStreamingParser.
59
+ schema?: Schema; // When set, parsed data is validated against it.
60
+ onUpdate?: (result: Result<unknown>) => void; // NOTE(review): none of the four callbacks is referenced by the bundled streaming parser — confirm they fire.
61
+ onJsonStart?: () => void;
62
+ onJsonComplete?: (data: unknown) => void;
63
+ onWarning?: (warning: Warning) => void;
64
+ }
65
+ interface StreamingParser<T = unknown> { // Stateful parser fed one chunk at a time.
66
+ write(chunk: string): Result<T>; // Appends the chunk and returns a best-effort parse of everything buffered so far.
67
+ finish(): Result<T>; // Final result; call once the stream has ended.
68
+ reset(): void; // Clears the buffer and all tracking state.
69
+ readonly buffer: string; // All text written so far.
70
+ readonly inJson: boolean; // True once an opening `{` or `[` has been seen.
71
+ readonly depth: number; // Current brace/bracket nesting level.
72
+ }
73
+ interface ExtractResult { // What extract()/extractAll() found.
74
+ json: string | null; // First extracted JSON text, or null when nothing was found.
75
+ start: number; // Start offset of `json` (0 when nothing was found).
76
+ end: number; // End offset of `json`, exclusive (0 when nothing was found).
77
+ multiple?: string[]; // Every extracted JSON text, unparsed, in order of appearance.
78
+ }
79
+ interface RepairResult { // Outcome of repair().
80
+ output: string; // The repaired text (returned even when it is still not valid JSON).
81
+ warnings: Warning[]; // Fixes that were applied.
82
+ valid: boolean; // Whether JSON.parse accepts `output`.
83
+ }
84
+ interface PartialParseOptions { // Controls which unterminated constructs parsePartial tolerates.
85
+ allowPartialStrings?: boolean; // Accept a string missing its closing quote (bundled default: true).
86
+ allowPartialObjects?: boolean; // Accept an object missing its closing brace (bundled default: true).
87
+ allowPartialArrays?: boolean; // Accept an array missing its closing bracket (bundled default: true).
88
+ allowPartialNumbers?: boolean; // Drop a dangling trailing "." from a number (bundled default: false).
89
+ onIncompleteString?: (str: string) => string; // NOTE(review): not referenced by the bundled parser — confirm it is honored.
90
+ }
91
+ interface ValidationResult<T> { // Outcome of validate().
92
+ ok: boolean; // True when the data satisfied the schema.
93
+ data?: T; // The validated input; set when ok is true.
94
+ errors?: ValidationError[]; // Every violation found; set when ok is false.
95
+ }
96
+ interface ValidationError { // A single schema violation.
97
+ path: string; // Slash-separated location of the offending value, e.g. "/users/0/name"; empty for the root.
98
+ code: ErrorCode; // Failure category.
99
+ message: string; // Human-readable description.
100
+ expected?: string; // What the schema required.
101
+ actual?: string; // What was found instead.
102
+ }
103
+ interface LlmJsonConfig { // Settings passed to configure(); NOTE(review): the bundle stores these but no parsing code appears to read them.
104
+ maxBufferSize?: number; // Bundled default: 1048576.
105
+ maxRepairs?: number; // Bundled default: 10.
106
+ customRepairs?: RepairRule[]; // No bundled default.
107
+ collectWarnings?: boolean; // Bundled default: true.
108
+ }
109
+ interface RepairRule { // A user-supplied regex rewrite for LlmJsonConfig.customRepairs.
110
+ name: string; // Label for the rule.
111
+ pattern: RegExp; // What to match.
112
+ replace: string | ((match: string) => string); // Replacement text, or a function computing it from the match.
113
+ }
114
+ interface LlmJsonInstance { // Object returned by createInstance(); each member has the same signature as the module-level function of that name.
115
+ parse: <T = unknown>(input: string, schema?: Schema) => Result<T>;
116
+ parseWithSchema: <S extends Schema>(input: string, schema: S) => Result<Infer<S>>;
117
+ createStreamingParser: <T = unknown>(options?: StreamingOptions) => StreamingParser<T>;
118
+ parseStream: <T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>) => Promise<Result<T>>;
119
+ extract: (input: string) => ExtractResult;
120
+ extractAll: (input: string) => ExtractResult;
121
+ repair: (input: string) => RepairResult;
122
+ parsePartial: <T = unknown>(input: string, options?: PartialParseOptions) => Result<T>;
123
+ validate: <T = unknown>(data: unknown, schema: Schema) => ValidationResult<T>;
124
+ }
125
+ declare function configure(config: LlmJsonConfig): void;
126
+ declare function getConfig(): LlmJsonConfig;
127
+
128
+ /**
129
+ * Extract the first JSON object or array from text. Strips markdown fences
130
+ * and returns the position of the JSON in the fence-stripped, trimmed text.
131
+ *
132
+ * @param input - Text containing JSON (possibly with prose/markdown)
133
+ * @returns Extracted JSON string and position info
134
+ *
135
+ * @example
136
+ * const { json } = extract('Result: {"a": 1}');
137
+ * // json === '{"a": 1}'
138
+ */
139
+ declare function extract(input: string): ExtractResult;
140
+ /**
141
+ * Extract all top-level JSON objects and arrays from text.
142
+ *
143
+ * @param input - Text containing multiple JSON objects
144
+ * @returns The first match in `json`/`start`/`end`, and every extracted JSON string in `multiple`
145
+ *
146
+ * @example
147
+ * const { multiple } = extractAll('{"a": 1} and {"b": 2}');
148
+ * // multiple === ['{"a": 1}', '{"b": 2}']
149
+ */
150
+ declare function extractAll(input: string): ExtractResult;
151
+
152
+ /**
153
+ * Repair common JSON issues from LLM output. Handles:
154
+ * - Single quotes (converts to double, preserves apostrophes in strings)
155
+ * - Unquoted keys
156
+ * - Trailing commas
157
+ * - Python literals (None, True, False)
158
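+ * - Comments (line and block; stripped before strings are analysed, so "//" inside a value such as a URL is removed too)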
159
+ * - Markdown fences
160
+ *
161
+ * @param input - Potentially malformed JSON string
162
+ * @returns Repaired JSON, warnings, and validity flag
163
+ *
164
+ * @example
165
+ * const { output, valid } = repair("{name: 'John',}");
166
+ * // output === '{"name": "John"}'
167
+ * // valid === true
168
+ */
169
+ declare function repair(input: string): RepairResult;
170
+
171
+ /**
172
+ * Parse potentially incomplete JSON. Useful for streaming when you
173
+ * want manual control over the parsing process.
174
+ *
175
+ * @param input - Possibly incomplete JSON string
176
+ * @param options - Control which types can be partial
177
+ * @returns Best-effort parsed result
178
+ *
179
+ * @example
180
+ * const result = parsePartial('{"users": [{"name": "Al');
181
+ * // result.ok === true
182
+ * // result.data === { users: [{ name: "Al" }] }
183
+ */
184
+ declare function parsePartial<T = unknown>(input: string, options?: PartialParseOptions): Result<T>;
185
+
186
+ /**
187
+ * Validate data against a schema. Separate from parsing for when
188
+ * you already have parsed data and want to check it.
189
+ *
190
+ * @param data - Parsed data to validate
191
+ * @param schema - Schema to validate against
192
+ * @returns Validation result with errors if invalid
193
+ *
194
+ * @example
195
+ * const result = validate(
196
+ * { name: 'test' },
197
+ * { type: 'object', properties: { name: { type: 'string' } } }
198
+ * );
199
+ * // result.ok === true
200
+ */
201
+ declare function validate<T = unknown>(data: unknown, schema: Schema): ValidationResult<T>;
202
+
203
+ /**
204
+ * Create a stateful streaming parser. Call `write(chunk)` for each
205
+ * chunk from the stream, then `finish()` when done.
206
+ *
207
+ * @param options - Optional schema used to validate each parse; the callback options are accepted but not invoked in 0.1.0
208
+ * @returns StreamingParser with write(), finish(), reset() methods
209
+ *
210
+ * @example
211
+ * const parser = createStreamingParser({ schema });
212
+ * for await (const chunk of llmStream) {
213
+ * const result = parser.write(chunk);
214
+ * if (result.ok) updateUI(result.data);
215
+ * }
216
+ * const final = parser.finish();
217
+ */
218
+ declare function createStreamingParser<T = unknown>(options?: StreamingOptions): StreamingParser<T>;
219
+ /**
220
+ * Parse an async iterable or ReadableStream of chunks.
221
+ * Convenience wrapper around createStreamingParser.
222
+ *
223
+ * @param chunks - AsyncIterable<string>, or a ReadableStream of bytes (decoded with TextDecoder, UTF-8 by default)
224
+ * @param schema - Optional schema for validation
225
+ * @param options - Streaming callbacks
226
+ * @returns Promise resolving to final result
227
+ *
228
+ * @example
229
+ * const response = await fetch(url); // response.body is a ReadableStream of bytes
230
+ * const result = await parseStream(response.body, schema);
231
+ */
232
+ declare function parseStream<T = unknown>(chunks: AsyncIterable<string> | ReadableStream, schema?: Schema, options?: Omit<StreamingOptions, 'schema'>): Promise<Result<T>>;
233
+
234
+ /**
235
+ * Parse LLM output into structured data. Extracts JSON from text,
236
+ * repairs common issues (single quotes, trailing commas, etc.),
237
+ * and optionally validates against a schema. Never throws.
238
+ *
239
+ * @param input - Raw LLM output (may contain prose, markdown, etc.)
240
+ * @param schema - Optional schema for validation
241
+ * @returns Result object with `ok` flag, data/error, and optional warnings
242
+ *
243
+ * @example
244
+ * const result = parse('{name: "John", age: 30,}');
245
+ * if (result.ok) {
246
+ * console.log(result.data.name); // "John"
247
+ * }
248
+ */
249
+ declare function parse<T = unknown>(input: string, schema?: Schema): Result<T>;
250
+ /**
251
+ * Parse with schema, inferring the return type from the schema.
252
+ *
253
+ * @param input - Raw LLM output
254
+ * @param schema - Schema to validate against
255
+ * @returns Typed result
256
+ *
257
+ * @example
258
+ * const schema = { type: 'object', properties: { name: { type: 'string' } } } as const;
259
+ * const result = parseWithSchema('{name: "test"}', schema);
260
+ * if (result.ok) result.data.name; // typed as string
261
+ */
262
+ declare function parseWithSchema<S extends Schema>(input: string, schema: S): Result<Infer<S>>;
263
+ /**
264
+ * Create a configured instance of llm-json with custom settings.
265
+ * Useful when you need different settings for different use cases.
266
+ *
267
+ * @param config - Configuration options (accepted, but not applied by the 0.1.0 implementation)
268
+ * @returns Object with all parse/validate/extract functions
269
+ *
270
+ * @example
271
+ * const parser = createInstance({ maxRepairs: 5 });
272
+ * const result = parser.parse(input);
273
+ */
274
+ declare function createInstance(config?: LlmJsonConfig): LlmJsonInstance;
275
+
276
+ export { type ArraySchema, type ErrorCode, type ExtractResult, type Failure, type Infer, type LiteralSchema, type LlmJsonConfig, type LlmJsonInstance, type ObjectSchema, type ParseError, type PartialParseOptions, type PartialResult, type PrimitiveSchema, type RepairResult, type RepairRule, type Result, type Schema, type StreamingOptions, type StreamingParser, type Success, type UnionSchema, type ValidationError, type ValidationResult, type Warning, type WarningCode, configure, createInstance, createStreamingParser, extract, extractAll, getConfig, parse, parsePartial, parseStream, parseWithSchema, repair, validate };
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ var b={maxBufferSize:1048576,maxRepairs:10,collectWarnings:!0};function v(t){b={...b,...t}}function E(){return b}function P(t,r){let e=0,n=!1,a=!1,i=-1,o=[],u=[];for(let l=0;l<t.length;l++){let m=t[l];if(a){a=!1;continue}if(m==="\\"&&n){a=!0;continue}if(m==='"'){n=!n;continue}if(!n){if(m==="{"||m==="[")e===0&&(i=l),e++;else if((m==="}"||m==="]")&&(e--,e===0&&i>=0)){let p=t.slice(i,l+1);if(o.push(p),u.push({start:i,end:l+1}),!r)break}}}return o.length===0?{json:null,start:0,end:0,multiple:[]}:{json:o[0],start:u[0].start,end:u[0].end,multiple:o}}function _(t){return t.replace(/```(?:json)?\s*\n?([\s\S]*?)\n?```/g,"$1").trim()}function d(t){if(!t)return{json:null,start:0,end:0};let r=_(t);return P(r,!1)}function w(t){if(!t)return{json:null,start:0,end:0,multiple:[]};let r=_(t);return P(r,!0)}function h(t){if(!t)return{output:"",warnings:[],valid:!1};let r=[],e=t.trim();e=e.replace(/```json?\s*\n?/gi,"").replace(/```\s*$/g,""),e=e.replace(/\/\/[^\n]*/g,"").replace(/\/\*[\s\S]*?\*\//g,""),e=T(e,r);{let a=e;e=e.replace(/,\s*([}\]])/g,"$1"),a!==e&&r.push({code:"trailing_comma_removed",message:""})}e=e.replace(/,\s*,/g,",");let n=!1;try{JSON.parse(e),n=!0}catch{}return{output:e,warnings:r,valid:n}}function T(t,r){let e="",n=0,a=!1,i="",o=!1,u=!1;for(;n<t.length;){let l=t[n];if(o){if(o=!1,a&&i==="'"){if(l==="'"){e+="'",n++;continue}else if(l==='"'){e+='\\"',n++;continue}}e+=l,n++;continue}if(l==="\\"&&a){if(i==="'"){o=!0,n++;continue}o=!0,e+=l,n++;continue}if(l==='"'){if(a){if(i==='"')a=!1,i="";else if(i==="'"){e+='\\"',n++;continue}}else a=!0,i='"';e+='"',n++;continue}if(l==="'"){if(a)if(i==='"'){e+="'",n++;continue}else i==="'"&&(a=!1,i="",u=!0);else a=!0,i="'",u=!0;e+='"',n++;continue}if(a){e+=l,n++;continue}if(l==="{"||l===","){for(e+=l,n++;n<t.length&&/\s/.test(t[n]);)e+=t[n++];if(n>=t.length)break;if(t[n]==='"'){a=!0,i='"',e+='"',n++;continue}if(t[n]==="'"){a=!0,i="'",u=!0,e+='"',n++;continue}let 
m=n;for(;n<t.length&&/[\w$_]/.test(t[n]);)n++;n>m&&!/^(true|false|null|undefined|None|True|False)$/.test(t.slice(m,n))&&(e+='"'+t.slice(m,n)+'"',r.push({code:"unquoted_key_fixed",message:""}));continue}if(t.slice(n,n+4)==="None"){e+="null",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(t.slice(n,n+4)==="True"){e+="true",r.push({code:"python_literal_converted",message:""}),n+=4;continue}if(t.slice(n,n+5)==="False"){e+="false",r.push({code:"python_literal_converted",message:""}),n+=5;continue}e+=l,n++}return u&&r.push({code:"single_quotes_replaced",message:""}),e}function S(t,r){if(!t)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};try{return{ok:!0,data:JSON.parse(t)}}catch{}let e={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1,...r};try{return{ok:!0,data:O(t,e)}}catch(n){return{ok:!1,error:{code:"truncated",message:n.message},partial:{confidence:"medium",complete:{},pending:[]}}}}function O(t,r){let e=0,n=()=>{for(;e<t.length&&/\s/.test(t[e]);)e++},a=()=>t[e],i=()=>t[e++],o=()=>{n();let s=a();if(s==="{")return p();if(s==="[")return m();if(s==='"')return u();if(s==="-"||/[0-9]/.test(s))return l();if(t.slice(e,e+4)==="true")return e+=4,!0;if(t.slice(e,e+5)==="false")return e+=5,!1;if(t.slice(e,e+4)==="null")return e+=4,null;throw new Error("Unexpected token at "+e)},u=()=>{i();let s="",c=!1;for(;e<t.length;){let f=i();if(c){c=!1,s+=f;continue}if(f==="\\"){c=!0;continue}if(f==='"')return s;s+=f}if(r.allowPartialStrings)return s;throw new Error("Unterminated string")},l=()=>{let s=e;for(a()==="-"&&i();e<t.length&&/[0-9]/.test(t[e]);)i();if(a()===".")for(i();e<t.length&&/[0-9]/.test(t[e]);)i();if(a()==="e"||a()==="E")for(i(),(a()==="+"||a()==="-")&&i();e<t.length&&/[0-9]/.test(t[e]);)i();let c=t.slice(s,e);return r.allowPartialNumbers&&c.endsWith(".")&&(c=c.slice(0,-1)),parseFloat(c)},m=()=>{i();let s=[];if(n(),a()==="]")return i(),s;for(;e<t.length;){if(n(),a()==="]")return 
i(),s;if(a()===","){i();continue}s.push(o())}if(r.allowPartialArrays)return s;throw new Error("Unterminated array")},p=()=>{i();let s={};if(n(),a()==="}")return i(),s;for(;e<t.length;){if(n(),a()==="}")return i(),s;if(a()===","){i();continue}let c=u();if(n(),a()!==":"){if(r.allowPartialObjects)return s;throw new Error("Expected colon")}i(),n(),s[c]=o()}if(r.allowPartialObjects)return s;throw new Error("Unterminated object")};return o()}function g(t,r){let e=x(t,r,"");return e.length===0?{ok:!0,data:t}:{ok:!1,errors:e}}function x(t,r,e){switch(r.type){case"null":return t===null?[]:[{path:e,code:"type_error",message:"Expected null",expected:"null",actual:typeof t}];case"string":return typeof t=="string"?I(t,r.enum,e):[{path:e,code:"type_error",message:"Expected string",expected:"string",actual:typeof t}];case"number":return typeof t=="number"?[]:[{path:e,code:"type_error",message:"Expected number",expected:"number",actual:typeof t}];case"boolean":return typeof t=="boolean"?[]:[{path:e,code:"type_error",message:"Expected boolean",expected:"boolean",actual:typeof t}];case"array":return A(t,r,e);case"object":return J(t,r,e);case"union":return N(t,r,e);case"literal":return t===r.value?[]:[{path:e,code:"type_error",message:`Expected ${r.value}`,expected:String(r.value),actual:String(t)}];default:return[]}}function I(t,r,e){return r&&!r.includes(t)?[{path:e,code:"type_error",message:"Not in enum",expected:r.join("|"),actual:t}]:[]}function A(t,r,e){if(!Array.isArray(t))return[{path:e,code:"type_error",message:"Expected array",expected:"array",actual:typeof t}];let n=[];return r.minItems!==void 0&&t.length<r.minItems&&n.push({path:e,code:"type_error",message:`Min ${r.minItems} items`,expected:`>=${r.minItems}`,actual:String(t.length)}),r.maxItems!==void 0&&t.length>r.maxItems&&n.push({path:e,code:"type_error",message:`Max ${r.maxItems} items`,expected:`<=${r.maxItems}`,actual:String(t.length)}),t.forEach((a,i)=>n.push(...x(a,r.items,`${e}/${i}`))),n}function 
J(t,r,e){if(typeof t!="object"||t===null||Array.isArray(t))return[{path:e,code:"type_error",message:"Expected object",expected:"object",actual:t===null?"null":Array.isArray(t)?"array":typeof t}];let n=[],a=t,i=r.required||[];for(let o of i)o in a||n.push({path:`${e}/${o}`,code:"missing_required",message:`Missing ${o}`,expected:o});for(let[o,u]of Object.entries(a))o in r.properties?n.push(...x(u,r.properties[o],`${e}/${o}`)):r.additionalProperties===!1&&n.push({path:`${e}/${o}`,code:"type_error",message:`Unknown property ${o}`});return n}function N(t,r,e){for(let n of r.variants)if(x(t,n,e).length===0)return[];return[{path:e,code:"type_error",message:"No union variant matched",expected:"union",actual:String(t)}]}function y(t){let r=t||{},e={allowPartialStrings:!0,allowPartialObjects:!0,allowPartialArrays:!0,allowPartialNumbers:!1},n="",a=0,i=!1,o=!1,u=!1,l=p=>{n+=p;for(let s=n.length-p.length;s<n.length;s++){let c=n[s];if(o){o=!1;continue}if(c==="\\"&&i){o=!0;continue}if(c==='"'){i=!i;continue}i||(c==="{"||c==="["?(a===0&&(u=!0),a++):(c==="}"||c==="]")&&a--)}},m=()=>{let p=d(n);if(!p.json)return{ok:!1,error:{code:"truncated",message:"No JSON found"}};let s=h(p.json),c=S(s.output,e);if(c.ok&&r.schema){let f=g(c.data,r.schema);if(!f.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(f.errors)}}}return c};return{get buffer(){return n},get inJson(){return u},get depth(){return a},write(p){return l(p),m()},finish(){return u?a>0||i?{ok:!1,error:{code:"truncated",message:"Incomplete JSON"}}:m():{ok:!1,error:{code:"no_json_found",message:"No JSON found"}}},reset(){n="",a=0,i=!1,o=!1,u=!1}}}function W(t){return t!=null&&typeof t[Symbol.asyncIterator]=="function"}function $(t){return typeof ReadableStream<"u"&&t instanceof ReadableStream}async function*C(t){let r=t.getReader(),e=new TextDecoder;try{for(;;){let{done:n,value:a}=await r.read();if(n)break;yield e.decode(a,{stream:!0})}}finally{r.releaseLock()}}async function 
k(t,r,e){let n=y({...e,schema:r}),a=$(t)?C(t):W(t)?t:null;if(!a)return{ok:!1,error:{code:"invalid_json",message:"Invalid input"}};for await(let i of a)n.write(i);return n.finish()}function R(t,r){if(!t)return{ok:!1,error:{code:"no_json_found",message:"Empty input"}};let e=d(t);if(!e.json)return{ok:!1,error:{code:"no_json_found",message:"No JSON found"}};try{let a=JSON.parse(e.json);if(r){let i=g(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a}}catch{}let n=h(e.json);try{let a=JSON.parse(n.output);if(r){let i=g(a,r);if(!i.ok)return{ok:!1,error:{code:"schema_mismatch",message:"Schema mismatch",context:JSON.stringify(i.errors)}}}return{ok:!0,data:a,warnings:n.warnings.length?n.warnings:void 0}}catch(a){return{ok:!1,error:{code:"invalid_json",message:a.message}}}}function j(t,r){return R(t,r)}function V(t){return{parse:(r,e)=>R(r,e),parseWithSchema:(r,e)=>j(r,e),createStreamingParser:r=>y(r),parseStream:(r,e,n)=>k(r,e,n),extract:r=>d(r),extractAll:r=>w(r),repair:r=>h(r),parsePartial:(r,e)=>S(r,e),validate:(r,e)=>g(r,e)}}export{v as configure,V as createInstance,y as createStreamingParser,d as extract,w as extractAll,E as getConfig,R as parse,S as parsePartial,k as parseStream,j as parseWithSchema,h as repair,g as validate};
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "parse-llm-json",
3
+ "version": "0.1.0",
4
+ "description": "Extract structured data from LLM output with streaming support",
5
+ "type": "module",
6
+ "main": "./dist/index.cjs",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "import": "./dist/index.js",
13
+ "require": "./dist/index.cjs"
14
+ }
15
+ },
16
+ "files": ["dist"],
17
+ "scripts": {
18
+ "build": "tsup src/index.ts --format esm,cjs --dts --clean --minify",
19
+ "build:test": "tsup src/test.ts --format esm --clean --minify && node dist/test.js",
20
+ "test": "npm run build && npm run build:test",
21
+ "typecheck": "tsc --noEmit",
22
+ "prepublishOnly": "npm run build"
23
+ },
24
+ "keywords": [
25
+ "llm",
26
+ "json",
27
+ "parse",
28
+ "streaming",
29
+ "ai",
30
+ "openai",
31
+ "claude",
32
+ "gpt",
33
+ "structured-output",
34
+ "partial-json"
35
+ ],
36
+ "license": "MIT",
37
+ "repository": {
38
+ "type": "git",
39
+ "url": "git+https://github.com/tita-n/llm-json.git"
40
+ },
41
+ "bugs": {
42
+ "url": "https://github.com/tita-n/llm-json/issues"
43
+ },
44
+ "homepage": "https://github.com/tita-n/llm-json#readme",
45
+ "devDependencies": {
46
+ "tsup": "^8.0.0",
47
+ "typescript": "^5.3.0"
48
+ }
49
+ }