json-from-llm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/LICENSE +21 -0
- package/README.md +82 -0
- package/dist/index.cjs +211 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +69 -0
- package/dist/index.d.ts +69 -0
- package/dist/index.js +178 -0
- package/dist/index.js.map +1 -0
- package/package.json +74 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project adheres
|
|
5
|
+
to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - 2026-06-04
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- `extractJson(text, options?)` and `tryExtractJson(text, options?)` — recover a
|
|
12
|
+
JSON value from LLM output wrapped in reasoning/thinking tags, markdown fences
|
|
13
|
+
or prose.
|
|
14
|
+
- Strips `<think>` / `<thinking>` / `<reasoning>` / `<thought>` blocks before scanning.
|
|
15
|
+
- String-aware balanced-value scanner and trailing-comma repair (never corrupt
|
|
16
|
+
string contents).
|
|
17
|
+
- `expect: 'object' | 'array' | 'any'` option and `repair` toggle.
|
|
18
|
+
- Low-level exports: `stripReasoning`, `fencedBlocks`, `balancedSpans`,
|
|
19
|
+
`removeTrailingCommas`.
|
|
20
|
+
- Zero runtime dependencies; ESM + CJS builds with type declarations.
|
|
21
|
+
|
|
22
|
+
[0.1.0]: https://github.com/slegarraga/json-from-llm/releases/tag/v0.1.0
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sebastian Legarraga
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# json-from-llm
|
|
2
|
+
|
|
3
|
+
> Extract valid JSON from an LLM response — even when it's wrapped in reasoning/thinking tags, markdown fences or prose. **Zero dependencies.**
|
|
4
|
+
|
|
5
|
+
You asked for JSON. The model gave you:
|
|
6
|
+
|
|
7
|
+
````text
|
|
8
|
+
<think>
|
|
9
|
+
Let me reason about this. The score should reflect... maybe {draft: 6}?
|
|
10
|
+
</think>
|
|
11
|
+
Sure! Here's the result:
|
|
12
|
+
```json
|
|
13
|
+
{"score": 8, "reason": "clear"}
|
|
14
|
+
```
|
|
15
|
+
Hope that helps!
|
|
16
|
+
````
|
|
17
|
+
|
|
18
|
+
`JSON.parse` throws on all of that. `json-from-llm` returns `{ score: 8, reason: "clear" }`.
|
|
19
|
+
|
|
20
|
+
```ts
|
|
21
|
+
import { extractJson } from 'json-from-llm';
|
|
22
|
+
|
|
23
|
+
const data = extractJson<{ score: number }>(modelOutput);
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Why
|
|
27
|
+
|
|
28
|
+
- **Reasoning-model aware.** Strips `<think>` / `<thinking>` blocks first, so brace-laden reasoning (a real cause of `No object generated` failures with DeepSeek R1, Gemini 2.5 thinking, prompted Claude) never gets mistaken for the payload.
|
|
29
|
+
- **Handles the real wrappers.** Markdown fences (```` ```json ```` and bare ```` ``` ````), conversational prose before/after, and the JSON sitting bare in the text.
|
|
30
|
+
- **String-aware, never corrupts.** The scanner and the trailing-comma repair both respect string contents — a `}` or `,` inside `"a string value"` is left alone.
|
|
31
|
+
- **Conservative repair.** Removes trailing commas (the most common malformation); it will never rewrite your data.
|
|
32
|
+
- **Two entry points.** `extractJson` throws on failure; `tryExtractJson` returns `{ found }`.
|
|
33
|
+
- **Zero dependencies**, ESM + CJS, fully typed.
|
|
34
|
+
|
|
35
|
+
## Install
|
|
36
|
+
|
|
37
|
+
```sh
|
|
38
|
+
npm install json-from-llm
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## API
|
|
42
|
+
|
|
43
|
+
### `extractJson<T>(text, options?) => T`
|
|
44
|
+
|
|
45
|
+
Returns the extracted JSON value, or throws `JsonExtractionError` if none can be recovered.
|
|
46
|
+
|
|
47
|
+
### `tryExtractJson<T>(text, options?) => { found: true, value: T } | { found: false }`
|
|
48
|
+
|
|
49
|
+
The non-throwing variant.
|
|
50
|
+
|
|
51
|
+
### Options
|
|
52
|
+
|
|
53
|
+
```ts
|
|
54
|
+
interface ExtractOptions {
|
|
55
|
+
repair?: boolean; // remove trailing commas (default true)
|
|
56
|
+
expect?: 'object' | 'array' | 'any'; // restrict the top-level type (default 'any')
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
`expect` is handy when prose contains a stray array but you want the object:
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
extractJson('[1,2] then the answer {"a":1}', { expect: 'object' }); // { a: 1 }
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Algorithm
|
|
67
|
+
|
|
68
|
+
1. Strip `<think>` / `<thinking>` / `<reasoning>` / `<thought>` blocks.
|
|
69
|
+
2. Prefer the contents of fenced ```` ```json ```` (or bare ```` ``` ````) code blocks.
|
|
70
|
+
3. Otherwise scan for the first balanced `{…}` / `[…]` that parses, string-aware.
|
|
71
|
+
4. If parsing fails, apply conservative repair (trailing commas) and retry.
|
|
72
|
+
|
|
73
|
+
The low-level pieces (`stripReasoning`, `fencedBlocks`, `balancedSpans`, `removeTrailingCommas`) are exported too.
|
|
74
|
+
|
|
75
|
+
## Related
|
|
76
|
+
|
|
77
|
+
- [`tool-schema`](https://www.npmjs.com/package/tool-schema) — turn a JSON Schema into a provider tool/function schema (define the shape you then extract).
|
|
78
|
+
- [`llm-sse`](https://www.npmjs.com/package/llm-sse) · [`llm-messages`](https://www.npmjs.com/package/llm-messages) · [`llm-errors`](https://www.npmjs.com/package/llm-errors) — the provider-portability suite.
|
|
79
|
+
|
|
80
|
+
## License
|
|
81
|
+
|
|
82
|
+
MIT © Sebastian Legarraga
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
JsonExtractionError: () => JsonExtractionError,
|
|
24
|
+
balancedSpans: () => balancedSpans,
|
|
25
|
+
extractJson: () => extractJson,
|
|
26
|
+
fencedBlocks: () => fencedBlocks,
|
|
27
|
+
removeTrailingCommas: () => removeTrailingCommas,
|
|
28
|
+
stripReasoning: () => stripReasoning,
|
|
29
|
+
tryExtractJson: () => tryExtractJson
|
|
30
|
+
});
|
|
31
|
+
module.exports = __toCommonJS(index_exports);
|
|
32
|
+
|
|
33
|
+
// src/repair.ts
|
|
34
|
+
/**
 * Remove trailing commas from JSON-like text, e.g. `{"a":1,}` becomes
 * `{"a":1}` and `[1,2,]` becomes `[1,2]`.
 *
 * A comma is dropped only when the next non-whitespace character is `}` or
 * `]`. "Whitespace" here is the four JSON whitespace characters: space, tab,
 * line feed and carriage return. Everything inside a double-quoted string
 * (including escaped quotes) is copied through untouched, so commas that are
 * part of string content are never removed.
 *
 * @param {string} json JSON-like text to repair.
 * @returns {string} The text with trailing commas removed; all other
 *   characters, including the whitespace after a removed comma, are kept.
 */
function removeTrailingCommas(json) {
  // The four insignificant-whitespace characters allowed between JSON tokens.
  const WHITESPACE = " \t\n\r";
  let out = "";
  let inString = false;
  let escaped = false;
  for (let i = 0; i < json.length; i++) {
    const ch = json[i];
    if (inString) {
      out += ch;
      if (escaped) {
        // The character right after a backslash is literal, even a quote.
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
      out += ch;
      continue;
    }
    if (ch === ",") {
      // Look past whitespace: a comma directly before a closer is trailing.
      let j = i + 1;
      while (j < json.length && WHITESPACE.includes(json[j])) {
        j++;
      }
      if (json[j] === "}" || json[j] === "]") {
        continue; // drop the trailing comma
      }
    }
    out += ch;
  }
  return out;
}
|
|
69
|
+
|
|
70
|
+
// src/scan.ts
|
|
71
|
+
/**
 * Collect the balanced `{...}` / `[...]` substrings of `text`, in the order
 * they appear.
 *
 * Scanning moves left to right. At each `{` or `[` it asks `matchBalanced`
 * for the end of the value that starts there; when one is found the substring
 * is recorded and scanning resumes just past it (so values nested inside a
 * recorded span are not reported separately). Openers that never balance are
 * skipped one character at a time.
 *
 * @param {string} text Text to scan.
 * @returns {string[]} The balanced substrings, in document order.
 */
function balancedSpans(text) {
  const found = [];
  let pos = 0;
  while (pos < text.length) {
    const opener = text[pos];
    const end = opener === "{" || opener === "[" ? matchBalanced(text, pos) : -1;
    if (end === -1) {
      pos += 1;
    } else {
      found.push(text.slice(pos, end));
      pos = end;
    }
  }
  return found;
}
|
|
88
|
+
/**
 * Find where the bracketed value beginning at `start` ends.
 *
 * Walks forward from `start`, counting `{` / `[` as openers and `}` / `]` as
 * closers (the two bracket kinds share one counter and are not matched against
 * each other). Characters inside double-quoted strings are ignored, and a
 * backslash inside a string makes the following character literal.
 *
 * @param {string} text Text being scanned.
 * @param {number} start Index to begin scanning from.
 * @returns {number} The index just past the closer that brings the nesting
 *   count back to zero, or -1 if the end of `text` is reached first.
 */
function matchBalanced(text, start) {
  let open = 0;
  let quoted = false;
  let skipNext = false;
  let pos = start;
  while (pos < text.length) {
    const c = text[pos];
    pos += 1; // `pos` now points just past `c`
    if (quoted) {
      if (skipNext) {
        skipNext = false;
      } else if (c === "\\") {
        skipNext = true;
      } else if (c === '"') {
        quoted = false;
      }
      continue;
    }
    switch (c) {
      case '"':
        quoted = true;
        break;
      case "{":
      case "[":
        open += 1;
        break;
      case "}":
      case "]":
        open -= 1;
        if (open === 0) {
          return pos;
        }
        break;
    }
  }
  return -1;
}
|
|
117
|
+
|
|
118
|
+
// src/strip.ts
|
|
119
|
+
// One reasoning block: an opening <think>, <thinking>, <reasoning> or
// <thought> tag, the shortest possible body (newlines included), and a closing
// tag with the same name (backreference \1). Global and case-insensitive.
var REASONING_TAGS = /<(think|thinking|reasoning|thought)>[\s\S]*?<\/\1>/gi;

/**
 * Delete every reasoning block matched by `REASONING_TAGS` from `text`.
 *
 * @param {string} text Raw model output.
 * @returns {string} `text` with each matched block removed; text outside the
 *   blocks (including surrounding whitespace) is left as is.
 */
function stripReasoning(text) {
  const withoutReasoning = text.replace(REASONING_TAGS, () => "");
  return withoutReasoning;
}
|
|
123
|
+
// A triple-backtick fence: optional language tag (group 1) on the opening
// line, then the shortest body (group 2) up to the next triple backtick.
var FENCE = /```[^\S\n]*([a-zA-Z0-9_+-]*)[^\S\n]*\n?([\s\S]*?)```/g;

/**
 * Return the trimmed contents of fenced code blocks that could hold JSON.
 *
 * A block is kept when its language tag is empty or contains "json"
 * (compared in lower case) and its trimmed body is not empty. Blocks tagged
 * with any other language are skipped.
 *
 * @param {string} text Text to search for fenced blocks.
 * @returns {string[]} Trimmed block bodies, in document order.
 */
function fencedBlocks(text) {
  const kept = [];
  for (const [, tag, body] of text.matchAll(FENCE)) {
    const inner = body.trim();
    const language = tag.toLowerCase();
    const couldHoldJson = language === "" || language.includes("json");
    if (inner.length > 0 && couldHoldJson) {
      kept.push(inner);
    }
  }
  return kept;
}
|
|
137
|
+
|
|
138
|
+
// src/types.ts
|
|
139
|
+
/**
 * Error carrying the text it was constructed with. In this file it is thrown
 * by `extractJson` when no JSON value can be recovered.
 */
var JsonExtractionError = class extends Error {
  /** The text passed to the constructor. */
  text;

  /**
   * @param {string} message Human-readable error message.
   * @param {string} text Text to attach to the error.
   */
  constructor(message, text) {
    super(message);
    this.name = "JsonExtractionError";
    this.text = text;
  }
};
|
|
147
|
+
|
|
148
|
+
// src/extract.ts
|
|
149
|
+
/**
 * Try to parse one candidate substring as JSON.
 *
 * The candidate is parsed as-is first. If that fails and `repair` is truthy,
 * it is parsed again after `removeTrailingCommas`. Parse failures are expected
 * here and are reported through the return value rather than thrown.
 *
 * @param {string} candidate Substring that might be JSON.
 * @param {boolean} repair Whether to retry with trailing commas removed.
 * @returns {{ok: true, value: unknown} | {ok: false}} The parsed value on
 *   success, otherwise `{ ok: false }`.
 */
function parseCandidate(candidate, repair) {
  // Each attempt lazily produces the text to parse, in priority order.
  const attempts = [() => candidate];
  if (repair) {
    attempts.push(() => removeTrailingCommas(candidate));
  }
  for (const produce of attempts) {
    try {
      return { ok: true, value: JSON.parse(produce()) };
    } catch {
      // Not parseable this way; fall through to the next attempt, if any.
    }
  }
  return { ok: false };
}
|
|
162
|
+
/**
 * Check a parsed value against the requested top-level type.
 *
 * @param {unknown} value Parsed JSON value.
 * @param {'object' | 'array' | 'any'} expect `"any"` accepts everything,
 *   `"array"` accepts arrays only, and any other setting accepts only
 *   non-null, non-array objects.
 * @returns {boolean} Whether `value` satisfies `expect`.
 */
function matchesExpect(value, expect) {
  switch (expect) {
    case "any":
      return true;
    case "array":
      return Array.isArray(value);
    default:
      return value !== null && typeof value === "object" && !Array.isArray(value);
  }
}
|
|
171
|
+
/**
 * Extract a JSON value from text without throwing on failure.
 *
 * Steps: remove reasoning blocks with `stripReasoning`; build a candidate
 * list of each fenced block followed by the balanced spans inside it, then the
 * balanced spans of the whole cleaned text; return the first candidate that
 * parses (via `parseCandidate`) and satisfies `expect`.
 *
 * @param {string} text Raw model output. Non-strings and the empty string
 *   yield `{ found: false }`.
 * @param {{repair?: boolean, expect?: 'object' | 'array' | 'any'}} [options]
 *   `repair` defaults to `true`, `expect` defaults to `"any"`.
 * @returns {{found: true, value: unknown} | {found: false}} The first
 *   acceptable value, or `{ found: false }` when none is found.
 */
function tryExtractJson(text, options = {}) {
  if (typeof text !== "string" || text.length === 0) {
    return { found: false };
  }
  const shouldRepair = options.repair ?? true;
  const wanted = options.expect ?? "any";
  const body = stripReasoning(text);

  // Highest confidence first: fenced blocks (whole, then their inner spans),
  // followed by balanced spans found anywhere in the cleaned text.
  const candidates = fencedBlocks(body)
    .flatMap((block) => [block, ...balancedSpans(block)])
    .concat(balancedSpans(body));

  for (const candidate of candidates) {
    const attempt = parseCandidate(candidate, shouldRepair);
    if (!attempt.ok || !matchesExpect(attempt.value, wanted)) {
      continue;
    }
    return { found: true, value: attempt.value };
  }
  return { found: false };
}
|
|
191
|
+
/**
 * Extract a JSON value from text, throwing when none can be recovered.
 *
 * Delegates to `tryExtractJson` with the same arguments.
 *
 * @param {string} text Raw model output.
 * @param {{repair?: boolean, expect?: 'object' | 'array' | 'any'}} [options]
 *   Passed through to `tryExtractJson`.
 * @returns {unknown} The extracted value.
 * @throws {JsonExtractionError} When `tryExtractJson` reports nothing found;
 *   the error's `text` is the original `text` argument.
 */
function extractJson(text, options = {}) {
  const outcome = tryExtractJson(text, options);
  if (outcome.found) {
    return outcome.value;
  }
  throw new JsonExtractionError(
    "No JSON value could be extracted from the text.",
    text
  );
}
|
|
201
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
202
|
+
0 && (module.exports = {
|
|
203
|
+
JsonExtractionError,
|
|
204
|
+
balancedSpans,
|
|
205
|
+
extractJson,
|
|
206
|
+
fencedBlocks,
|
|
207
|
+
removeTrailingCommas,
|
|
208
|
+
stripReasoning,
|
|
209
|
+
tryExtractJson
|
|
210
|
+
});
|
|
211
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/repair.ts","../src/scan.ts","../src/strip.ts","../src/types.ts","../src/extract.ts"],"sourcesContent":["export { extractJson, tryExtractJson } from './extract.ts';\nexport { stripReasoning, fencedBlocks } from './strip.ts';\nexport { balancedSpans } from './scan.ts';\nexport { removeTrailingCommas } from './repair.ts';\nexport { JsonExtractionError } from './types.ts';\nexport type { ExtractOptions, ExtractResult } from './types.ts';\n","/**\n * Remove trailing commas (`{\"a\":1,}` → `{\"a\":1}`, `[1,2,]` → `[1,2]`), which\n * models emit frequently. String-aware: a comma inside a string value is never\n * touched, so this can only ever fix structure, never corrupt content.\n */\nexport function removeTrailingCommas(json: string): string {\n let out = '';\n let inString = false;\n let escaped = false;\n\n for (let i = 0; i < json.length; i++) {\n const ch = json[i];\n\n if (inString) {\n out += ch;\n if (escaped) {\n escaped = false;\n } else if (ch === '\\\\') {\n escaped = true;\n } else if (ch === '\"') {\n inString = false;\n }\n continue;\n }\n\n if (ch === '\"') {\n inString = true;\n out += ch;\n continue;\n }\n\n if (ch === ',') {\n let j = i + 1;\n while (\n j < json.length &&\n (json[j] === ' ' ||\n json[j] === '\\n' ||\n json[j] === '\\r' ||\n json[j] === '\\t')\n ) {\n j++;\n }\n if (json[j] === '}' || json[j] === ']') {\n continue; // drop the trailing comma\n }\n }\n\n out += ch;\n }\n\n return out;\n}\n","/**\n * Find the substrings of complete, balanced JSON objects/arrays in `text`,\n * in document order. 
String-aware: braces and brackets inside JSON strings do\n * not affect nesting, so prose like `\"the } char\"` won't break the scan.\n */\nexport function balancedSpans(text: string): string[] {\n const spans: string[] = [];\n let i = 0;\n while (i < text.length) {\n const ch = text[i];\n if (ch === '{' || ch === '[') {\n const end = matchBalanced(text, i);\n if (end !== -1) {\n spans.push(text.slice(i, end));\n i = end;\n continue;\n }\n }\n i++;\n }\n return spans;\n}\n\n/** Return the index just past the balanced value starting at `start`, or -1. */\nfunction matchBalanced(text: string, start: number): number {\n let depth = 0;\n let inString = false;\n let escaped = false;\n\n for (let i = start; i < text.length; i++) {\n const ch = text[i];\n\n if (inString) {\n if (escaped) {\n escaped = false;\n } else if (ch === '\\\\') {\n escaped = true;\n } else if (ch === '\"') {\n inString = false;\n }\n continue;\n }\n\n if (ch === '\"') {\n inString = true;\n } else if (ch === '{' || ch === '[') {\n depth++;\n } else if (ch === '}' || ch === ']') {\n depth--;\n if (depth === 0) {\n return i + 1;\n }\n }\n }\n\n return -1;\n}\n","/**\n * Remove model \"thinking\" / reasoning blocks. Reasoning models (DeepSeek R1,\n * Qwen, and prompted Claude/Gemini setups) emit `<think>…</think>` or\n * `<thinking>…</thinking>` before the answer, and that text frequently contains\n * brace-laden prose that would otherwise be mistaken for the payload.\n */\nconst REASONING_TAGS = /<(think|thinking|reasoning|thought)>[\\s\\S]*?<\\/\\1>/gi;\n\nexport function stripReasoning(text: string): string {\n return text.replace(REASONING_TAGS, '');\n}\n\n/**\n * Return the inner contents of fenced code blocks that could hold JSON: blocks\n * tagged ```json / ```jsonc / ```json5, or untagged ``` blocks. 
Other languages\n * (```python, ```ts) are skipped — they won't contain the answer JSON.\n */\nconst FENCE = /```[^\\S\\n]*([a-zA-Z0-9_+-]*)[^\\S\\n]*\\n?([\\s\\S]*?)```/g;\n\nexport function fencedBlocks(text: string): string[] {\n const blocks: string[] = [];\n FENCE.lastIndex = 0;\n let match: RegExpExecArray | null;\n while ((match = FENCE.exec(text)) !== null) {\n const lang = match[1].toLowerCase();\n const content = match[2].trim();\n if (content.length > 0 && (lang === '' || lang.includes('json'))) {\n blocks.push(content);\n }\n }\n return blocks;\n}\n","/** Options for {@link extractJson} and {@link tryExtractJson}. */\nexport interface ExtractOptions {\n /**\n * Apply conservative, string-aware repairs before parsing — currently the\n * removal of trailing commas, which models emit often. Never rewrites string\n * contents. Default `true`.\n */\n repair?: boolean;\n /**\n * Restrict which top-level JSON value to accept: an `'object'`, an `'array'`,\n * or `'any'` (the default).\n */\n expect?: 'object' | 'array' | 'any';\n}\n\n/** The result of {@link tryExtractJson}. */\nexport type ExtractResult<T> =\n | { found: true; value: T }\n | { found: false; value?: undefined };\n\n/** Thrown by {@link extractJson} when no JSON value can be recovered. */\nexport class JsonExtractionError extends Error {\n constructor(\n message: string,\n /** The original text that no JSON could be extracted from. 
*/\n public readonly text: string,\n ) {\n super(message);\n this.name = 'JsonExtractionError';\n }\n}\n","import { removeTrailingCommas } from './repair.ts';\nimport { balancedSpans } from './scan.ts';\nimport { fencedBlocks, stripReasoning } from './strip.ts';\nimport { JsonExtractionError } from './types.ts';\nimport type { ExtractOptions, ExtractResult } from './types.ts';\n\nfunction parseCandidate(\n candidate: string,\n repair: boolean,\n): { ok: true; value: unknown } | { ok: false } {\n try {\n return { ok: true, value: JSON.parse(candidate) };\n } catch {\n // fall through to repair\n }\n if (repair) {\n try {\n return { ok: true, value: JSON.parse(removeTrailingCommas(candidate)) };\n } catch {\n // unrecoverable\n }\n }\n return { ok: false };\n}\n\nfunction matchesExpect(\n value: unknown,\n expect: 'object' | 'array' | 'any',\n): boolean {\n if (expect === 'any') {\n return true;\n }\n if (expect === 'array') {\n return Array.isArray(value);\n }\n return typeof value === 'object' && value !== null && !Array.isArray(value);\n}\n\n/**\n * Extract a JSON value from LLM output without throwing.\n *\n * Strips `<think>` / `<thinking>` reasoning blocks, prefers fenced ```json\n * code blocks, then scans for the first balanced object/array that parses\n * (applying conservative repair). Returns `{ found: false }` if nothing parses.\n *\n * @example\n * ```ts\n * const r = tryExtractJson<{ score: number }>('<think>...</think>\\n{\"score\":7}');\n * if (r.found) console.log(r.value.score); // 7\n * ```\n */\nexport function tryExtractJson<T = unknown>(\n text: string,\n options: ExtractOptions = {},\n): ExtractResult<T> {\n if (typeof text !== 'string' || text.length === 0) {\n return { found: false };\n }\n\n const repair = options.repair ?? true;\n const expect = options.expect ?? 
'any';\n const cleaned = stripReasoning(text);\n\n // Candidate substrings, highest confidence first: fenced blocks (and any\n // balanced values inside them), then balanced values anywhere in the text.\n const candidates: string[] = [];\n for (const block of fencedBlocks(cleaned)) {\n candidates.push(block, ...balancedSpans(block));\n }\n candidates.push(...balancedSpans(cleaned));\n\n for (const candidate of candidates) {\n const parsed = parseCandidate(candidate, repair);\n if (parsed.ok && matchesExpect(parsed.value, expect)) {\n return { found: true, value: parsed.value as T };\n }\n }\n return { found: false };\n}\n\n/**\n * Extract a JSON value from LLM output, throwing {@link JsonExtractionError}\n * if none can be recovered. See {@link tryExtractJson} for the algorithm.\n */\nexport function extractJson<T = unknown>(\n text: string,\n options: ExtractOptions = {},\n): T {\n const result = tryExtractJson<T>(text, options);\n if (!result.found) {\n throw new JsonExtractionError(\n 'No JSON value could be extracted from the text.',\n text,\n );\n }\n return 
result.value;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKO,SAAS,qBAAqB,MAAsB;AACzD,MAAI,MAAM;AACV,MAAI,WAAW;AACf,MAAI,UAAU;AAEd,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,KAAK,KAAK,CAAC;AAEjB,QAAI,UAAU;AACZ,aAAO;AACP,UAAI,SAAS;AACX,kBAAU;AAAA,MACZ,WAAW,OAAO,MAAM;AACtB,kBAAU;AAAA,MACZ,WAAW,OAAO,KAAK;AACrB,mBAAW;AAAA,MACb;AACA;AAAA,IACF;AAEA,QAAI,OAAO,KAAK;AACd,iBAAW;AACX,aAAO;AACP;AAAA,IACF;AAEA,QAAI,OAAO,KAAK;AACd,UAAI,IAAI,IAAI;AACZ,aACE,IAAI,KAAK,WACR,KAAK,CAAC,MAAM,OACX,KAAK,CAAC,MAAM,QACZ,KAAK,CAAC,MAAM,QACZ,KAAK,CAAC,MAAM,MACd;AACA;AAAA,MACF;AACA,UAAI,KAAK,CAAC,MAAM,OAAO,KAAK,CAAC,MAAM,KAAK;AACtC;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;AC9CO,SAAS,cAAc,MAAwB;AACpD,QAAM,QAAkB,CAAC;AACzB,MAAI,IAAI;AACR,SAAO,IAAI,KAAK,QAAQ;AACtB,UAAM,KAAK,KAAK,CAAC;AACjB,QAAI,OAAO,OAAO,OAAO,KAAK;AAC5B,YAAM,MAAM,cAAc,MAAM,CAAC;AACjC,UAAI,QAAQ,IAAI;AACd,cAAM,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAC7B,YAAI;AACJ;AAAA,MACF;AAAA,IACF;AACA;AAAA,EACF;AACA,SAAO;AACT;AAGA,SAAS,cAAc,MAAc,OAAuB;AAC1D,MAAI,QAAQ;AACZ,MAAI,WAAW;AACf,MAAI,UAAU;AAEd,WAAS,IAAI,OAAO,IAAI,KAAK,QAAQ,KAAK;AACxC,UAAM,KAAK,KAAK,CAAC;AAEjB,QAAI,UAAU;AACZ,UAAI,SAAS;AACX,kBAAU;AAAA,MACZ,WAAW,OAAO,MAAM;AACtB,kBAAU;AAAA,MACZ,WAAW,OAAO,KAAK;AACrB,mBAAW;AAAA,MACb;AACA;AAAA,IACF;AAEA,QAAI,OAAO,KAAK;AACd,iBAAW;AAAA,IACb,WAAW,OAAO,OAAO,OAAO,KAAK;AACnC;AAAA,IACF,WAAW,OAAO,OAAO,OAAO,KAAK;AACnC;AACA,UAAI,UAAU,GAAG;AACf,eAAO,IAAI;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;;AClDA,IAAM,iBAAiB;AAEhB,SAAS,eAAe,MAAsB;AACnD,SAAO,KAAK,QAAQ,gBAAgB,EAAE;AACxC;AAOA,IAAM,QAAQ;AAEP,SAAS,aAAa,MAAwB;AACnD,QAAM,SAAmB,CAAC;AAC1B,QAAM,YAAY;AAClB,MAAI;AACJ,UAAQ,QAAQ,MAAM,KAAK,IAAI,OAAO,MAAM;AAC1C,UAAM,OAAO,MAAM,CAAC,EAAE,YAAY;AAClC,UAAM,UAAU,MAAM,CAAC,EAAE,KAAK;AAC9B,QAAI,QAAQ,SAAS,MAAM,SAAS,MAAM,KAAK,SAAS,MAAM,IAAI;AAChE,aAAO,KAAK,OAAO;AAAA,IACrB;AAAA,EACF;AACA,SAAO;AACT;;;ACVO,IAAM,sBAAN,cAAkC,MAAM;AAAA,EAC7C,YACE,SAEgB,MAChB;AACA,UAAM,OAAO;AAFG;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EAJk
B;AAKpB;;;ACxBA,SAAS,eACP,WACA,QAC8C;AAC9C,MAAI;AACF,WAAO,EAAE,IAAI,MAAM,OAAO,KAAK,MAAM,SAAS,EAAE;AAAA,EAClD,QAAQ;AAAA,EAER;AACA,MAAI,QAAQ;AACV,QAAI;AACF,aAAO,EAAE,IAAI,MAAM,OAAO,KAAK,MAAM,qBAAqB,SAAS,CAAC,EAAE;AAAA,IACxE,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO,EAAE,IAAI,MAAM;AACrB;AAEA,SAAS,cACP,OACA,QACS;AACT,MAAI,WAAW,OAAO;AACpB,WAAO;AAAA,EACT;AACA,MAAI,WAAW,SAAS;AACtB,WAAO,MAAM,QAAQ,KAAK;AAAA,EAC5B;AACA,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAeO,SAAS,eACd,MACA,UAA0B,CAAC,GACT;AAClB,MAAI,OAAO,SAAS,YAAY,KAAK,WAAW,GAAG;AACjD,WAAO,EAAE,OAAO,MAAM;AAAA,EACxB;AAEA,QAAM,SAAS,QAAQ,UAAU;AACjC,QAAM,SAAS,QAAQ,UAAU;AACjC,QAAM,UAAU,eAAe,IAAI;AAInC,QAAM,aAAuB,CAAC;AAC9B,aAAW,SAAS,aAAa,OAAO,GAAG;AACzC,eAAW,KAAK,OAAO,GAAG,cAAc,KAAK,CAAC;AAAA,EAChD;AACA,aAAW,KAAK,GAAG,cAAc,OAAO,CAAC;AAEzC,aAAW,aAAa,YAAY;AAClC,UAAM,SAAS,eAAe,WAAW,MAAM;AAC/C,QAAI,OAAO,MAAM,cAAc,OAAO,OAAO,MAAM,GAAG;AACpD,aAAO,EAAE,OAAO,MAAM,OAAO,OAAO,MAAW;AAAA,IACjD;AAAA,EACF;AACA,SAAO,EAAE,OAAO,MAAM;AACxB;AAMO,SAAS,YACd,MACA,UAA0B,CAAC,GACxB;AACH,QAAM,SAAS,eAAkB,MAAM,OAAO;AAC9C,MAAI,CAAC,OAAO,OAAO;AACjB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,SAAO,OAAO;AAChB;","names":[]}
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/** Options for {@link extractJson} and {@link tryExtractJson}. */
|
|
2
|
+
interface ExtractOptions {
|
|
3
|
+
/**
|
|
4
|
+
* Apply conservative, string-aware repairs before parsing — currently the
|
|
5
|
+
* removal of trailing commas, which models emit often. Never rewrites string
|
|
6
|
+
* contents. Default `true`.
|
|
7
|
+
*/
|
|
8
|
+
repair?: boolean;
|
|
9
|
+
/**
|
|
10
|
+
* Restrict which top-level JSON value to accept: an `'object'`, an `'array'`,
|
|
11
|
+
* or `'any'` (the default).
|
|
12
|
+
*/
|
|
13
|
+
expect?: 'object' | 'array' | 'any';
|
|
14
|
+
}
|
|
15
|
+
/** The result of {@link tryExtractJson}. */
|
|
16
|
+
type ExtractResult<T> = {
|
|
17
|
+
found: true;
|
|
18
|
+
value: T;
|
|
19
|
+
} | {
|
|
20
|
+
found: false;
|
|
21
|
+
value?: undefined;
|
|
22
|
+
};
|
|
23
|
+
/** Thrown by {@link extractJson} when no JSON value can be recovered. */
|
|
24
|
+
declare class JsonExtractionError extends Error {
|
|
25
|
+
/** The original text that no JSON could be extracted from. */
|
|
26
|
+
readonly text: string;
|
|
27
|
+
constructor(message: string,
|
|
28
|
+
/** The original text that no JSON could be extracted from. */
|
|
29
|
+
text: string);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Extract a JSON value from LLM output without throwing.
|
|
34
|
+
*
|
|
35
|
+
* Strips `<think>` / `<thinking>` reasoning blocks, prefers fenced ```json
|
|
36
|
+
* code blocks, then scans for the first balanced object/array that parses
|
|
37
|
+
* (applying conservative repair). Returns `{ found: false }` if nothing parses.
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* ```ts
|
|
41
|
+
* const r = tryExtractJson<{ score: number }>('<think>...</think>\n{"score":7}');
|
|
42
|
+
* if (r.found) console.log(r.value.score); // 7
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
declare function tryExtractJson<T = unknown>(text: string, options?: ExtractOptions): ExtractResult<T>;
|
|
46
|
+
/**
|
|
47
|
+
* Extract a JSON value from LLM output, throwing {@link JsonExtractionError}
|
|
48
|
+
* if none can be recovered. See {@link tryExtractJson} for the algorithm.
|
|
49
|
+
*/
|
|
50
|
+
declare function extractJson<T = unknown>(text: string, options?: ExtractOptions): T;
|
|
51
|
+
|
|
52
|
+
/**
 * Remove `<think>`, `<thinking>`, `<reasoning>` and `<thought>` blocks
 * (opening tag through the matching closing tag, case-insensitive) from
 * `text` and return what remains.
 */
declare function stripReasoning(text: string): string;
|
|
53
|
+
/**
 * Return the trimmed contents of triple-backtick fenced code blocks that are
 * untagged or whose language tag contains `json`, in document order. Empty
 * blocks and blocks tagged with other languages are skipped.
 */
declare function fencedBlocks(text: string): string[];
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Find the substrings of complete, balanced JSON objects/arrays in `text`,
|
|
57
|
+
* in document order. String-aware: braces and brackets inside JSON strings do
|
|
58
|
+
* not affect nesting, so prose like `"the } char"` won't break the scan.
|
|
59
|
+
*/
|
|
60
|
+
declare function balancedSpans(text: string): string[];
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Remove trailing commas (`{"a":1,}` → `{"a":1}`, `[1,2,]` → `[1,2]`), which
|
|
64
|
+
* models emit frequently. String-aware: a comma inside a string value is never
|
|
65
|
+
* touched, so this can only ever fix structure, never corrupt content.
|
|
66
|
+
*/
|
|
67
|
+
declare function removeTrailingCommas(json: string): string;
|
|
68
|
+
|
|
69
|
+
export { type ExtractOptions, type ExtractResult, JsonExtractionError, balancedSpans, extractJson, fencedBlocks, removeTrailingCommas, stripReasoning, tryExtractJson };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/** Options for {@link extractJson} and {@link tryExtractJson}. */
|
|
2
|
+
interface ExtractOptions {
|
|
3
|
+
/**
|
|
4
|
+
* Apply conservative, string-aware repairs before parsing — currently the
|
|
5
|
+
* removal of trailing commas, which models emit often. Never rewrites string
|
|
6
|
+
* contents. Default `true`.
|
|
7
|
+
*/
|
|
8
|
+
repair?: boolean;
|
|
9
|
+
/**
|
|
10
|
+
* Restrict which top-level JSON value to accept: an `'object'`, an `'array'`,
|
|
11
|
+
* or `'any'` (the default).
|
|
12
|
+
*/
|
|
13
|
+
expect?: 'object' | 'array' | 'any';
|
|
14
|
+
}
|
|
15
|
+
/** The result of {@link tryExtractJson}. */
|
|
16
|
+
type ExtractResult<T> = {
|
|
17
|
+
found: true;
|
|
18
|
+
value: T;
|
|
19
|
+
} | {
|
|
20
|
+
found: false;
|
|
21
|
+
value?: undefined;
|
|
22
|
+
};
|
|
23
|
+
/** Thrown by {@link extractJson} when no JSON value can be recovered. */
|
|
24
|
+
declare class JsonExtractionError extends Error {
|
|
25
|
+
/** The original text that no JSON could be extracted from. */
|
|
26
|
+
readonly text: string;
|
|
27
|
+
constructor(message: string,
|
|
28
|
+
/** The original text that no JSON could be extracted from. */
|
|
29
|
+
text: string);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Extract a JSON value from LLM output without throwing.
|
|
34
|
+
*
|
|
35
|
+
* Strips `<think>` / `<thinking>` reasoning blocks, prefers fenced ```json
|
|
36
|
+
* code blocks, then scans for the first balanced object/array that parses
|
|
37
|
+
* (applying conservative repair). Returns `{ found: false }` if nothing parses.
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* ```ts
|
|
41
|
+
* const r = tryExtractJson<{ score: number }>('<think>...</think>\n{"score":7}');
|
|
42
|
+
* if (r.found) console.log(r.value.score); // 7
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
declare function tryExtractJson<T = unknown>(text: string, options?: ExtractOptions): ExtractResult<T>;
|
|
46
|
+
/**
|
|
47
|
+
* Extract a JSON value from LLM output, throwing {@link JsonExtractionError}
|
|
48
|
+
* if none can be recovered. See {@link tryExtractJson} for the algorithm.
|
|
49
|
+
*/
|
|
50
|
+
declare function extractJson<T = unknown>(text: string, options?: ExtractOptions): T;
|
|
51
|
+
|
|
52
|
+
/**
 * Remove `<think>`, `<thinking>`, `<reasoning>` and `<thought>` blocks
 * (opening tag through the matching closing tag, case-insensitive) from
 * `text` and return what remains.
 */
declare function stripReasoning(text: string): string;
|
|
53
|
+
/**
 * Return the trimmed contents of triple-backtick fenced code blocks that are
 * untagged or whose language tag contains `json`, in document order. Empty
 * blocks and blocks tagged with other languages are skipped.
 */
declare function fencedBlocks(text: string): string[];
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Find the substrings of complete, balanced JSON objects/arrays in `text`,
|
|
57
|
+
* in document order. String-aware: braces and brackets inside JSON strings do
|
|
58
|
+
* not affect nesting, so prose like `"the } char"` won't break the scan.
|
|
59
|
+
*/
|
|
60
|
+
declare function balancedSpans(text: string): string[];
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Remove trailing commas (`{"a":1,}` → `{"a":1}`, `[1,2,]` → `[1,2]`), which
|
|
64
|
+
* models emit frequently. String-aware: a comma inside a string value is never
|
|
65
|
+
* touched, so this can only ever fix structure, never corrupt content.
|
|
66
|
+
*/
|
|
67
|
+
declare function removeTrailingCommas(json: string): string;
|
|
68
|
+
|
|
69
|
+
export { type ExtractOptions, type ExtractResult, JsonExtractionError, balancedSpans, extractJson, fencedBlocks, removeTrailingCommas, stripReasoning, tryExtractJson };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
// src/repair.ts
|
|
2
|
+
/**
 * Remove trailing commas from JSON-like text (`{"a":1,}` -> `{"a":1}`,
 * `[1,2,]` -> `[1,2]`).
 *
 * A comma is dropped only when the next non-whitespace character is `}` or
 * `]`. The whitespace that followed the comma is kept as-is. The scan is
 * string-aware: characters inside double-quoted strings (including escaped
 * quotes) are copied through untouched.
 *
 * @param json Text to repair.
 * @returns The text with trailing commas removed.
 */
function removeTrailingCommas(json) {
  let out = "";
  let inString = false;
  let escaped = false;
  for (let i = 0; i < json.length; i++) {
    const ch = json[i];
    if (inString) {
      // Inside a string everything is copied verbatim; only track where it ends.
      out += ch;
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
      out += ch;
      continue;
    }
    if (ch === ",") {
      // Look past JSON insignificant whitespace: space, LF, CR and tab.
      // The tab is written as an escape so it cannot be confused with (or
      // reformatted into) a plain space.
      let j = i + 1;
      while (
        j < json.length &&
        (json[j] === " " || json[j] === "\n" || json[j] === "\r" || json[j] === "\t")
      ) {
        j++;
      }
      if (json[j] === "}" || json[j] === "]") {
        continue; // trailing comma: skip it
      }
    }
    out += ch;
  }
  return out;
}
|
|
37
|
+
|
|
38
|
+
// src/scan.ts
|
|
39
|
+
/**
 * Collect the balanced `{...}` / `[...]` substrings of `text`, in document
 * order.
 *
 * Scanning resumes just past each span that was found, so spans never overlap
 * and values nested inside a returned span are not reported separately. An
 * opener with no balanced end is skipped and the scan moves on by one
 * character.
 *
 * @param text Text to scan.
 * @returns The balanced substrings, in the order they appear.
 */
function balancedSpans(text) {
  const found = [];
  let pos = 0;
  while (pos < text.length) {
    const opener = text[pos];
    const stop =
      opener === "{" || opener === "[" ? matchBalanced(text, pos) : -1;
    if (stop === -1) {
      pos += 1;
    } else {
      found.push(text.slice(pos, stop));
      pos = stop;
    }
  }
  return found;
}
|
|
56
|
+
/**
 * Return the index just past the balanced value that starts at `start`, or
 * -1 if there is none.
 *
 * Brackets are tracked with a stack of expected closers, so each `}` / `]`
 * must close the most recent unclosed `{` / `[` of the same kind. A mismatched
 * closer (for example `[1, 2}`) yields -1 instead of being reported as a
 * balanced span. Brackets inside double-quoted strings are ignored, and
 * backslash escapes inside strings are honoured.
 *
 * @param text  Text being scanned.
 * @param start Index of the opening `{` or `[`.
 * @returns End index (exclusive) of the balanced value, or -1.
 */
function matchBalanced(text, start) {
  // Closing characters still owed, innermost last.
  const expectedClosers = [];
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      expectedClosers.push("}");
    } else if (ch === "[") {
      expectedClosers.push("]");
    } else if (ch === "}" || ch === "]") {
      // A closer of the wrong kind (or with nothing open) means this
      // candidate can never be a well-formed value.
      if (expectedClosers.pop() !== ch) {
        return -1;
      }
      if (expectedClosers.length === 0) {
        return i + 1;
      }
    }
  }
  return -1;
}
|
|
85
|
+
|
|
86
|
+
// src/strip.ts
|
|
87
|
+
// One reasoning element: an attribute-less opening tag, the shortest possible
// run of any characters (newlines included), and a closing tag with the same
// name. Global and case-insensitive.
var REASONING_TAGS = /<(think|thinking|reasoning|thought)>[\s\S]*?<\/\1>/gi;

/**
 * Delete every reasoning element (`<think>`, `<thinking>`, `<reasoning>`,
 * `<thought>`) and its contents from `text`.
 *
 * @param text Raw model output.
 * @returns The text with all matched reasoning blocks removed.
 */
function stripReasoning(text) {
  const withoutReasoning = text.replace(REASONING_TAGS, "");
  return withoutReasoning;
}
|
|
91
|
+
// A triple-backtick fence: optional horizontal whitespace, an optional
// language tag (capture 1), optional horizontal whitespace, an optional
// newline, then the shortest body (capture 2) up to the next triple backtick.
var FENCE = /```[^\S\n]*([a-zA-Z0-9_+-]*)[^\S\n]*\n?([\s\S]*?)```/g;

/**
 * Return the trimmed contents of fenced code blocks that may hold JSON.
 *
 * A block qualifies when its language tag is empty or contains "json"
 * (case-insensitive) and its trimmed content is non-empty. Results are in
 * document order.
 *
 * @param text Text to search for fenced blocks.
 * @returns Trimmed contents of the qualifying blocks.
 */
function fencedBlocks(text) {
  const kept = [];
  // FENCE is a shared global regex, so rewind it before scanning.
  FENCE.lastIndex = 0;
  for (let hit = FENCE.exec(text); hit !== null; hit = FENCE.exec(text)) {
    const language = hit[1].toLowerCase();
    const body = hit[2].trim();
    const mayHoldJson = language === "" || language.includes("json");
    if (body.length > 0 && mayHoldJson) {
      kept.push(body);
    }
  }
  return kept;
}
|
|
105
|
+
|
|
106
|
+
// src/types.ts
|
|
107
|
+
/**
 * Error raised when no JSON value can be recovered from a piece of text.
 * `text` holds the value passed as the constructor's second argument.
 */
var JsonExtractionError = class extends Error {
  text;
  constructor(message, text) {
    super(message);
    this.name = "JsonExtractionError";
    this.text = text;
  }
};
|
|
115
|
+
|
|
116
|
+
// src/extract.ts
|
|
117
|
+
/**
 * Try to parse `candidate` as JSON.
 *
 * The text is first parsed as-is. If that fails and `repair` is truthy, it is
 * parsed again after trailing commas have been removed.
 *
 * @param candidate Candidate JSON text.
 * @param repair    Whether the trailing-comma repair may be attempted.
 * @returns `{ ok: true, value }` on success, otherwise `{ ok: false }`.
 */
function parseCandidate(candidate, repair) {
  const asIs = (raw) => raw;
  const preparations = repair ? [asIs, removeTrailingCommas] : [asIs];
  for (const prepare of preparations) {
    try {
      return { ok: true, value: JSON.parse(prepare(candidate)) };
    } catch {
      // This form did not parse; fall through to the next one, if any.
    }
  }
  return { ok: false };
}
|
|
130
|
+
/**
 * Check whether a parsed value has the requested top-level shape.
 *
 * @param value  Parsed JSON value.
 * @param expect `"any"` accepts everything, `"array"` accepts arrays only;
 *               any other setting accepts only non-null, non-array objects.
 * @returns `true` when `value` satisfies `expect`.
 */
function matchesExpect(value, expect) {
  switch (expect) {
    case "any":
      return true;
    case "array":
      return Array.isArray(value);
    default:
      return value !== null && typeof value === "object" && !Array.isArray(value);
  }
}
|
|
139
|
+
/**
 * Extract a JSON value from model output without throwing on "not found".
 *
 * Reasoning blocks are stripped first. Candidates are then tried in order:
 * each fenced block as a whole followed by the balanced spans inside it, and
 * finally the balanced spans of the whole stripped text. The first candidate
 * that parses and satisfies `options.expect` wins.
 *
 * @param text    Raw model output. Non-string or empty input yields
 *                `{ found: false }`.
 * @param options `repair` (default `true`) enables trailing-comma repair;
 *                `expect` (default `"any"`) restricts the accepted top-level
 *                value.
 * @returns `{ found: true, value }` or `{ found: false }`.
 */
function tryExtractJson(text, options = {}) {
  if (typeof text !== "string" || text.length === 0) {
    return { found: false };
  }
  const allowRepair = options.repair ?? true;
  const wanted = options.expect ?? "any";
  const body = stripReasoning(text);
  // Fenced blocks (and the values inside them) come before loose spans.
  const candidates = fencedBlocks(body)
    .flatMap((block) => [block, ...balancedSpans(block)])
    .concat(balancedSpans(body));
  for (const candidate of candidates) {
    const outcome = parseCandidate(candidate, allowRepair);
    if (!outcome.ok) {
      continue;
    }
    if (matchesExpect(outcome.value, wanted)) {
      return { found: true, value: outcome.value };
    }
  }
  return { found: false };
}
|
|
159
|
+
/**
 * Extract a JSON value from model output, throwing when none is found.
 *
 * @param text    Raw model output.
 * @param options Passed through unchanged to `tryExtractJson`.
 * @returns The extracted value.
 * @throws {JsonExtractionError} When no value could be extracted; the error's
 *         `text` property carries the original input.
 */
function extractJson(text, options = {}) {
  const outcome = tryExtractJson(text, options);
  if (outcome.found) {
    return outcome.value;
  }
  throw new JsonExtractionError(
    "No JSON value could be extracted from the text.",
    text
  );
}
|
|
169
|
+
export {
|
|
170
|
+
JsonExtractionError,
|
|
171
|
+
balancedSpans,
|
|
172
|
+
extractJson,
|
|
173
|
+
fencedBlocks,
|
|
174
|
+
removeTrailingCommas,
|
|
175
|
+
stripReasoning,
|
|
176
|
+
tryExtractJson
|
|
177
|
+
};
|
|
178
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/repair.ts","../src/scan.ts","../src/strip.ts","../src/types.ts","../src/extract.ts"],"sourcesContent":["/**\n * Remove trailing commas (`{\"a\":1,}` → `{\"a\":1}`, `[1,2,]` → `[1,2]`), which\n * models emit frequently. String-aware: a comma inside a string value is never\n * touched, so this can only ever fix structure, never corrupt content.\n */\nexport function removeTrailingCommas(json: string): string {\n let out = '';\n let inString = false;\n let escaped = false;\n\n for (let i = 0; i < json.length; i++) {\n const ch = json[i];\n\n if (inString) {\n out += ch;\n if (escaped) {\n escaped = false;\n } else if (ch === '\\\\') {\n escaped = true;\n } else if (ch === '\"') {\n inString = false;\n }\n continue;\n }\n\n if (ch === '\"') {\n inString = true;\n out += ch;\n continue;\n }\n\n if (ch === ',') {\n let j = i + 1;\n while (\n j < json.length &&\n (json[j] === ' ' ||\n json[j] === '\\n' ||\n json[j] === '\\r' ||\n json[j] === '\\t')\n ) {\n j++;\n }\n if (json[j] === '}' || json[j] === ']') {\n continue; // drop the trailing comma\n }\n }\n\n out += ch;\n }\n\n return out;\n}\n","/**\n * Find the substrings of complete, balanced JSON objects/arrays in `text`,\n * in document order. String-aware: braces and brackets inside JSON strings do\n * not affect nesting, so prose like `\"the } char\"` won't break the scan.\n */\nexport function balancedSpans(text: string): string[] {\n const spans: string[] = [];\n let i = 0;\n while (i < text.length) {\n const ch = text[i];\n if (ch === '{' || ch === '[') {\n const end = matchBalanced(text, i);\n if (end !== -1) {\n spans.push(text.slice(i, end));\n i = end;\n continue;\n }\n }\n i++;\n }\n return spans;\n}\n\n/** Return the index just past the balanced value starting at `start`, or -1. 
*/\nfunction matchBalanced(text: string, start: number): number {\n let depth = 0;\n let inString = false;\n let escaped = false;\n\n for (let i = start; i < text.length; i++) {\n const ch = text[i];\n\n if (inString) {\n if (escaped) {\n escaped = false;\n } else if (ch === '\\\\') {\n escaped = true;\n } else if (ch === '\"') {\n inString = false;\n }\n continue;\n }\n\n if (ch === '\"') {\n inString = true;\n } else if (ch === '{' || ch === '[') {\n depth++;\n } else if (ch === '}' || ch === ']') {\n depth--;\n if (depth === 0) {\n return i + 1;\n }\n }\n }\n\n return -1;\n}\n","/**\n * Remove model \"thinking\" / reasoning blocks. Reasoning models (DeepSeek R1,\n * Qwen, and prompted Claude/Gemini setups) emit `<think>…</think>` or\n * `<thinking>…</thinking>` before the answer, and that text frequently contains\n * brace-laden prose that would otherwise be mistaken for the payload.\n */\nconst REASONING_TAGS = /<(think|thinking|reasoning|thought)>[\\s\\S]*?<\\/\\1>/gi;\n\nexport function stripReasoning(text: string): string {\n return text.replace(REASONING_TAGS, '');\n}\n\n/**\n * Return the inner contents of fenced code blocks that could hold JSON: blocks\n * tagged ```json / ```jsonc / ```json5, or untagged ``` blocks. Other languages\n * (```python, ```ts) are skipped — they won't contain the answer JSON.\n */\nconst FENCE = /```[^\\S\\n]*([a-zA-Z0-9_+-]*)[^\\S\\n]*\\n?([\\s\\S]*?)```/g;\n\nexport function fencedBlocks(text: string): string[] {\n const blocks: string[] = [];\n FENCE.lastIndex = 0;\n let match: RegExpExecArray | null;\n while ((match = FENCE.exec(text)) !== null) {\n const lang = match[1].toLowerCase();\n const content = match[2].trim();\n if (content.length > 0 && (lang === '' || lang.includes('json'))) {\n blocks.push(content);\n }\n }\n return blocks;\n}\n","/** Options for {@link extractJson} and {@link tryExtractJson}. 
*/\nexport interface ExtractOptions {\n /**\n * Apply conservative, string-aware repairs before parsing — currently the\n * removal of trailing commas, which models emit often. Never rewrites string\n * contents. Default `true`.\n */\n repair?: boolean;\n /**\n * Restrict which top-level JSON value to accept: an `'object'`, an `'array'`,\n * or `'any'` (the default).\n */\n expect?: 'object' | 'array' | 'any';\n}\n\n/** The result of {@link tryExtractJson}. */\nexport type ExtractResult<T> =\n | { found: true; value: T }\n | { found: false; value?: undefined };\n\n/** Thrown by {@link extractJson} when no JSON value can be recovered. */\nexport class JsonExtractionError extends Error {\n constructor(\n message: string,\n /** The original text that no JSON could be extracted from. */\n public readonly text: string,\n ) {\n super(message);\n this.name = 'JsonExtractionError';\n }\n}\n","import { removeTrailingCommas } from './repair.ts';\nimport { balancedSpans } from './scan.ts';\nimport { fencedBlocks, stripReasoning } from './strip.ts';\nimport { JsonExtractionError } from './types.ts';\nimport type { ExtractOptions, ExtractResult } from './types.ts';\n\nfunction parseCandidate(\n candidate: string,\n repair: boolean,\n): { ok: true; value: unknown } | { ok: false } {\n try {\n return { ok: true, value: JSON.parse(candidate) };\n } catch {\n // fall through to repair\n }\n if (repair) {\n try {\n return { ok: true, value: JSON.parse(removeTrailingCommas(candidate)) };\n } catch {\n // unrecoverable\n }\n }\n return { ok: false };\n}\n\nfunction matchesExpect(\n value: unknown,\n expect: 'object' | 'array' | 'any',\n): boolean {\n if (expect === 'any') {\n return true;\n }\n if (expect === 'array') {\n return Array.isArray(value);\n }\n return typeof value === 'object' && value !== null && !Array.isArray(value);\n}\n\n/**\n * Extract a JSON value from LLM output without throwing.\n *\n * Strips `<think>` / `<thinking>` reasoning blocks, prefers fenced ```json\n * 
code blocks, then scans for the first balanced object/array that parses\n * (applying conservative repair). Returns `{ found: false }` if nothing parses.\n *\n * @example\n * ```ts\n * const r = tryExtractJson<{ score: number }>('<think>...</think>\\n{\"score\":7}');\n * if (r.found) console.log(r.value.score); // 7\n * ```\n */\nexport function tryExtractJson<T = unknown>(\n text: string,\n options: ExtractOptions = {},\n): ExtractResult<T> {\n if (typeof text !== 'string' || text.length === 0) {\n return { found: false };\n }\n\n const repair = options.repair ?? true;\n const expect = options.expect ?? 'any';\n const cleaned = stripReasoning(text);\n\n // Candidate substrings, highest confidence first: fenced blocks (and any\n // balanced values inside them), then balanced values anywhere in the text.\n const candidates: string[] = [];\n for (const block of fencedBlocks(cleaned)) {\n candidates.push(block, ...balancedSpans(block));\n }\n candidates.push(...balancedSpans(cleaned));\n\n for (const candidate of candidates) {\n const parsed = parseCandidate(candidate, repair);\n if (parsed.ok && matchesExpect(parsed.value, expect)) {\n return { found: true, value: parsed.value as T };\n }\n }\n return { found: false };\n}\n\n/**\n * Extract a JSON value from LLM output, throwing {@link JsonExtractionError}\n * if none can be recovered. 
See {@link tryExtractJson} for the algorithm.\n */\nexport function extractJson<T = unknown>(\n text: string,\n options: ExtractOptions = {},\n): T {\n const result = tryExtractJson<T>(text, options);\n if (!result.found) {\n throw new JsonExtractionError(\n 'No JSON value could be extracted from the text.',\n text,\n );\n }\n return result.value;\n}\n"],"mappings":";AAKO,SAAS,qBAAqB,MAAsB;AACzD,MAAI,MAAM;AACV,MAAI,WAAW;AACf,MAAI,UAAU;AAEd,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,KAAK,KAAK,CAAC;AAEjB,QAAI,UAAU;AACZ,aAAO;AACP,UAAI,SAAS;AACX,kBAAU;AAAA,MACZ,WAAW,OAAO,MAAM;AACtB,kBAAU;AAAA,MACZ,WAAW,OAAO,KAAK;AACrB,mBAAW;AAAA,MACb;AACA;AAAA,IACF;AAEA,QAAI,OAAO,KAAK;AACd,iBAAW;AACX,aAAO;AACP;AAAA,IACF;AAEA,QAAI,OAAO,KAAK;AACd,UAAI,IAAI,IAAI;AACZ,aACE,IAAI,KAAK,WACR,KAAK,CAAC,MAAM,OACX,KAAK,CAAC,MAAM,QACZ,KAAK,CAAC,MAAM,QACZ,KAAK,CAAC,MAAM,MACd;AACA;AAAA,MACF;AACA,UAAI,KAAK,CAAC,MAAM,OAAO,KAAK,CAAC,MAAM,KAAK;AACtC;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;AC9CO,SAAS,cAAc,MAAwB;AACpD,QAAM,QAAkB,CAAC;AACzB,MAAI,IAAI;AACR,SAAO,IAAI,KAAK,QAAQ;AACtB,UAAM,KAAK,KAAK,CAAC;AACjB,QAAI,OAAO,OAAO,OAAO,KAAK;AAC5B,YAAM,MAAM,cAAc,MAAM,CAAC;AACjC,UAAI,QAAQ,IAAI;AACd,cAAM,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAC7B,YAAI;AACJ;AAAA,MACF;AAAA,IACF;AACA;AAAA,EACF;AACA,SAAO;AACT;AAGA,SAAS,cAAc,MAAc,OAAuB;AAC1D,MAAI,QAAQ;AACZ,MAAI,WAAW;AACf,MAAI,UAAU;AAEd,WAAS,IAAI,OAAO,IAAI,KAAK,QAAQ,KAAK;AACxC,UAAM,KAAK,KAAK,CAAC;AAEjB,QAAI,UAAU;AACZ,UAAI,SAAS;AACX,kBAAU;AAAA,MACZ,WAAW,OAAO,MAAM;AACtB,kBAAU;AAAA,MACZ,WAAW,OAAO,KAAK;AACrB,mBAAW;AAAA,MACb;AACA;AAAA,IACF;AAEA,QAAI,OAAO,KAAK;AACd,iBAAW;AAAA,IACb,WAAW,OAAO,OAAO,OAAO,KAAK;AACnC;AAAA,IACF,WAAW,OAAO,OAAO,OAAO,KAAK;AACnC;AACA,UAAI,UAAU,GAAG;AACf,eAAO,IAAI;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;;AClDA,IAAM,iBAAiB;AAEhB,SAAS,eAAe,MAAsB;AACnD,SAAO,KAAK,QAAQ,gBAAgB,EAAE;AACxC;AAOA,IAAM,QAAQ;AAEP,SAAS,aAAa,MAAwB;AACnD,QAAM,SAAmB,CAAC;AAC1B,QAAM,YAAY;AAClB,MAAI;AACJ,UAAQ,QAAQ,MAAM,KAAK,IAAI,OAAO,MAAM;AAC1C,UAAM,OAAO,MAAM,CAAC,EAAE,YAAY;A
AClC,UAAM,UAAU,MAAM,CAAC,EAAE,KAAK;AAC9B,QAAI,QAAQ,SAAS,MAAM,SAAS,MAAM,KAAK,SAAS,MAAM,IAAI;AAChE,aAAO,KAAK,OAAO;AAAA,IACrB;AAAA,EACF;AACA,SAAO;AACT;;;ACVO,IAAM,sBAAN,cAAkC,MAAM;AAAA,EAC7C,YACE,SAEgB,MAChB;AACA,UAAM,OAAO;AAFG;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EAJkB;AAKpB;;;ACxBA,SAAS,eACP,WACA,QAC8C;AAC9C,MAAI;AACF,WAAO,EAAE,IAAI,MAAM,OAAO,KAAK,MAAM,SAAS,EAAE;AAAA,EAClD,QAAQ;AAAA,EAER;AACA,MAAI,QAAQ;AACV,QAAI;AACF,aAAO,EAAE,IAAI,MAAM,OAAO,KAAK,MAAM,qBAAqB,SAAS,CAAC,EAAE;AAAA,IACxE,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO,EAAE,IAAI,MAAM;AACrB;AAEA,SAAS,cACP,OACA,QACS;AACT,MAAI,WAAW,OAAO;AACpB,WAAO;AAAA,EACT;AACA,MAAI,WAAW,SAAS;AACtB,WAAO,MAAM,QAAQ,KAAK;AAAA,EAC5B;AACA,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAeO,SAAS,eACd,MACA,UAA0B,CAAC,GACT;AAClB,MAAI,OAAO,SAAS,YAAY,KAAK,WAAW,GAAG;AACjD,WAAO,EAAE,OAAO,MAAM;AAAA,EACxB;AAEA,QAAM,SAAS,QAAQ,UAAU;AACjC,QAAM,SAAS,QAAQ,UAAU;AACjC,QAAM,UAAU,eAAe,IAAI;AAInC,QAAM,aAAuB,CAAC;AAC9B,aAAW,SAAS,aAAa,OAAO,GAAG;AACzC,eAAW,KAAK,OAAO,GAAG,cAAc,KAAK,CAAC;AAAA,EAChD;AACA,aAAW,KAAK,GAAG,cAAc,OAAO,CAAC;AAEzC,aAAW,aAAa,YAAY;AAClC,UAAM,SAAS,eAAe,WAAW,MAAM;AAC/C,QAAI,OAAO,MAAM,cAAc,OAAO,OAAO,MAAM,GAAG;AACpD,aAAO,EAAE,OAAO,MAAM,OAAO,OAAO,MAAW;AAAA,IACjD;AAAA,EACF;AACA,SAAO,EAAE,OAAO,MAAM;AACxB;AAMO,SAAS,YACd,MACA,UAA0B,CAAC,GACxB;AACH,QAAM,SAAS,eAAkB,MAAM,OAAO;AAC9C,MAAI,CAAC,OAAO,OAAO;AACjB,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,SAAO,OAAO;AAChB;","names":[]}
|
package/package.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "json-from-llm",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Extract valid JSON from an LLM response, even when it is wrapped in reasoning/thinking tags, markdown fences or prose. Zero dependencies.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"llm",
|
|
7
|
+
"json",
|
|
8
|
+
"extract",
|
|
9
|
+
"parse",
|
|
10
|
+
"structured-output",
|
|
11
|
+
"openai",
|
|
12
|
+
"anthropic",
|
|
13
|
+
"gemini",
|
|
14
|
+
"deepseek",
|
|
15
|
+
"reasoning",
|
|
16
|
+
"thinking",
|
|
17
|
+
"repair",
|
|
18
|
+
"ai",
|
|
19
|
+
"agents"
|
|
20
|
+
],
|
|
21
|
+
"license": "MIT",
|
|
22
|
+
"author": "Sebastian Legarraga",
|
|
23
|
+
"repository": {
|
|
24
|
+
"type": "git",
|
|
25
|
+
"url": "git+https://github.com/slegarraga/json-from-llm.git"
|
|
26
|
+
},
|
|
27
|
+
"homepage": "https://github.com/slegarraga/json-from-llm#readme",
|
|
28
|
+
"bugs": {
|
|
29
|
+
"url": "https://github.com/slegarraga/json-from-llm/issues"
|
|
30
|
+
},
|
|
31
|
+
"type": "module",
|
|
32
|
+
"main": "./dist/index.cjs",
|
|
33
|
+
"module": "./dist/index.js",
|
|
34
|
+
"types": "./dist/index.d.ts",
|
|
35
|
+
"exports": {
|
|
36
|
+
".": {
|
|
37
|
+
"types": "./dist/index.d.ts",
|
|
38
|
+
"import": "./dist/index.js",
|
|
39
|
+
"require": "./dist/index.cjs"
|
|
40
|
+
},
|
|
41
|
+
"./package.json": "./package.json"
|
|
42
|
+
},
|
|
43
|
+
"files": [
|
|
44
|
+
"dist",
|
|
45
|
+
"README.md",
|
|
46
|
+
"LICENSE",
|
|
47
|
+
"CHANGELOG.md"
|
|
48
|
+
],
|
|
49
|
+
"engines": {
|
|
50
|
+
"node": ">=18"
|
|
51
|
+
},
|
|
52
|
+
"sideEffects": false,
|
|
53
|
+
"scripts": {
|
|
54
|
+
"build": "tsup",
|
|
55
|
+
"typecheck": "tsc --noEmit",
|
|
56
|
+
"test": "vitest run",
|
|
57
|
+
"test:watch": "vitest",
|
|
58
|
+
"lint": "eslint .",
|
|
59
|
+
"format": "prettier --write .",
|
|
60
|
+
"format:check": "prettier --check .",
|
|
61
|
+
"prepublishOnly": "npm run build",
|
|
62
|
+
"prepare": "npm run build"
|
|
63
|
+
},
|
|
64
|
+
"devDependencies": {
|
|
65
|
+
"@eslint/js": "^10.0.1",
|
|
66
|
+
"@types/node": "^25.9.1",
|
|
67
|
+
"eslint": "^10.4.1",
|
|
68
|
+
"prettier": "^3.4.2",
|
|
69
|
+
"tsup": "^8.3.5",
|
|
70
|
+
"typescript": "^5.7.2",
|
|
71
|
+
"typescript-eslint": "^8.60.0",
|
|
72
|
+
"vitest": "^2.1.8"
|
|
73
|
+
}
|
|
74
|
+
}
|