jsonmend 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jsonmend/__init__.py +269 -0
- jsonmend/_engine.py +1889 -0
- jsonmend-0.1.0.dist-info/METADATA +193 -0
- jsonmend-0.1.0.dist-info/RECORD +7 -0
- jsonmend-0.1.0.dist-info/WHEEL +5 -0
- jsonmend-0.1.0.dist-info/licenses/LICENSE +21 -0
- jsonmend-0.1.0.dist-info/top_level.txt +1 -0
jsonmend/__init__.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""jsonmend — mends the JSON your LLM almost wrote.
|
|
2
|
+
|
|
3
|
+
Batch API (drop-in for json_repair):
|
|
4
|
+
|
|
5
|
+
from jsonmend import repair_json, loads, load, from_file
|
|
6
|
+
|
|
7
|
+
Streaming API (true incremental, O(new bytes) per feed):
|
|
8
|
+
|
|
9
|
+
from jsonmend import Mender
|
|
10
|
+
m = Mender()
|
|
11
|
+
for chunk in stream:
|
|
12
|
+
partial = m.feed(chunk) # best-effort value so far
|
|
13
|
+
value = m.close()
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json as _json
|
|
19
|
+
import math as _math
|
|
20
|
+
|
|
21
|
+
from ._engine import SKIP, JSONMendError, MendMachine
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"repair_json", "loads", "load", "from_file",
|
|
27
|
+
"mend", "Mender", "JSONMendError", "__version__",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def mend(text, *, strict=False, _doom_hint=None):
    """Repair ``text`` and return the parsed Python value.

    This always runs the repair machine (no ``json.loads`` fast path).

    Args:
        text: The input to repair. Non-``str`` input is converted with
            ``_coerce_text`` first.
        strict: When true, raise instead of returning ``""`` if the machine
            reports that there was nothing to parse.
        _doom_hint: Private. Forwarded to the machine as ``doomed_from``;
            the callers in this module pass the position at which
            ``json.loads`` gave up.

    Returns:
        The mended value, or ``""`` when the machine yields ``SKIP`` and
        ``strict`` is false.

    Raises:
        JSONMendError: If the machine yields ``SKIP`` and ``strict`` is true.
    """
    if not isinstance(text, str):
        text = _coerce_text(text)
    # Strip leading byte-order marks. The character is written as an escape
    # on purpose: a raw U+FEFF inside the literal is invisible and is easily
    # lost by editors and renderers, which turns this branch into a no-op.
    if text.startswith("\ufeff"):
        text = text.lstrip("\ufeff")
        # The hint was computed against the unstripped text, so it can no
        # longer be trusted once characters have been removed.
        _doom_hint = None
    machine = MendMachine()
    machine.final = True  # NOTE(review): presumably "whole input in one feed" - confirm in _engine
    if _doom_hint is not None:
        machine.doomed_from = _doom_hint
    machine.feed(text)
    result = machine.close()
    if result is SKIP:
        if strict:
            raise JSONMendError("no JSON content found in input")
        return ""
    return result
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def loads(json_str, *, skip_json_loads=False, strict=False, **_compat):
    """Parse ``json_str`` into Python objects, repairing it when necessary.

    Unless ``skip_json_loads`` is true, the standard ``json.loads`` is tried
    first and its result is returned directly when it succeeds. Any failure
    falls through to :func:`mend`. Non-``str`` input is converted with
    ``_coerce_text``. Extra keyword arguments are accepted and ignored.
    """
    text = json_str if isinstance(json_str, str) else _coerce_text(json_str)
    if skip_json_loads:
        return mend(text, strict=strict)
    try:
        return _json.loads(text)
    except Exception as exc:
        fail_pos = getattr(exc, "pos", None)
        ran_off_end = fail_pos is not None and fail_pos >= len(text)
        if ran_off_end:
            # truncated input: the machine need not rescan the root
            return mend(text, strict=strict, _doom_hint=fail_pos)
    return mend(text, strict=strict)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def repair_json(json_str="", return_objects=False, skip_json_loads=False,
                ensure_ascii=True, strict=False, **json_dumps_args):
    """Repair broken JSON and return it as a JSON string (or as objects).

    API-compatible with ``json_repair.repair_json`` for the core
    parameters. Unlike json_repair, the output is always *valid* JSON:
    non-finite numbers (NaN/Infinity) are serialized as ``null``.

    ``logging=True`` is rejected with ``TypeError``; ``stream_stable`` is
    accepted and dropped. Remaining keyword arguments are passed on to the
    serializer. With ``return_objects`` true the parsed value is returned
    without being serialized.
    """
    wants_logging = json_dumps_args.pop("logging", False)
    if wants_logging:
        raise TypeError(
            "jsonmend does not support json_repair's logging=True "
            "(incompatible with single-pass repair); remove the flag")
    json_dumps_args.pop("stream_stable", None)  # Mender is always stable

    if not isinstance(json_str, str):
        json_str = _coerce_text(json_str)

    value = None
    hint = None
    needs_mend = True
    if not skip_json_loads:
        try:
            value = _json.loads(json_str)
        except Exception as exc:
            fail_pos = getattr(exc, "pos", None)
            # Only a failure at (or past) the end of the text is handed to
            # the machine as a hint.
            if fail_pos is not None and fail_pos >= len(json_str):
                hint = fail_pos
        else:
            needs_mend = False

    if needs_mend:
        value = mend(json_str, strict=strict, _doom_hint=hint)

    if return_objects:
        return value
    return _dumps(value, ensure_ascii=ensure_ascii, **json_dumps_args)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def load(fd, **kwargs):
    """Read everything from the file-like ``fd`` and hand it to :func:`loads`.

    Keyword arguments are forwarded to :func:`loads` unchanged.
    """
    payload = fd.read()
    return loads(payload, **kwargs)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def from_file(filename, **kwargs):
    """Read the file at ``filename`` and hand its text to :func:`loads`.

    The file is opened as ``utf-8-sig`` with ``newline=""``; keyword
    arguments are forwarded to :func:`loads` unchanged.
    """
    with open(filename, encoding="utf-8-sig", newline="") as handle:
        contents = handle.read()
    return loads(contents, **kwargs)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class Mender:
    """Incremental mender that keeps its state between chunks.

    Every :meth:`feed` call consumes one chunk and hands back the
    best-effort parsed value so far; the cost of a feed is proportional to
    the new bytes, not to everything fed so far. The returned value is a
    *live view* that later feeds may extend in place; call :meth:`close`
    to obtain the final result.
    """

    def __init__(self):
        self._machine = MendMachine()
        self._closed = False
        self._result = None

    def feed(self, chunk):
        """Consume one chunk and return the current best-effort value.

        Non-``str`` chunks are converted with ``_coerce_text``. Raises
        ``ValueError`` once the mender has been closed.
        """
        if self._closed:
            raise ValueError("Mender is closed")
        piece = chunk if isinstance(chunk, str) else _coerce_text(chunk)
        machine = self._machine
        machine.feed(piece)
        return machine.current()

    @property
    def value(self):
        """Best-effort value right now (the final result once closed)."""
        return self._result if self._closed else self._machine.current()

    def close(self):
        """Finish parsing and return the final value; repeat calls are no-ops."""
        if self._closed:
            return self._result
        outcome = self._machine.close()
        if outcome is SKIP:
            # Nothing parseable was seen: report the empty string.
            outcome = ""
        self._result = outcome
        self._closed = True
        return outcome
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# serialization helpers
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _coerce_text(obj):
|
|
167
|
+
if isinstance(obj, (bytes, bytearray)):
|
|
168
|
+
return obj.decode("utf-8", errors="replace")
|
|
169
|
+
return str(obj)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _sanitize_nonfinite(value):
|
|
173
|
+
"""Replace NaN/Infinity floats with None (iterative, no recursion)."""
|
|
174
|
+
if isinstance(value, float):
|
|
175
|
+
return value if _math.isfinite(value) else None
|
|
176
|
+
if not isinstance(value, (dict, list)):
|
|
177
|
+
return value
|
|
178
|
+
root = [] if isinstance(value, list) else {}
|
|
179
|
+
todo = [(value, root)]
|
|
180
|
+
while todo:
|
|
181
|
+
src, dst = todo.pop()
|
|
182
|
+
items = src.items() if isinstance(src, dict) else enumerate(src)
|
|
183
|
+
for k, v in items:
|
|
184
|
+
if isinstance(v, float) and not _math.isfinite(v):
|
|
185
|
+
v = None
|
|
186
|
+
elif isinstance(v, dict):
|
|
187
|
+
new = {}
|
|
188
|
+
todo.append((v, new))
|
|
189
|
+
v = new
|
|
190
|
+
elif isinstance(v, list):
|
|
191
|
+
new = []
|
|
192
|
+
todo.append((v, new))
|
|
193
|
+
v = new
|
|
194
|
+
if isinstance(dst, dict):
|
|
195
|
+
dst[k] = v
|
|
196
|
+
else:
|
|
197
|
+
dst.append(v)
|
|
198
|
+
return root
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _has_nonfinite(value):
|
|
202
|
+
todo = [value]
|
|
203
|
+
while todo:
|
|
204
|
+
v = todo.pop()
|
|
205
|
+
if isinstance(v, float):
|
|
206
|
+
if not _math.isfinite(v):
|
|
207
|
+
return True
|
|
208
|
+
elif isinstance(v, dict):
|
|
209
|
+
todo.extend(v.values())
|
|
210
|
+
elif isinstance(v, list):
|
|
211
|
+
todo.extend(v)
|
|
212
|
+
return False
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _dumps(value, ensure_ascii=True, **kw):
|
|
216
|
+
kw.setdefault("separators", (", ", ": "))
|
|
217
|
+
try:
|
|
218
|
+
return _json.dumps(value, ensure_ascii=ensure_ascii,
|
|
219
|
+
allow_nan=False, **kw)
|
|
220
|
+
except ValueError:
|
|
221
|
+
return _json.dumps(_sanitize_nonfinite(value),
|
|
222
|
+
ensure_ascii=ensure_ascii, **kw)
|
|
223
|
+
except RecursionError:
|
|
224
|
+
return _iter_dumps(value, ensure_ascii=ensure_ascii)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _iter_dumps(value, ensure_ascii=True):
|
|
228
|
+
"""Iterative serializer for absurdly deep structures."""
|
|
229
|
+
out = []
|
|
230
|
+
enc = _json.encoder.encode_basestring_ascii if ensure_ascii \
|
|
231
|
+
else _json.encoder.encode_basestring
|
|
232
|
+
stack = [("v", value)]
|
|
233
|
+
while stack:
|
|
234
|
+
op, v = stack.pop()
|
|
235
|
+
if op == "t": # literal text
|
|
236
|
+
out.append(v)
|
|
237
|
+
continue
|
|
238
|
+
if isinstance(v, dict):
|
|
239
|
+
out.append("{")
|
|
240
|
+
stack.append(("t", "}"))
|
|
241
|
+
items = list(v.items())
|
|
242
|
+
for idx in range(len(items) - 1, -1, -1):
|
|
243
|
+
k, val = items[idx]
|
|
244
|
+
stack.append(("v", val))
|
|
245
|
+
stack.append(("t", enc(k) + ": "))
|
|
246
|
+
if idx:
|
|
247
|
+
stack.append(("t", ", "))
|
|
248
|
+
continue
|
|
249
|
+
if isinstance(v, list):
|
|
250
|
+
out.append("[")
|
|
251
|
+
stack.append(("t", "]"))
|
|
252
|
+
for idx in range(len(v) - 1, -1, -1):
|
|
253
|
+
stack.append(("v", v[idx]))
|
|
254
|
+
if idx:
|
|
255
|
+
stack.append(("t", ", "))
|
|
256
|
+
continue
|
|
257
|
+
if v is True:
|
|
258
|
+
out.append("true")
|
|
259
|
+
elif v is False:
|
|
260
|
+
out.append("false")
|
|
261
|
+
elif v is None:
|
|
262
|
+
out.append("null")
|
|
263
|
+
elif isinstance(v, str):
|
|
264
|
+
out.append(enc(v))
|
|
265
|
+
elif isinstance(v, float):
|
|
266
|
+
out.append(repr(v) if _math.isfinite(v) else "null")
|
|
267
|
+
else:
|
|
268
|
+
out.append(str(v))
|
|
269
|
+
return "".join(out)
|