jsonmend 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jsonmend/__init__.py ADDED
@@ -0,0 +1,269 @@
1
+ """jsonmend — mends the JSON your LLM almost wrote.
2
+
3
+ Batch API (drop-in for json_repair):
4
+
5
+ from jsonmend import repair_json, loads, load, from_file
6
+
7
+ Streaming API (true incremental, O(new bytes) per feed):
8
+
9
+ from jsonmend import Mender
10
+ m = Mender()
11
+ for chunk in stream:
12
+ partial = m.feed(chunk) # best-effort value so far
13
+ value = m.close()
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json as _json
19
+ import math as _math
20
+
21
+ from ._engine import SKIP, JSONMendError, MendMachine
22
+
23
+ __version__ = "0.1.0"
24
+
25
+ __all__ = [
26
+ "repair_json", "loads", "load", "from_file",
27
+ "mend", "Mender", "JSONMendError", "__version__",
28
+ ]
29
+
30
+
31
def mend(text, *, strict=False, _doom_hint=None):
    """Repair ``text`` and return the parsed Python value.

    This always runs the repair machine (no ``json.loads`` fast path).

    Args:
        text: Input to repair. Anything that is not a ``str`` is converted
            with ``_coerce_text`` first.
        strict: When true, raise instead of returning ``""`` if the machine
            reports that it found no value.
        _doom_hint: Private. When not ``None`` it is stored on the machine
            as ``doomed_from`` before feeding. It is discarded when a
            leading byte-order mark is stripped.

    Returns:
        The mended value, or ``""`` when the machine yields ``SKIP`` and
        ``strict`` is false.

    Raises:
        JSONMendError: If the machine yields ``SKIP`` and ``strict`` is true.
    """
    if not isinstance(text, str):
        text = _coerce_text(text)
    # The byte-order mark is written as an escape on purpose: a literal
    # U+FEFF is invisible and easily dropped by editors and tooling, which
    # silently degrades this guard into a comparison with "" (never true).
    if text.startswith("\ufeff"):
        text = text.lstrip("\ufeff")
        # The hint is a position in the unstripped text, so it is stale now.
        _doom_hint = None
    machine = MendMachine()
    machine.final = True
    if _doom_hint is not None:
        machine.doomed_from = _doom_hint
    machine.feed(text)
    result = machine.close()
    if result is SKIP:
        if strict:
            raise JSONMendError("no JSON content found in input")
        return ""
    return result
54
+
55
+
56
def loads(json_str, *, skip_json_loads=False, strict=False, **_compat):
    """Parse ``json_str`` into Python objects, repairing it when needed.

    Unless ``skip_json_loads`` is true, the standard ``json.loads`` is tried
    first and its result returned directly when it succeeds. On failure the
    input is handed to :func:`mend`; if the reported error position lies at
    or beyond the end of the text, that position is forwarded as
    ``_doom_hint``.

    Non-``str`` input is converted with ``_coerce_text``. Extra keyword
    arguments (``**_compat``) are accepted and ignored.
    """
    source = json_str if isinstance(json_str, str) else _coerce_text(json_str)
    if skip_json_loads:
        return mend(source, strict=strict)
    try:
        return _json.loads(source)
    except Exception as exc:
        fail_at = getattr(exc, "pos", None)
        ran_off_end = fail_at is not None and fail_at >= len(source)
        if ran_off_end:
            # The parser only gave up at the very end of the input.
            return mend(source, strict=strict, _doom_hint=fail_at)
    return mend(source, strict=strict)
73
+
74
+
75
def repair_json(json_str="", return_objects=False, skip_json_loads=False,
                ensure_ascii=True, strict=False, **json_dumps_args):
    """Repair broken JSON and return it as a JSON string (or as objects).

    The core parameters mirror ``json_repair.repair_json``. The module
    documents one difference: output is always valid JSON, with non-finite
    numbers (NaN/Infinity) serialized as ``null``.

    Args:
        json_str: Text to repair; non-``str`` input goes through
            ``_coerce_text``.
        return_objects: Return the parsed value instead of a JSON string.
        skip_json_loads: Skip the initial ``json.loads`` attempt.
        ensure_ascii: Forwarded to the serializer.
        strict: Forwarded to :func:`mend`.
        **json_dumps_args: Forwarded to the serializer. ``stream_stable`` is
            accepted and dropped; a truthy ``logging`` is rejected.

    Raises:
        TypeError: If ``logging`` is passed with a truthy value.
    """
    if json_dumps_args.pop("logging", False):
        raise TypeError(
            "jsonmend does not support json_repair's logging=True "
            "(incompatible with single-pass repair); remove the flag")
    # Accepted for json_repair compatibility; it has no effect here.
    json_dumps_args.pop("stream_stable", None)

    source = json_str if isinstance(json_str, str) else _coerce_text(json_str)

    result = None
    doom_hint = None
    needs_mending = True
    if not skip_json_loads:
        try:
            result = _json.loads(source)
        except Exception as exc:
            fail_at = getattr(exc, "pos", None)
            if fail_at is not None and fail_at >= len(source):
                doom_hint = fail_at
        else:
            needs_mending = False

    if needs_mending:
        result = mend(source, strict=strict, _doom_hint=doom_hint)

    if return_objects:
        return result
    return _dumps(result, ensure_ascii=ensure_ascii, **json_dumps_args)
108
+
109
+
110
def load(fd, **kwargs):
    """Read all of ``fd`` and repair/parse it via :func:`loads`.

    ``fd`` only needs a ``read()`` method; ``kwargs`` are passed through to
    :func:`loads` unchanged.
    """
    payload = fd.read()
    return loads(payload, **kwargs)
113
+
114
+
115
def from_file(filename, **kwargs):
    """Read the file at ``filename`` and repair/parse it via :func:`loads`.

    The file is opened as text with the ``utf-8-sig`` codec and with newline
    translation disabled (``newline=""``). ``kwargs`` are passed through to
    :func:`loads` unchanged.
    """
    with open(filename, encoding="utf-8-sig", newline="") as handle:
        content = handle.read()
    return loads(content, **kwargs)
119
+
120
+
121
class Mender:
    """Stateful incremental mender.

    Feed the input chunk by chunk with :meth:`feed`; every call returns the
    machine's current best-effort value. The design intent stated for this
    class is that a feed costs time proportional to the new chunk rather
    than to everything fed so far, and that the returned value is a *live
    view* which later feeds may extend in place. Call :meth:`close` for the
    final result.
    """

    def __init__(self):
        self._machine = MendMachine()
        self._closed = False  # set once close() has run
        self._result = None   # final value, cached by close()

    def feed(self, chunk):
        """Feed one chunk and return the current best-effort value.

        Non-``str`` chunks are converted with ``_coerce_text``.

        Raises:
            ValueError: If the mender has already been closed.
        """
        if self._closed:
            raise ValueError("Mender is closed")
        piece = chunk if isinstance(chunk, str) else _coerce_text(chunk)
        engine = self._machine
        engine.feed(piece)
        return engine.current()

    @property
    def value(self):
        """Current best-effort value (the final one after closing)."""
        return self._result if self._closed else self._machine.current()

    def close(self):
        """Finish parsing and return the final mended value.

        The first call closes the machine and caches the outcome (``""``
        when the machine yields ``SKIP``); later calls return the cache.
        """
        if self._closed:
            return self._result
        outcome = self._machine.close()
        self._result = "" if outcome is SKIP else outcome
        self._closed = True
        return self._result
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # serialization helpers
163
+ # ---------------------------------------------------------------------------
164
+
165
+
166
+ def _coerce_text(obj):
167
+ if isinstance(obj, (bytes, bytearray)):
168
+ return obj.decode("utf-8", errors="replace")
169
+ return str(obj)
170
+
171
+
172
+ def _sanitize_nonfinite(value):
173
+ """Replace NaN/Infinity floats with None (iterative, no recursion)."""
174
+ if isinstance(value, float):
175
+ return value if _math.isfinite(value) else None
176
+ if not isinstance(value, (dict, list)):
177
+ return value
178
+ root = [] if isinstance(value, list) else {}
179
+ todo = [(value, root)]
180
+ while todo:
181
+ src, dst = todo.pop()
182
+ items = src.items() if isinstance(src, dict) else enumerate(src)
183
+ for k, v in items:
184
+ if isinstance(v, float) and not _math.isfinite(v):
185
+ v = None
186
+ elif isinstance(v, dict):
187
+ new = {}
188
+ todo.append((v, new))
189
+ v = new
190
+ elif isinstance(v, list):
191
+ new = []
192
+ todo.append((v, new))
193
+ v = new
194
+ if isinstance(dst, dict):
195
+ dst[k] = v
196
+ else:
197
+ dst.append(v)
198
+ return root
199
+
200
+
201
+ def _has_nonfinite(value):
202
+ todo = [value]
203
+ while todo:
204
+ v = todo.pop()
205
+ if isinstance(v, float):
206
+ if not _math.isfinite(v):
207
+ return True
208
+ elif isinstance(v, dict):
209
+ todo.extend(v.values())
210
+ elif isinstance(v, list):
211
+ todo.extend(v)
212
+ return False
213
+
214
+
215
+ def _dumps(value, ensure_ascii=True, **kw):
216
+ kw.setdefault("separators", (", ", ": "))
217
+ try:
218
+ return _json.dumps(value, ensure_ascii=ensure_ascii,
219
+ allow_nan=False, **kw)
220
+ except ValueError:
221
+ return _json.dumps(_sanitize_nonfinite(value),
222
+ ensure_ascii=ensure_ascii, **kw)
223
+ except RecursionError:
224
+ return _iter_dumps(value, ensure_ascii=ensure_ascii)
225
+
226
+
227
+ def _iter_dumps(value, ensure_ascii=True):
228
+ """Iterative serializer for absurdly deep structures."""
229
+ out = []
230
+ enc = _json.encoder.encode_basestring_ascii if ensure_ascii \
231
+ else _json.encoder.encode_basestring
232
+ stack = [("v", value)]
233
+ while stack:
234
+ op, v = stack.pop()
235
+ if op == "t": # literal text
236
+ out.append(v)
237
+ continue
238
+ if isinstance(v, dict):
239
+ out.append("{")
240
+ stack.append(("t", "}"))
241
+ items = list(v.items())
242
+ for idx in range(len(items) - 1, -1, -1):
243
+ k, val = items[idx]
244
+ stack.append(("v", val))
245
+ stack.append(("t", enc(k) + ": "))
246
+ if idx:
247
+ stack.append(("t", ", "))
248
+ continue
249
+ if isinstance(v, list):
250
+ out.append("[")
251
+ stack.append(("t", "]"))
252
+ for idx in range(len(v) - 1, -1, -1):
253
+ stack.append(("v", v[idx]))
254
+ if idx:
255
+ stack.append(("t", ", "))
256
+ continue
257
+ if v is True:
258
+ out.append("true")
259
+ elif v is False:
260
+ out.append("false")
261
+ elif v is None:
262
+ out.append("null")
263
+ elif isinstance(v, str):
264
+ out.append(enc(v))
265
+ elif isinstance(v, float):
266
+ out.append(repr(v) if _math.isfinite(v) else "null")
267
+ else:
268
+ out.append(str(v))
269
+ return "".join(out)