json-repair 0.39.0__tar.gz → 0.39.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {json_repair-0.39.0/src/json_repair.egg-info → json_repair-0.39.1}/PKG-INFO +7 -7
- {json_repair-0.39.0 → json_repair-0.39.1}/README.md +6 -6
- {json_repair-0.39.0 → json_repair-0.39.1}/pyproject.toml +1 -1
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/json_parser.py +21 -35
- {json_repair-0.39.0 → json_repair-0.39.1/src/json_repair.egg-info}/PKG-INFO +7 -7
- {json_repair-0.39.0 → json_repair-0.39.1}/tests/test_json_repair.py +5 -3
- {json_repair-0.39.0 → json_repair-0.39.1}/LICENSE +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/setup.cfg +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/__init__.py +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/__main__.py +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/json_context.py +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/json_repair.py +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/py.typed +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/string_file_wrapper.py +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/top_level.txt +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/tests/test_coverage.py +0 -0
- {json_repair-0.39.0 → json_repair-0.39.1}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.39.0
|
3
|
+
Version: 0.39.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -196,12 +196,12 @@ pipx install json-repair
|
|
196
196
|
to know all options available:
|
197
197
|
```
|
198
198
|
$ json_repair -h
|
199
|
-
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
199
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
|
200
200
|
|
201
201
|
Repair and parse JSON files.
|
202
202
|
|
203
203
|
positional arguments:
|
204
|
-
filename The JSON file to repair
|
204
|
+
filename The JSON file to repair (if omitted, reads from stdin)
|
205
205
|
|
206
206
|
options:
|
207
207
|
-h, --help show this help message and exit
|
@@ -226,13 +226,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
|
|
226
226
|
# How to cite
|
227
227
|
If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
|
228
228
|
|
229
|
-
@software{Baccianella_JSON_Repair_-
|
229
|
+
@software{Baccianella_JSON_Repair_-_2025,
|
230
230
|
author = {Baccianella, Stefano},
|
231
|
-
month =
|
231
|
+
month = feb,
|
232
232
|
title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
|
233
233
|
url = {https://github.com/mangiucugna/json_repair},
|
234
|
-
version = {0.
|
235
|
-
year = {
|
234
|
+
version = {0.39.0},
|
235
|
+
year = {2025}
|
236
236
|
}
|
237
237
|
|
238
238
|
Thank you for citing my work and please send me a link to the paper if you can!
|
@@ -158,12 +158,12 @@ pipx install json-repair
|
|
158
158
|
to know all options available:
|
159
159
|
```
|
160
160
|
$ json_repair -h
|
161
|
-
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
161
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
|
162
162
|
|
163
163
|
Repair and parse JSON files.
|
164
164
|
|
165
165
|
positional arguments:
|
166
|
-
filename The JSON file to repair
|
166
|
+
filename The JSON file to repair (if omitted, reads from stdin)
|
167
167
|
|
168
168
|
options:
|
169
169
|
-h, --help show this help message and exit
|
@@ -188,13 +188,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
|
|
188
188
|
# How to cite
|
189
189
|
If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
|
190
190
|
|
191
|
-
@software{Baccianella_JSON_Repair_-
|
191
|
+
@software{Baccianella_JSON_Repair_-_2025,
|
192
192
|
author = {Baccianella, Stefano},
|
193
|
-
month =
|
193
|
+
month = feb,
|
194
194
|
title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
|
195
195
|
url = {https://github.com/mangiucugna/json_repair},
|
196
|
-
version = {0.
|
197
|
-
year = {
|
196
|
+
version = {0.39.0},
|
197
|
+
year = {2025}
|
198
198
|
}
|
199
199
|
|
200
200
|
Thank you for citing my work and please send me a link to the paper if you can!
|
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
4
4
|
[project]
|
5
5
|
name = "json_repair"
|
6
|
-
version = "0.39.0"
|
6
|
+
version = "0.39.1"
|
7
7
|
license = {file = "LICENSE"}
|
8
8
|
authors = [
|
9
9
|
{ name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
|
@@ -9,6 +9,7 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
|
|
9
9
|
class JSONParser:
|
10
10
|
# Constants
|
11
11
|
STRING_DELIMITERS = ['"', "'", "“", "”"]
|
12
|
+
NUMBER_CHARS = set("0123456789-.eE/,")
|
12
13
|
|
13
14
|
def __init__(
|
14
15
|
self,
|
@@ -129,8 +130,6 @@ class JSONParser:
|
|
129
130
|
# Context is used in the string parser to manage the lack of quotes
|
130
131
|
self.context.set(ContextValues.OBJECT_KEY)
|
131
132
|
|
132
|
-
self.skip_whitespaces_at()
|
133
|
-
|
134
133
|
# Save this index in case we need find a duplicate key
|
135
134
|
rollback_index = self.index
|
136
135
|
|
@@ -219,18 +218,13 @@ class JSONParser:
|
|
219
218
|
char = self.get_char_at()
|
220
219
|
|
221
220
|
# Especially at the end of an LLM generated json you might miss the last "]"
|
222
|
-
char = self.get_char_at()
|
223
221
|
if char and char != "]":
|
224
222
|
self.log(
|
225
|
-
"While parsing an array we missed the closing ],
|
226
|
-
)
|
227
|
-
self.index -= 1
|
228
|
-
# Add the missing closing bracket
|
229
|
-
self.json_str = (
|
230
|
-
self.json_str[: self.index + 1] + "]" + self.json_str[self.index + 1 :]
|
223
|
+
"While parsing an array we missed the closing ], ignoring it",
|
231
224
|
)
|
232
225
|
|
233
226
|
self.index += 1
|
227
|
+
|
234
228
|
self.context.reset()
|
235
229
|
return arr
|
236
230
|
|
@@ -275,15 +269,11 @@ class JSONParser:
|
|
275
269
|
self.log(
|
276
270
|
"While parsing a string, we found a literal instead of a quote",
|
277
271
|
)
|
278
|
-
self.log(
|
279
|
-
"While parsing a string, we found no starting quote. Will add the quote back",
|
280
|
-
)
|
281
272
|
missing_quotes = True
|
282
273
|
|
283
274
|
if not missing_quotes:
|
284
275
|
self.index += 1
|
285
276
|
|
286
|
-
self.skip_whitespaces_at()
|
287
277
|
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
|
288
278
|
if self.get_char_at() in self.STRING_DELIMITERS:
|
289
279
|
# If the next character is the same type of quote, then we manage it as double quotes
|
@@ -583,6 +573,13 @@ class JSONParser:
|
|
583
573
|
elif (
|
584
574
|
next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
|
585
575
|
):
|
576
|
+
# Check if self.index:self.index+i is only whitespaces, break if that's the case
|
577
|
+
if all(
|
578
|
+
str(self.get_char_at(j)).isspace()
|
579
|
+
for j in range(1, i)
|
580
|
+
if self.get_char_at(j)
|
581
|
+
):
|
582
|
+
break
|
586
583
|
if self.context.current == ContextValues.OBJECT_VALUE:
|
587
584
|
# But this might not be it! This could be just a missing comma
|
588
585
|
# We found a delimiter and we need to check if this is a key
|
@@ -610,26 +607,16 @@ class JSONParser:
|
|
610
607
|
self.index += 1
|
611
608
|
char = self.get_char_at()
|
612
609
|
elif self.context.current == ContextValues.ARRAY:
|
613
|
-
#
|
614
|
-
#
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
)
|
624
|
-
next_c = self.get_char_at(i)
|
625
|
-
if next_c and next_c in [",", "]"]:
|
626
|
-
self.log(
|
627
|
-
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
|
628
|
-
)
|
629
|
-
unmatched_delimiter = not unmatched_delimiter
|
630
|
-
string_acc += str(char)
|
631
|
-
self.index += 1
|
632
|
-
char = self.get_char_at()
|
610
|
+
# If we got up to here it means that this is a situation like this:
|
611
|
+
# ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
|
612
|
+
# So we need to ignore this quote
|
613
|
+
self.log(
|
614
|
+
"While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
|
615
|
+
)
|
616
|
+
unmatched_delimiter = not unmatched_delimiter
|
617
|
+
string_acc += str(char)
|
618
|
+
self.index += 1
|
619
|
+
char = self.get_char_at()
|
633
620
|
|
634
621
|
if (
|
635
622
|
char
|
@@ -663,10 +650,9 @@ class JSONParser:
|
|
663
650
|
def parse_number(self) -> Union[float, int, str, JSONReturnType]:
|
664
651
|
# <number> is a valid real number expressed in one of a number of given formats
|
665
652
|
number_str = ""
|
666
|
-
number_chars = set("0123456789-.eE/,")
|
667
653
|
char = self.get_char_at()
|
668
654
|
is_array = self.context.current == ContextValues.ARRAY
|
669
|
-
while char and char in
|
655
|
+
while char and char in self.NUMBER_CHARS and (not is_array or char != ","):
|
670
656
|
number_str += char
|
671
657
|
self.index += 1
|
672
658
|
char = self.get_char_at()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: json_repair
|
3
|
-
Version: 0.39.0
|
3
|
+
Version: 0.39.1
|
4
4
|
Summary: A package to repair broken json strings
|
5
5
|
Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
|
6
6
|
License: MIT License
|
@@ -196,12 +196,12 @@ pipx install json-repair
|
|
196
196
|
to know all options available:
|
197
197
|
```
|
198
198
|
$ json_repair -h
|
199
|
-
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
|
199
|
+
usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
|
200
200
|
|
201
201
|
Repair and parse JSON files.
|
202
202
|
|
203
203
|
positional arguments:
|
204
|
-
filename The JSON file to repair
|
204
|
+
filename The JSON file to repair (if omitted, reads from stdin)
|
205
205
|
|
206
206
|
options:
|
207
207
|
-h, --help show this help message and exit
|
@@ -226,13 +226,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
|
|
226
226
|
# How to cite
|
227
227
|
If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
|
228
228
|
|
229
|
-
@software{Baccianella_JSON_Repair_-
|
229
|
+
@software{Baccianella_JSON_Repair_-_2025,
|
230
230
|
author = {Baccianella, Stefano},
|
231
|
-
month =
|
231
|
+
month = feb,
|
232
232
|
title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
|
233
233
|
url = {https://github.com/mangiucugna/json_repair},
|
234
|
-
version = {0.
|
235
|
-
year = {
|
234
|
+
version = {0.39.0},
|
235
|
+
year = {2025}
|
236
236
|
}
|
237
237
|
|
238
238
|
Thank you for citing my work and please send me a link to the paper if you can!
|
@@ -126,6 +126,8 @@ def test_array_edge_cases():
|
|
126
126
|
assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
|
127
127
|
assert repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}') == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
|
128
128
|
assert repair_json('[ "value", /* comment */ "value2" ]') == '["value", "value2"]'
|
129
|
+
assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
|
130
|
+
assert repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}') == '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
|
129
131
|
|
130
132
|
def test_escaping():
|
131
133
|
assert repair_json("'\"'") == '""'
|
@@ -150,7 +152,7 @@ def test_object_edge_cases():
|
|
150
152
|
assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
|
151
153
|
assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
|
152
154
|
assert repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }') == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
|
153
|
-
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
155
|
+
assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
|
154
156
|
assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
|
155
157
|
assert repair_json("{'text': 'words{words in brackets}more words'}") == '{"text": "words{words in brackets}more words"}'
|
156
158
|
assert repair_json('{text:words{words in brackets}}') == '{"text": "words{words in brackets}"}'
|
@@ -265,8 +267,8 @@ def test_repair_json_from_file():
|
|
265
267
|
# Write content to the temporary file
|
266
268
|
with os.fdopen(temp_fd, 'w') as tmp:
|
267
269
|
tmp.write("{key:value}")
|
268
|
-
assert from_file(filename=temp_path, logging=True) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'
|
269
|
-
assert from_file(filename=temp_path, logging=True, chunk_length=2) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'
|
270
|
+
assert from_file(filename=temp_path, logging=True) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
|
271
|
+
assert from_file(filename=temp_path, logging=True, chunk_length=2) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
|
270
272
|
finally:
|
271
273
|
# Clean up - delete the temporary file
|
272
274
|
os.remove(temp_path)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|