json-repair 0.46.0__tar.gz → 0.46.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {json_repair-0.46.0/src/json_repair.egg-info → json_repair-0.46.2}/PKG-INFO +3 -2
- {json_repair-0.46.0 → json_repair-0.46.2}/README.md +2 -1
- {json_repair-0.46.0 → json_repair-0.46.2}/pyproject.toml +19 -9
- json_repair-0.46.2/src/json_repair/__init__.py +3 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair/json_parser.py +39 -91
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair/json_repair.py +1 -4
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair/object_comparer.py +2 -5
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair/string_file_wrapper.py +9 -17
- {json_repair-0.46.0 → json_repair-0.46.2/src/json_repair.egg-info}/PKG-INFO +3 -2
- {json_repair-0.46.0 → json_repair-0.46.2}/tests/test_json_repair.py +65 -170
- {json_repair-0.46.0 → json_repair-0.46.2}/tests/test_performance.py +14 -30
- json_repair-0.46.0/src/json_repair/__init__.py +0 -4
- {json_repair-0.46.0 → json_repair-0.46.2}/LICENSE +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/setup.cfg +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair/__main__.py +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair/json_context.py +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair/py.typed +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair.egg-info/SOURCES.txt +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair.egg-info/dependency_links.txt +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair.egg-info/entry_points.txt +0 -0
- {json_repair-0.46.0 → json_repair-0.46.2}/src/json_repair.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: json_repair
-Version: 0.46.0
+Version: 0.46.2
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -38,7 +38,7 @@ License-File: LICENSE
 Dynamic: license-file
 
 [](https://pypi.org/project/json-repair/)
-
 [](https://pypi.org/project/json-repair/)
 [](https://github.com/sponsors/mangiucugna)
 [](https://github.com/mangiucugna/json_repair/stargazers)
@@ -289,6 +289,7 @@ You will need owner access to this repository
 - Typescript: https://github.com/josdejong/jsonrepair
 - Go: https://github.com/RealAlexandreAI/json-repair
 - Ruby: https://github.com/sashazykov/json-repair-rb
+- Rust: https://github.com/oramasearch/llm_json
 ---
 ## Star History
 
@@ -1,5 +1,5 @@
 [](https://pypi.org/project/json-repair/)
-
 [](https://pypi.org/project/json-repair/)
 [](https://github.com/sponsors/mangiucugna)
 [](https://github.com/mangiucugna/json_repair/stargazers)
@@ -250,6 +250,7 @@ You will need owner access to this repository
 - Typescript: https://github.com/josdejong/jsonrepair
 - Go: https://github.com/RealAlexandreAI/json-repair
 - Ruby: https://github.com/sashazykov/json-repair-rb
+- Rust: https://github.com/oramasearch/llm_json
 ---
 ## Star History
 
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "json_repair"
-version = "0.46.0"
+version = "0.46.2"
 license = {file = "LICENSE"}
 authors = [
     { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -53,26 +53,32 @@ json_repair = "json_repair.__main__:cli"
 [tool.ruff]
 # Same as Black.
 indent-width = 4
-
+line-length = 120
 # Assume Python 3.13
 target-version = "py313"
 [tool.ruff.lint]
 # Read more here https://docs.astral.sh/ruff/rules/
 # By default, Ruff enables Flake8's E and F rules
+# FastAPI - FAST
 # Flake8-bandit - S
 # Flake8-bugbear – catches real-world Python footguns - B
 # Flake8-builtins - A
+# Flake8-comprehensions - C4
+# Flake8-commas - COM
+# Flake8-quotes - Q
 # Flake8-tidy-imports - TID
+# Flake8-unused-arguments - ARG
 # Isort - I
 # Mccabe – code complexity warnings - C90
+# PEP 8 Naming convention - N
 # Pycodestyle - E, W
 # Pyflakes - F
-# Pylint - PLC, PLE, PLW
+# Pylint - PLC, PLE, PLR, PLW
+# PyTest - PT
+# Pyupgrade – safe modernization (e.g., str() → f"") - UP
 # Ruff specific - RUF
 # Simplifications (e.g., if x == True → if x) - SIM
-
-
-select = ['A', 'B', 'C90', 'E', 'F', 'I', 'PLC', 'PLE', 'PLW', 'S', 'SIM', 'TID', 'UP', 'W']
+select = ['A', 'ARG', 'B', 'C4', 'COM', 'C90', 'E', 'F', 'I', 'N', 'PLC', 'PLE', 'PLW', 'PT', 'Q', 'S', 'SIM', 'TID', 'UP', 'W']
 # Only enable these RUF rules
 extend-select = [
     "RUF001", # ambiguous Unicode
@@ -81,8 +87,12 @@ extend-select = [
     "RUF016", # unnecessary else after return (optional)
     "RUF018", # unnecessary else after raise (optional)
 ]
-ignore = [
-
+ignore = [
+    "S101", # assert: Use of assert detected. We like assert
+    "COM812", # Ruff: The following rule may cause conflicts when used with the formatter
+    "E501", # Line too long
+    "C901", # `function` is too complex
+]
 # Allow fix for all enabled rules (when `--fix`) is provided.
 fixable = ["ALL"]
 unfixable = []
@@ -101,4 +111,4 @@ line-ending = "auto"
 
 [tool.ruff.lint.per-file-ignores]
 # Explicit re-exports is fine in __init__.py, still a code smell elsewhere.
-"__init__.py" = ["PLC0414"]
+"__init__.py" = ["PLC0414"]
@@ -41,7 +41,7 @@ class JSONParser:
             self.log = self._log
         else:
             # No-op
-            self.log = lambda *args, **kwargs: None
+            self.log = lambda *args, **kwargs: None  # noqa: ARG005
         # When the json to be repaired is the accumulation of streaming json at a certain moment.
         # e.g. json obtained from llm response.
         # If this parameter to True will keep the repair results stable. For example:
@@ -67,6 +67,9 @@ class JSONParser:
                         # replace the last entry with the new one since the new one seems an update
                         json.pop()
                     json.append(j)
+                else:
+                    # this was a bust, move the index
+                    self.index += 1
             # If nothing extra was found, don't return an array
             if len(json) == 1:
                 self.log(
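The hunk above extends the parse loop that collects several top-level JSON payloads from a single input: when a candidate parse comes back empty, the index now advances instead of stalling. A minimal usage sketch of that multiple-payload behaviour, matching the test_multiple_jsons assertions further down in this diff:

```python
from json_repair import repair_json

# Two top-level payloads in one string are returned together as a list.
print(repair_json('{"key":"value"}[1,2,3,True]'))
# -> '[{"key": "value"}, [1, 2, 3, true]]'
```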
@@ -102,14 +105,10 @@ class JSONParser:
                 )
                 return ""
             # <string> starts with a quote
-            elif not self.context.empty and (
-                char in self.STRING_DELIMITERS or char.isalpha()
-            ):
+            elif not self.context.empty and (char in self.STRING_DELIMITERS or char.isalpha()):
                 return self.parse_string()
             # <number> starts with [0-9] or minus
-            elif not self.context.empty and (
-                char.isdigit() or char == "-" or char == "."
-            ):
+            elif not self.context.empty and (char.isdigit() or char == "-" or char == "."):
                 return self.parse_number()
             elif char in ["#", "/"]:
                 return self.parse_comment()
@@ -161,8 +160,7 @@ class JSONParser:
                     if isinstance(prev_value, list):
                         prev_value.extend(
                             new_array[0]
-                            if len(new_array) == 1
-                            and isinstance(new_array[0], list)
+                            if len(new_array) == 1 and isinstance(new_array[0], list)
                             else new_array
                         )
                     self.skip_whitespaces_at()
@@ -182,11 +180,7 @@ class JSONParser:
                     )
                     self.index = rollback_index - 1
                     # add an opening curly brace to make this work
-                    self.json_str = (
-                        self.json_str[: self.index + 1]
-                        + "{"
-                        + self.json_str[self.index + 1 :]
-                    )
+                    self.json_str = self.json_str[: self.index + 1] + "{" + self.json_str[self.index + 1 :]
                     break
 
             # Skip filler whitespaces
@@ -239,10 +233,7 @@ class JSONParser:
                 i = 1
                 i = self.skip_to_character(char, i)
                 i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
-                if self.get_char_at(i) == ":":
-                    value = self.parse_object()
-                else:
-                    value = self.parse_string()
+                value = self.parse_object() if self.get_char_at(i) == ":" else self.parse_string()
             else:
                 value = self.parse_json()
 
@@ -304,10 +295,7 @@ class JSONParser:
             elif char.isalnum():
                 # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
                 # But remember, object keys are only of type string
-                if (
-                    char.lower() in ["t", "f", "n"]
-                    and self.context.current != ContextValues.OBJECT_KEY
-                ):
+                if char.lower() in ["t", "f", "n"] and self.context.current != ContextValues.OBJECT_KEY:
                     value = self.parse_boolean_or_null()
                     if value != "":
                         return value
@@ -320,15 +308,9 @@ class JSONParser:
             self.index += 1
 
         # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
-        if (
-            self.get_char_at() in self.STRING_DELIMITERS
-            and self.get_char_at() == lstring_delimiter
-        ):
+        if self.get_char_at() in self.STRING_DELIMITERS and self.get_char_at() == lstring_delimiter:
             # If it's an empty key, this was easy
-            if (
-                self.context.current == ContextValues.OBJECT_KEY
-                and self.get_char_at(1) == ":"
-            ):
+            if self.context.current == ContextValues.OBJECT_KEY and self.get_char_at(1) == ":":
                 self.index += 1
                 return ""
             if self.get_char_at(1) == lstring_delimiter:
@@ -377,11 +359,7 @@ class JSONParser:
         char = self.get_char_at()
         unmatched_delimiter = False
         while char and char != rstring_delimiter:
-            if (
-                missing_quotes
-                and self.context.current == ContextValues.OBJECT_KEY
-                and (char == ":" or char.isspace())
-            ):
+            if missing_quotes and self.context.current == ContextValues.OBJECT_KEY and (char == ":" or char.isspace()):
                 self.log(
                     "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
                 )
@@ -418,9 +396,7 @@ class JSONParser:
                     else:
                         # But again, this could just be something a bit stupid like "lorem, "ipsum" sic"
                         # Check if we find a : afterwards (skipping space)
-                        i = self.skip_whitespaces_at(
-                            idx=i + 1, move_main_index=False
-                        )
+                        i = self.skip_whitespaces_at(idx=i + 1, move_main_index=False)
                        next_c = self.get_char_at(i)
                        if next_c and next_c != ":":
                            rstring_delimiter_missing = False
@@ -483,12 +459,19 @@ class JSONParser:
                 string_acc += escape_seqs.get(char, char) or char
                 self.index += 1
                 char = self.get_char_at()
+            elif char in ["u", "x"]:
+                # If we find a unicode escape sequence, normalize it
+                num_chars = 4 if char == "u" else 2
+                next_chars = self.json_str[self.index + 1 : self.index + 1 + num_chars]
+                if len(next_chars) == num_chars and all(c in "0123456789abcdefABCDEF" for c in next_chars):
+                    self.log("Found a unicode escape sequence, normalizing it")
+                    string_acc = string_acc[:-1]
+                    string_acc += chr(int(next_chars, 16))
+                    self.index += 1 + num_chars
+                    char = self.get_char_at()
+                    continue
             # If we are in object key context and we find a colon, it could be a missing right quote
-            if (
-                char == ":"
-                and not missing_quotes
-                and self.context.current == ContextValues.OBJECT_KEY
-            ):
+            if char == ":" and not missing_quotes and self.context.current == ContextValues.OBJECT_KEY:
                 # Ok now we need to check if this is followed by a value like "..."
                 i = self.skip_to_character(character=lstring_delimiter, idx=1)
                 next_c = self.get_char_at(i)
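The new `elif char in ["u", "x"]` branch normalizes `\uXXXX` and `\xXX` escape sequences while a string is being repaired. A small sketch of the visible effect, mirroring the test_escaping cases added later in this diff:

```python
from json_repair import repair_json

# With skip_json_loads=True the repair parser itself (not json.loads) decodes
# the escape sequence: \u0076\u0061\u006C\u0075\u0065 spells "value".
print(repair_json('{"key": "\\u0076\\u0061\\u006C\\u0075\\u0065"}', skip_json_loads=True))
# -> '{"key": "value"}'
```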
@@ -519,14 +502,9 @@ class JSONParser:
             if char == rstring_delimiter:
                 # Special case here, in case of double quotes one after another
                 if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
-                    self.log(
-                        "While parsing a string, we found a doubled quote, ignoring it"
-                    )
+                    self.log("While parsing a string, we found a doubled quote, ignoring it")
                     self.index += 1
-                elif (
-                    missing_quotes
-                    and self.context.current == ContextValues.OBJECT_VALUE
-                ):
+                elif missing_quotes and self.context.current == ContextValues.OBJECT_VALUE:
                     # In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
                     i = 1
                     next_c = self.get_char_at(i)
@@ -570,18 +548,9 @@ class JSONParser:
                         check_comma_in_object_value = False
                     # If we are in an object context, let's check for the right delimiters
                     if (
-                        (
-                            ContextValues.OBJECT_KEY in self.context.context
-                            and next_c in [":", "}"]
-                        )
-                        or (
-                            ContextValues.OBJECT_VALUE in self.context.context
-                            and next_c == "}"
-                        )
-                        or (
-                            ContextValues.ARRAY in self.context.context
-                            and next_c in ["]", ","]
-                        )
+                        (ContextValues.OBJECT_KEY in self.context.context and next_c in [":", "}"])
+                        or (ContextValues.OBJECT_VALUE in self.context.context and next_c == "}")
+                        or (ContextValues.ARRAY in self.context.context and next_c in ["]", ","])
                         or (
                             check_comma_in_object_value
                             and self.context.current == ContextValues.OBJECT_VALUE
@@ -592,10 +561,7 @@ class JSONParser:
                         i += 1
                         next_c = self.get_char_at(i)
                     # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
-                    if (
-                        next_c == ","
-                        and self.context.current == ContextValues.OBJECT_VALUE
-                    ):
+                    if next_c == "," and self.context.current == ContextValues.OBJECT_VALUE:
                         i += 1
                         i = self.skip_to_character(character=rstring_delimiter, idx=i)
                         next_c = self.get_char_at(i)
@@ -603,29 +569,20 @@ class JSONParser:
                             i += 1
                             i = self.skip_whitespaces_at(idx=i, move_main_index=False)
                             next_c = self.get_char_at(i)
-                    elif (
-                        next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
-                    ):
+                    elif next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\":
                         # Check if self.index:self.index+i is only whitespaces, break if that's the case
-                        if all(
-                            str(self.get_char_at(j)).isspace()
-                            for j in range(1, i)
-                            if self.get_char_at(j)
-                        ):
+                        if all(str(self.get_char_at(j)).isspace() for j in range(1, i) if self.get_char_at(j)):
                             break
                         if self.context.current == ContextValues.OBJECT_VALUE:
                             # But this might not be it! This could be just a missing comma
                             # We found a delimiter and we need to check if this is a key
                             # so find a rstring_delimiter and a colon after
-                            i = self.skip_to_character(
-                                character=rstring_delimiter, idx=i + 1
-                            )
+                            i = self.skip_to_character(character=rstring_delimiter, idx=i + 1)
                             i += 1
                             next_c = self.get_char_at(i)
                             while next_c and next_c != ":":
                                 if next_c in [",", "]", "}"] or (
-                                    next_c == rstring_delimiter
-                                    and self.get_char_at(i - 1) != "\\"
+                                    next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
                                 ):
                                     break
                                     i += 1
@@ -658,12 +615,7 @@ class JSONParser:
             string_acc += str(char)
             self.index += 1
             char = self.get_char_at()
-            if (
-                char
-                and missing_quotes
-                and self.context.current == ContextValues.OBJECT_KEY
-                and char.isspace()
-            ):
+            if char and missing_quotes and self.context.current == ContextValues.OBJECT_KEY and char.isspace():
                 self.log(
                     "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
                 )
@@ -683,9 +635,7 @@ class JSONParser:
         else:
             self.index += 1
 
-        if not self.stream_stable and (
-            missing_quotes or (string_acc and string_acc[-1] == "\n")
-        ):
+        if not self.stream_stable and (missing_quotes or (string_acc and string_acc[-1] == "\n")):
             # Clean the whitespaces for some corner cases
             string_acc = string_acc.rstrip()
 
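For context, `stream_stable` is the flag referenced here: it decides whether trailing whitespace and dangling escapes of a partially streamed string get trimmed. A short sketch of the difference, based on the test_stream_stable assertions later in this diff:

```python
from json_repair import repair_json

# stream_stable=False strips the trailing newline of an unterminated string;
# stream_stable=True keeps it so streamed output stays stable between chunks.
print(repair_json('{"key": "val\\n', stream_stable=False))  # -> '{"key": "val"}'
print(repair_json('{"key": "val\\n', stream_stable=True))   # -> '{"key": "val\\n"}'
```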
@@ -793,9 +743,7 @@ class JSONParser:
             while True:
                 char = self.get_char_at()
                 if not char:
-                    self.log(
-                        "Reached end-of-string while parsing block comment; unclosed block comment."
-                    )
+                    self.log("Reached end-of-string while parsing block comment; unclosed block comment.")
                     break
                 comment += char
                 self.index += 1
@@ -236,10 +236,7 @@ def cli(inline_args: list[str] | None = None) -> int:
         help="Number of spaces for indentation (Default 2)",
     )
 
-    if inline_args is None:
-        args = parser.parse_args()
-    else:
-        args = parser.parse_args(inline_args)
+    args = parser.parse_args() if inline_args is None else parser.parse_args(inline_args)
 
     # Inline mode requires a filename, so error out if none was provided.
     if args.inline and not args.filename:  # pragma: no cover
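The `cli` function shown in this hunk backs the `json_repair` console script declared in pyproject.toml; its optional `inline_args` parameter lets it be driven from Python as well as from the shell. A hedged sketch, assuming `cli` is importable from `json_repair.json_repair` as the file path suggests, and using a hypothetical file name that would have to exist on disk:

```python
from json_repair.json_repair import cli

# Equivalent to running `json_repair broken.json` from the shell; with
# inline_args=None the argparse parser would read sys.argv instead.
exit_code = cli(inline_args=["broken.json"])
```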
@@ -6,7 +6,7 @@ class ObjectComparer: # pragma: no cover
         pass  # No operation performed in the constructor
 
     @staticmethod
-    def is_same_object(obj1: Any, obj2: Any
+    def is_same_object(obj1: Any, obj2: Any) -> bool:
         """
         Recursively compares two objects and ensures that:
         - Their types match
@@ -30,10 +30,7 @@ class ObjectComparer: # pragma: no cover
         elif isinstance(obj1, list):
             if len(obj1) != len(obj2):
                 return False
-            for i in range(len(obj1)):
-                if not ObjectComparer.is_same_object(obj1[i], obj2[i]):
-                    return False
-            return True
+            return all(ObjectComparer.is_same_object(obj1[i], obj2[i]) for i in range(len(obj1)))
 
         # For atoms: types already match, so just return True
         return True
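`ObjectComparer.is_same_object` is what the parse loop (earlier hunk) uses to decide whether a newly parsed payload looks like an update of the previous one: it checks that types and structure match rather than that values are equal. An illustrative sketch based on the docstring shown above; the example values are not from the package:

```python
from json_repair.object_comparer import ObjectComparer

# Same shape and element types on both sides -> True, even though values differ.
print(ObjectComparer.is_same_object({"a": [1, 2]}, {"a": [3, 4]}))  # True
# A type mismatch on the value (int vs str) -> False.
print(ObjectComparer.is_same_object({"a": 1}, {"a": "1"}))          # False
```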
@@ -4,7 +4,7 @@ from typing import TextIO
 
 class StringFileWrapper:
     # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
-    def __init__(self, fd: TextIO,
+    def __init__(self, fd: TextIO, chunk_length: int) -> None:
         """
         Initialize the StringFileWrapper with a file descriptor and chunk length.
 
@@ -23,10 +23,10 @@ class StringFileWrapper:
         # Buffers are 1MB strings that are read from the file
         # and kept in memory to keep reads low
         self.buffers: dict[int, str] = {}
-        #
-        if not
-
-        self.buffer_length =
+        # chunk_length is in bytes
+        if not chunk_length or chunk_length < 2:
+            chunk_length = 1_000_000
+        self.buffer_length = chunk_length
 
     def get_buffer(self, index: int) -> str:
         """
@@ -65,19 +65,11 @@ class StringFileWrapper:
             buffer_index = index.start // self.buffer_length
             buffer_end = index.stop // self.buffer_length
             if buffer_index == buffer_end:
-                return self.get_buffer(buffer_index)[
-                    index.start % self.buffer_length : index.stop % self.buffer_length
-                ]
+                return self.get_buffer(buffer_index)[index.start % self.buffer_length : index.stop % self.buffer_length]
             else:
-                start_slice = self.get_buffer(buffer_index)[
-                    index.start % self.buffer_length :
-                ]
-                end_slice = self.get_buffer(buffer_end)[
-                    : index.stop % self.buffer_length
-                ]
-                middle_slices = [
-                    self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
-                ]
+                start_slice = self.get_buffer(buffer_index)[index.start % self.buffer_length :]
+                end_slice = self.get_buffer(buffer_end)[: index.stop % self.buffer_length]
+                middle_slices = [self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)]
                 return start_slice + "".join(middle_slices) + end_slice
         else:
             buffer_index = index // self.buffer_length
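`StringFileWrapper` presents a file descriptor as an indexable, sliceable string, reading it lazily in `chunk_length`-sized buffers; the hunk above is the slice path that stitches a result together when a slice spans several buffers. A minimal sketch, with an in-memory file and a tiny chunk size chosen purely for illustration:

```python
import io
from json_repair.string_file_wrapper import StringFileWrapper

# chunk_length values below 2 fall back to 1_000_000 per the __init__ hunk;
# a 4-character chunk forces the multi-buffer slice path shown above.
wrapper = StringFileWrapper(io.StringIO('{"key": "value"}'), chunk_length=4)
print(wrapper[0:8])  # spans two buffers -> '{"key": '
print(wrapper[1])    # single-index access -> '"'
```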
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: json_repair
-Version: 0.46.0
+Version: 0.46.2
 Summary: A package to repair broken json strings
 Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
 License: MIT License
@@ -38,7 +38,7 @@ License-File: LICENSE
 Dynamic: license-file
 
 [](https://pypi.org/project/json-repair/)
-
 [](https://pypi.org/project/json-repair/)
 [](https://github.com/sponsors/mangiucugna)
 [](https://github.com/mangiucugna/json_repair/stargazers)
@@ -289,6 +289,7 @@ You will need owner access to this repository
 - Typescript: https://github.com/josdejong/jsonrepair
 - Go: https://github.com/RealAlexandreAI/json-repair
 - Ruby: https://github.com/sashazykov/json-repair-rb
+- Rust: https://github.com/oramasearch/llm_json
 ---
 ## Star History
 
@@ -17,9 +17,11 @@ def test_basic_types_valid():
     assert repair_json("[]", return_objects=True) == []
     assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
     assert repair_json("{}", return_objects=True) == {}
-    assert repair_json(
-
-
+    assert repair_json('{ "key": "value", "key2": 1, "key3": True }', return_objects=True) == {
+        "key": "value",
+        "key2": 1,
+        "key3": True,
+    }
 
 
 def test_basic_types_invalid():
@@ -30,9 +32,11 @@ def test_basic_types_invalid():
     assert repair_json("[", return_objects=True) == []
     assert repair_json("[1, 2, 3, 4", return_objects=True) == [1, 2, 3, 4]
     assert repair_json("{", return_objects=True) == {}
-    assert repair_json(
-
-
+    assert repair_json('{ "key": value, "key2": 1 "key3": null }', return_objects=True) == {
+        "key": "value",
+        "key2": 1,
+        "key3": None,
+    }
 
 
 def test_valid_json():
@@ -40,26 +44,13 @@ def test_valid_json():
         repair_json('{"name": "John", "age": 30, "city": "New York"}')
         == '{"name": "John", "age": 30, "city": "New York"}'
     )
-    assert (
-        repair_json('{"employees":["John", "Anna", "Peter"]} ')
-        == '{"employees": ["John", "Anna", "Peter"]}'
-    )
+    assert repair_json('{"employees":["John", "Anna", "Peter"]} ') == '{"employees": ["John", "Anna", "Peter"]}'
     assert repair_json('{"key": "value:value"}') == '{"key": "value:value"}'
-    assert (
-
-        == '{"text": "The quick brown fox,"}'
-    )
-    assert (
-        repair_json('{"text": "The quick brown fox won\'t jump"}')
-        == '{"text": "The quick brown fox won\'t jump"}'
-    )
+    assert repair_json('{"text": "The quick brown fox,"}') == '{"text": "The quick brown fox,"}'
+    assert repair_json('{"text": "The quick brown fox won\'t jump"}') == '{"text": "The quick brown fox won\'t jump"}'
     assert repair_json('{"key": ""') == '{"key": ""}'
-    assert (
-
-    )
-    assert (
-        repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
-    )
+    assert repair_json('{"key1": {"key2": [1, 2, 3]}}') == '{"key1": {"key2": [1, 2, 3]}}'
+    assert repair_json('{"key": 12345678901234567890}') == '{"key": 12345678901234567890}'
     assert repair_json('{"key": "value\u263a"}') == '{"key": "value\\u263a"}'
     assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
 
@@ -87,21 +78,13 @@ def test_general_edge_cases():
 
 
 def test_mixed_data_types():
-    assert (
-
-        == '{"key": true, "key2": false, "key3": null}'
-    )
-    assert (
-        repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ')
-        == '{"key": true, "key2": false, "key3": null}'
-    )
+    assert repair_json(' {"key": true, "key2": false, "key3": null}') == '{"key": true, "key2": false, "key3": null}'
+    assert repair_json('{"key": TRUE, "key2": FALSE, "key3": Null} ') == '{"key": true, "key2": false, "key3": null}'
 
 
 def test_missing_and_mixed_quotes():
     assert (
-        repair_json(
-            "{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}"
-        )
+        repair_json("{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}")
         == '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}'
     )
     assert (
@@ -120,14 +103,8 @@ def test_missing_and_mixed_quotes():
         repair_json('{"name": John, "age": 30, "city": "New York"}')
         == '{"name": "John", "age": 30, "city": "New York"}'
     )
-    assert (
-
-        == '{"slanted_delimiter": "value"}'
-    )
-    assert (
-        repair_json('{"name": "John", "age": 30, "city": "New')
-        == '{"name": "John", "age": 30, "city": "New"}'
-    )
+    assert repair_json('{“slanted_delimiter”: "value"}') == '{"slanted_delimiter": "value"}'
+    assert repair_json('{"name": "John", "age": 30, "city": "New') == '{"name": "John", "age": 30, "city": "New"}'
     assert (
         repair_json('{"name": "John", "age": 30, "city": "New York, "gender": "male"}')
         == '{"name": "John", "age": 30, "city": "New York", "gender": "male"}'
@@ -138,18 +115,12 @@ def test_missing_and_mixed_quotes():
         == '[{"key": "value", "notes": "lorem \\"ipsum\\", sic."}]'
     )
     assert repair_json('{"key": ""value"}') == '{"key": "value"}'
-    assert (
-        repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
-    )
+    assert repair_json('{"key": "value", 5: "value"}') == '{"key": "value", "5": "value"}'
     assert repair_json('{"foo": "\\"bar\\""') == '{"foo": "\\"bar\\""}'
     assert repair_json('{"" key":"val"') == '{" key": "val"}'
+    assert repair_json('{"key": value "key2" : "value2" ') == '{"key": "value", "key2": "value2"}'
     assert (
-        repair_json('{"key": value "key2" : "value2" ')
-        == '{"key": "value", "key2": "value2"}'
-    )
-    assert (
-        repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}')
-        == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
+        repair_json('{"key": "lorem ipsum ... "sic " tamet. ...}') == '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}'
     )
     assert repair_json('{"key": value , }') == '{"key": "value"}'
     assert (
@@ -166,14 +137,8 @@ def test_array_edge_cases():
     assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
     assert repair_json("[true, false, null, ...]") == "[true, false, null]"
     assert repair_json('["a" "b" "c" 1') == '["a", "b", "c", 1]'
-    assert (
-
-        == '{"employees": ["John", "Anna"]}'
-    )
-    assert (
-        repair_json('{"employees":["John", "Anna", "Peter')
-        == '{"employees": ["John", "Anna", "Peter"]}'
-    )
+    assert repair_json('{"employees":["John", "Anna",') == '{"employees": ["John", "Anna"]}'
+    assert repair_json('{"employees":["John", "Anna", "Peter') == '{"employees": ["John", "Anna", "Peter"]}'
     assert repair_json('{"key1": {"key2": [1, 2, 3') == '{"key1": {"key2": [1, 2, 3]}}'
     assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
     assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
@@ -182,14 +147,9 @@ def test_array_edge_cases():
         == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
     )
     assert repair_json('[ "value", /* comment */ "value2" ]') == '["value", "value2"]'
+    assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
     assert (
-        repair_json('{"key": ["value" "value1" "value2"]}')
-        == '{"key": ["value", "value1", "value2"]}'
-    )
-    assert (
-        repair_json(
-            '{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}'
-        )
+        repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}')
         == '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
     )
     assert repair_json('{"k"e"y": "value"}') == '{"k\\"e\\"y": "value"}'
@@ -198,9 +158,7 @@ def test_array_edge_cases():
 
 def test_escaping():
     assert repair_json("'\"'") == ""
-    assert (
-        repair_json('{"key": \'string"\n\t\\le\'') == '{"key": "string\\"\\n\\t\\\\le"}'
-    )
+    assert repair_json('{"key": \'string"\n\t\\le\'') == '{"key": "string\\"\\n\\t\\\\le"}'
     assert (
         repair_json(
             r'{"real_content": "Some string: Some other string \t Some string <a href=\"https://domain.com\">Some link</a>"'
@@ -209,26 +167,21 @@ def test_escaping():
     )
     assert repair_json('{"key_1\n": "value"}') == '{"key_1": "value"}'
     assert repair_json('{"key\t_": "value"}') == '{"key\\t_": "value"}'
+    assert repair_json("{\"key\": '\u0076\u0061\u006c\u0075\u0065'}") == '{"key": "value"}'
+    assert repair_json('{"key": "\\u0076\\u0061\\u006C\\u0075\\u0065"}', skip_json_loads=True) == '{"key": "value"}'
 
 
 def test_object_edge_cases():
     assert repair_json("{ ") == "{}"
     assert repair_json('{"": "value"') == '{"": "value"}'
-    assert (
-        repair_json('{"value_1": true, COMMENT "value_2": "data"}')
-        == '{"value_1": true, "value_2": "data"}'
-    )
+    assert repair_json('{"value_1": true, COMMENT "value_2": "data"}') == '{"value_1": true, "value_2": "data"}'
     assert (
         repair_json('{"value_1": true, SHOULD_NOT_EXIST "value_2": "data" AAAA }')
         == '{"value_1": true, "value_2": "data"}'
     )
+    assert repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
     assert (
-        repair_json('{"" : true, "key2": "value2"}') == '{"": true, "key2": "value2"}'
-    )
-    assert (
-        repair_json(
-            """{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}"""
-        )
+        repair_json("""{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}""")
         == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
     )
     assert (
@@ -241,47 +194,26 @@ def test_object_edge_cases():
     )
     assert repair_json("""{ "a" : "{ b": {} }" }""") == '{"a": "{ b"}'
     assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
+    assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
+    assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
     assert (
-        repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
-    )
-    assert (
-        repair_json('{"lorem": ipsum, sic, datum.",}')
-        == '{"lorem": "ipsum, sic, datum."}'
-    )
-    assert (
-        repair_json(
-            '{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}'
-        )
+        repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}')
         == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
     )
     assert (
         repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }')
         == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
     )
-    assert (
-
-        == '{"key": "value", " key2": "value2"}'
-    )
-    assert (
-        repair_json('{"key":value "key2":"value2" }')
-        == '{"key": "value", "key2": "value2"}'
-    )
+    assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
+    assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
     assert (
         repair_json("{'text': 'words{words in brackets}more words'}")
         == '{"text": "words{words in brackets}more words"}'
     )
-    assert (
-
-        == '{"text": "words{words in brackets}"}'
-    )
-    assert (
-        repair_json("{text:words{words in brackets}m}")
-        == '{"text": "words{words in brackets}m"}'
-    )
+    assert repair_json("{text:words{words in brackets}}") == '{"text": "words{words in brackets}"}'
+    assert repair_json("{text:words{words in brackets}m}") == '{"text": "words{words in brackets}m"}'
     assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
-    assert (
-        repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
-    )
+    assert repair_json("{key:value,key2:value2}") == '{"key": "value", "key2": "value2"}'
     assert repair_json('{"key:"value"}') == '{"key": "value"}'
     assert repair_json('{"key:value}') == '{"key": "value"}'
     assert (
@@ -302,9 +234,7 @@ def test_object_edge_cases():
     )
     assert repair_json('{ "key": "value" /* comment') == '{"key": "value"}'
     assert (
-        repair_json(
-            '{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }'
-        )
+        repair_json('{ "key": ["arrayvalue"], ["arrayvalue1"], ["arrayvalue2"], "key3": "value3" }')
         == '{"key": ["arrayvalue", "arrayvalue1", "arrayvalue2"], "key3": "value3"}'
     )
     assert (
@@ -315,15 +245,11 @@ def test_object_edge_cases():
 
 def test_number_edge_cases():
     assert (
-        repair_json(' - { "test_key": ["test_value", "test_value2"] }')
-        == '{"test_key": ["test_value", "test_value2"]}'
+        repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
     )
     assert repair_json('{"key": 1/3}') == '{"key": "1/3"}'
     assert repair_json('{"key": .25}') == '{"key": 0.25}'
-    assert (
-        repair_json('{"here": "now", "key": 1/3, "foo": "bar"}')
-        == '{"here": "now", "key": "1/3", "foo": "bar"}'
-    )
+    assert repair_json('{"here": "now", "key": 1/3, "foo": "bar"}') == '{"here": "now", "key": "1/3", "foo": "bar"}'
     assert repair_json('{"key": 12345/67890}') == '{"key": "12345/67890"}'
     assert repair_json("[105,12") == "[105, 12]"
     assert repair_json('{"key", 105,12,') == '{"key": "105,12"}'
@@ -344,22 +270,14 @@ def test_markdown():
         == '{"content": "[LINK](\\"https://google.com\\")"}'
     )
     assert repair_json('{ "content": "[LINK](" }') == '{"content": "[LINK]("}'
-    assert (
-        repair_json('{ "content": "[LINK](", "key": true }')
-        == '{"content": "[LINK](", "key": true}'
-    )
+    assert repair_json('{ "content": "[LINK](", "key": true }') == '{"content": "[LINK](", "key": true}'
 
 
 def test_leading_trailing_characters():
     assert repair_json('````{ "key": "value" }```') == '{"key": "value"}'
+    assert repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""") == '{"a": "", "b": [{"c": 1}]}'
     assert (
-        repair_json("""{ "a": "", "b": [ { "c": 1} ] \n}```""")
-        == '{"a": "", "b": [{"c": 1}]}'
-    )
-    assert (
-        repair_json(
-            "Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```"
-        )
+        repair_json("Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```")
        == '{"a": "b"}'
     )
     assert (
@@ -375,40 +293,32 @@ def test_multiple_jsons():
 def test_multiple_jsons():
     assert repair_json("[]{}") == "[[], {}]"
     assert repair_json("{}[]{}") == "[{}, [], {}]"
+    assert repair_json('{"key":"value"}[1,2,3,True]') == '[{"key": "value"}, [1, 2, 3, true]]'
     assert (
-        repair_json('{"key":"value"}[1,2,3,True]')
+        repair_json('lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42')
         == '[{"key": "value"}, [1, 2, 3, true]]'
     )
-    assert (
-        repair_json(
-            'lorem ```json {"key":"value"} ``` ipsum ```json [1,2,3,True] ``` 42'
-        )
-        == '[{"key": "value"}, [1, 2, 3, true]]'
-    )
-    assert (
-        repair_json('[{"key":"value"}][{"key":"value_after"}]')
-        == '[{"key": "value_after"}]'
-    )
+    assert repair_json('[{"key":"value"}][{"key":"value_after"}]') == '[{"key": "value_after"}]'
 
 
 def test_repair_json_with_objects():
     # Test with valid JSON strings
     assert repair_json("[]", return_objects=True) == []
     assert repair_json("{}", return_objects=True) == {}
-    assert repair_json(
-
-
-
-
-    ) == {
+    assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True) == {
+        "key": True,
+        "key2": False,
+        "key3": None,
+    }
+    assert repair_json('{"name": "John", "age": 30, "city": "New York"}', return_objects=True) == {
         "name": "John",
         "age": 30,
         "city": "New York",
     }
     assert repair_json("[1, 2, 3, 4]", return_objects=True) == [1, 2, 3, 4]
-    assert repair_json(
-
-
+    assert repair_json('{"employees":["John", "Anna", "Peter"]} ', return_objects=True) == {
+        "employees": ["John", "Anna", "Peter"]
+    }
     assert repair_json(
         """
         {
@@ -460,9 +370,7 @@ def test_repair_json_with_objects():
     assert repair_json(
         '{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}',
         return_objects=True,
-    ) == {
-        "html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'
-    }
+    ) == {"html": '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
     assert repair_json(
         """
         [
@@ -995,10 +903,7 @@ def test_repair_json_from_file():
 
 
 def test_ensure_ascii():
-    assert (
-        repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False)
-        == '{"test_中国人_ascii": "统一码"}'
-    )
+    assert repair_json("{'test_中国人_ascii':'统一码'}", ensure_ascii=False) == '{"test_中国人_ascii": "统一码"}'
 
 
 def test_stream_stable():
@@ -1008,24 +913,14 @@ def test_stream_stable():
     assert repair_json('{"key": "val\\', stream_stable=False) == '{"key": "val\\\\"}'
     assert repair_json('{"key": "val\\n', stream_stable=False) == '{"key": "val"}'
     assert (
-        repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False)
-        == '{"key": "val\\n123", "key2": "value2"}'
-    )
-    assert (
-        repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True)
-        == '{"key": "val\\n123,`key2:value2`"}'
+        repair_json('{"key": "val\\n123,`key2:value2', stream_stable=False) == '{"key": "val\\n123", "key2": "value2"}'
     )
+    assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
     # stream_stable = True
     assert repair_json('{"key": "val\\', stream_stable=True) == '{"key": "val"}'
     assert repair_json('{"key": "val\\n', stream_stable=True) == '{"key": "val\\n"}'
-    assert (
-
-        == '{"key": "val\\n123,`key2:value2"}'
-    )
-    assert (
-        repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True)
-        == '{"key": "val\\n123,`key2:value2`"}'
-    )
+    assert repair_json('{"key": "val\\n123,`key2:value2', stream_stable=True) == '{"key": "val\\n123,`key2:value2"}'
+    assert repair_json('{"key": "val\\n123,`key2:value2`"}', stream_stable=True) == '{"key": "val\\n123,`key2:value2`"}'
 
 
 def test_cli(capsys):
@@ -19,12 +19,10 @@ def test_true_true_correct(benchmark):
     mean_time = benchmark.stats.get("median")
 
     # Define your time threshold in seconds
-    max_time =
+    max_time = 3 / 10**3  # 3 millisecond
 
     # Assert that the average time is below the threshold
-    assert mean_time < max_time, (
-        f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
-    )
+    assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
 
 
 def test_true_true_incorrect(benchmark):
@@ -34,12 +32,10 @@ def test_true_true_incorrect(benchmark):
     mean_time = benchmark.stats.get("median")
 
     # Define your time threshold in seconds
-    max_time =
+    max_time = 3 / 10**3  # 3 millisecond
 
     # Assert that the average time is below the threshold
-    assert mean_time < max_time, (
-        f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
-    )
+    assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
 
 
 def test_true_false_correct(benchmark):
@@ -51,9 +47,7 @@ def test_true_false_correct(benchmark):
     max_time = 30 * (1 / 10**6)  # 30 microsecond
 
     # Assert that the average time is below the threshold
-    assert mean_time < max_time, (
-        f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
-    )
+    assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
 
 
 def test_true_false_incorrect(benchmark):
|
@@ -62,12 +56,10 @@ def test_true_false_incorrect(benchmark):
|
|
62
56
|
mean_time = benchmark.stats.get("median")
|
63
57
|
|
64
58
|
# Define your time threshold in seconds
|
65
|
-
max_time =
|
59
|
+
max_time = 3 / 10**3 # 3 millisecond
|
66
60
|
|
67
61
|
# Assert that the average time is below the threshold
|
68
|
-
assert mean_time < max_time,
|
69
|
-
f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
70
|
-
)
|
62
|
+
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
71
63
|
|
72
64
|
|
73
65
|
def test_false_true_correct(benchmark):
|
@@ -76,12 +68,10 @@ def test_false_true_correct(benchmark):
|
|
76
68
|
mean_time = benchmark.stats.get("median")
|
77
69
|
|
78
70
|
# Define your time threshold in seconds
|
79
|
-
max_time =
|
71
|
+
max_time = 3 / 10**3 # 3 millisecond
|
80
72
|
|
81
73
|
# Assert that the average time is below the threshold
|
82
|
-
assert mean_time < max_time,
|
83
|
-
f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
84
|
-
)
|
74
|
+
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
85
75
|
|
86
76
|
|
87
77
|
def test_false_true_incorrect(benchmark):
|
@@ -90,12 +80,10 @@ def test_false_true_incorrect(benchmark):
|
|
90
80
|
mean_time = benchmark.stats.get("median")
|
91
81
|
|
92
82
|
# Define your time threshold in seconds
|
93
|
-
max_time =
|
83
|
+
max_time = 3 / 10**3 # 3 millisecond
|
94
84
|
|
95
85
|
# Assert that the average time is below the threshold
|
96
|
-
assert mean_time < max_time,
|
97
|
-
f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
98
|
-
)
|
86
|
+
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
99
87
|
|
100
88
|
|
101
89
|
def test_false_false_correct(benchmark):
|
@@ -107,9 +95,7 @@ def test_false_false_correct(benchmark):
|
|
107
95
|
max_time = 60 / 10**6 # 60 microsecond
|
108
96
|
|
109
97
|
# Assert that the average time is below the threshold
|
110
|
-
assert mean_time < max_time,
|
111
|
-
f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
112
|
-
)
|
98
|
+
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
113
99
|
|
114
100
|
|
115
101
|
def test_false_false_incorrect(benchmark):
|
@@ -118,9 +104,7 @@ def test_false_false_incorrect(benchmark):
|
|
118
104
|
mean_time = benchmark.stats.get("median")
|
119
105
|
|
120
106
|
# Define your time threshold in seconds
|
121
|
-
max_time =
|
107
|
+
max_time = 3 / 10**3 # 3 millisecond
|
122
108
|
|
123
109
|
# Assert that the average time is below the threshold
|
124
|
-
assert mean_time < max_time,
|
125
|
-
f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
126
|
-
)
|
110
|
+
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
|
Files without changes: LICENSE, setup.cfg, src/json_repair/__main__.py, src/json_repair/json_context.py, src/json_repair/py.typed, src/json_repair.egg-info/SOURCES.txt, src/json_repair.egg-info/dependency_links.txt, src/json_repair.egg-info/entry_points.txt, src/json_repair.egg-info/top_level.txt