json-repair 0.39.0__tar.gz → 0.39.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (20) hide show
  1. {json_repair-0.39.0/src/json_repair.egg-info → json_repair-0.39.1}/PKG-INFO +7 -7
  2. {json_repair-0.39.0 → json_repair-0.39.1}/README.md +6 -6
  3. {json_repair-0.39.0 → json_repair-0.39.1}/pyproject.toml +1 -1
  4. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/json_parser.py +21 -35
  5. {json_repair-0.39.0 → json_repair-0.39.1/src/json_repair.egg-info}/PKG-INFO +7 -7
  6. {json_repair-0.39.0 → json_repair-0.39.1}/tests/test_json_repair.py +5 -3
  7. {json_repair-0.39.0 → json_repair-0.39.1}/LICENSE +0 -0
  8. {json_repair-0.39.0 → json_repair-0.39.1}/setup.cfg +0 -0
  9. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/__init__.py +0 -0
  10. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/__main__.py +0 -0
  11. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/json_context.py +0 -0
  12. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/json_repair.py +0 -0
  13. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/py.typed +0 -0
  14. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair/string_file_wrapper.py +0 -0
  15. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/SOURCES.txt +0 -0
  16. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/dependency_links.txt +0 -0
  17. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/entry_points.txt +0 -0
  18. {json_repair-0.39.0 → json_repair-0.39.1}/src/json_repair.egg-info/top_level.txt +0 -0
  19. {json_repair-0.39.0 → json_repair-0.39.1}/tests/test_coverage.py +0 -0
  20. {json_repair-0.39.0 → json_repair-0.39.1}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: json_repair
3
- Version: 0.39.0
3
+ Version: 0.39.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -196,12 +196,12 @@ pipx install json-repair
196
196
  to know all options available:
197
197
  ```
198
198
  $ json_repair -h
199
- usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
199
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
200
200
 
201
201
  Repair and parse JSON files.
202
202
 
203
203
  positional arguments:
204
- filename The JSON file to repair
204
+ filename The JSON file to repair (if omitted, reads from stdin)
205
205
 
206
206
  options:
207
207
  -h, --help show this help message and exit
@@ -226,13 +226,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
226
226
  # How to cite
227
227
  If you are using this library in your academic work (as I know many folks are), please find the BibTeX entry here:
228
228
 
229
- @software{Baccianella_JSON_Repair_-_2024,
229
+ @software{Baccianella_JSON_Repair_-_2025,
230
230
  author = {Baccianella, Stefano},
231
- month = aug,
231
+ month = feb,
232
232
  title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
233
233
  url = {https://github.com/mangiucugna/json_repair},
234
- version = {0.28.3},
235
- year = {2024}
234
+ version = {0.39.0},
235
+ year = {2025}
236
236
  }
237
237
 
238
238
  Thank you for citing my work and please send me a link to the paper if you can!
@@ -158,12 +158,12 @@ pipx install json-repair
158
158
  to know all options available:
159
159
  ```
160
160
  $ json_repair -h
161
- usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
161
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
162
162
 
163
163
  Repair and parse JSON files.
164
164
 
165
165
  positional arguments:
166
- filename The JSON file to repair
166
+ filename The JSON file to repair (if omitted, reads from stdin)
167
167
 
168
168
  options:
169
169
  -h, --help show this help message and exit
@@ -188,13 +188,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
188
188
  # How to cite
189
189
  If you are using this library in your academic work (as I know many folks are), please find the BibTeX entry here:
190
190
 
191
- @software{Baccianella_JSON_Repair_-_2024,
191
+ @software{Baccianella_JSON_Repair_-_2025,
192
192
  author = {Baccianella, Stefano},
193
- month = aug,
193
+ month = feb,
194
194
  title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
195
195
  url = {https://github.com/mangiucugna/json_repair},
196
- version = {0.28.3},
197
- year = {2024}
196
+ version = {0.39.0},
197
+ year = {2025}
198
198
  }
199
199
 
200
200
  Thank you for citing my work and please send me a link to the paper if you can!
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.39.0"
6
+ version = "0.39.1"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -9,6 +9,7 @@ JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None]
9
9
  class JSONParser:
10
10
  # Constants
11
11
  STRING_DELIMITERS = ['"', "'", "“", "”"]
12
+ NUMBER_CHARS = set("0123456789-.eE/,")
12
13
 
13
14
  def __init__(
14
15
  self,
@@ -129,8 +130,6 @@ class JSONParser:
129
130
  # Context is used in the string parser to manage the lack of quotes
130
131
  self.context.set(ContextValues.OBJECT_KEY)
131
132
 
132
- self.skip_whitespaces_at()
133
-
134
133
  # Save this index in case we need to find a duplicate key
135
134
  rollback_index = self.index
136
135
 
@@ -219,18 +218,13 @@ class JSONParser:
219
218
  char = self.get_char_at()
220
219
 
221
220
  # Especially at the end of an LLM generated json you might miss the last "]"
222
- char = self.get_char_at()
223
221
  if char and char != "]":
224
222
  self.log(
225
- "While parsing an array we missed the closing ], adding it back",
226
- )
227
- self.index -= 1
228
- # Add the missing closing bracket
229
- self.json_str = (
230
- self.json_str[: self.index + 1] + "]" + self.json_str[self.index + 1 :]
223
+ "While parsing an array we missed the closing ], ignoring it",
231
224
  )
232
225
 
233
226
  self.index += 1
227
+
234
228
  self.context.reset()
235
229
  return arr
236
230
 
@@ -275,15 +269,11 @@ class JSONParser:
275
269
  self.log(
276
270
  "While parsing a string, we found a literal instead of a quote",
277
271
  )
278
- self.log(
279
- "While parsing a string, we found no starting quote. Will add the quote back",
280
- )
281
272
  missing_quotes = True
282
273
 
283
274
  if not missing_quotes:
284
275
  self.index += 1
285
276
 
286
- self.skip_whitespaces_at()
287
277
  # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
288
278
  if self.get_char_at() in self.STRING_DELIMITERS:
289
279
  # If the next character is the same type of quote, then we manage it as double quotes
@@ -583,6 +573,13 @@ class JSONParser:
583
573
  elif (
584
574
  next_c == rstring_delimiter and self.get_char_at(i - 1) != "\\"
585
575
  ):
576
+ # Check if self.index:self.index+i is only whitespaces, break if that's the case
577
+ if all(
578
+ str(self.get_char_at(j)).isspace()
579
+ for j in range(1, i)
580
+ if self.get_char_at(j)
581
+ ):
582
+ break
586
583
  if self.context.current == ContextValues.OBJECT_VALUE:
587
584
  # But this might not be it! This could be just a missing comma
588
585
  # We found a delimiter and we need to check if this is a key
@@ -610,26 +607,16 @@ class JSONParser:
610
607
  self.index += 1
611
608
  char = self.get_char_at()
612
609
  elif self.context.current == ContextValues.ARRAY:
613
- # In array context this could be something like "lorem "ipsum" sic"
614
- # So let's check if we find a rstring_delimiter forward otherwise end early
615
- i = self.skip_to_character(rstring_delimiter, idx=i + 1)
616
- next_c = self.get_char_at(i)
617
- if next_c and next_c == rstring_delimiter:
618
- # Ok now if I find a comma or a closing ], that can be have also an optional rstring_delimiter before them
619
- # We can consider this a misplaced quote
620
- i += 1
621
- i = self.skip_whitespaces_at(
622
- idx=i, move_main_index=False
623
- )
624
- next_c = self.get_char_at(i)
625
- if next_c and next_c in [",", "]"]:
626
- self.log(
627
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
628
- )
629
- unmatched_delimiter = not unmatched_delimiter
630
- string_acc += str(char)
631
- self.index += 1
632
- char = self.get_char_at()
610
+ # If we got up to here it means that this is a situation like this:
611
+ # ["bla bla bla "puppy" bla bla bla "kitty" bla bla"]
612
+ # So we need to ignore this quote
613
+ self.log(
614
+ "While parsing a string in Array context, we detected a quoted section that would have closed the string but has a different meaning here, ignoring it",
615
+ )
616
+ unmatched_delimiter = not unmatched_delimiter
617
+ string_acc += str(char)
618
+ self.index += 1
619
+ char = self.get_char_at()
633
620
 
634
621
  if (
635
622
  char
@@ -663,10 +650,9 @@ class JSONParser:
663
650
  def parse_number(self) -> Union[float, int, str, JSONReturnType]:
664
651
  # <number> is a valid real number expressed in one of a number of given formats
665
652
  number_str = ""
666
- number_chars = set("0123456789-.eE/,")
667
653
  char = self.get_char_at()
668
654
  is_array = self.context.current == ContextValues.ARRAY
669
- while char and char in number_chars and (char != "," or not is_array):
655
+ while char and char in self.NUMBER_CHARS and (not is_array or char != ","):
670
656
  number_str += char
671
657
  self.index += 1
672
658
  char = self.get_char_at()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: json_repair
3
- Version: 0.39.0
3
+ Version: 0.39.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -196,12 +196,12 @@ pipx install json-repair
196
196
  to know all options available:
197
197
  ```
198
198
  $ json_repair -h
199
- usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] filename
199
+ usage: json_repair [-h] [-i] [-o TARGET] [--ensure_ascii] [--indent INDENT] [filename]
200
200
 
201
201
  Repair and parse JSON files.
202
202
 
203
203
  positional arguments:
204
- filename The JSON file to repair
204
+ filename The JSON file to repair (if omitted, reads from stdin)
205
205
 
206
206
  options:
207
207
  -h, --help show this help message and exit
@@ -226,13 +226,13 @@ In this example, any version that starts with `0.` will be acceptable, allowing
226
226
  # How to cite
227
227
  If you are using this library in your academic work (as I know many folks are), please find the BibTeX entry here:
228
228
 
229
- @software{Baccianella_JSON_Repair_-_2024,
229
+ @software{Baccianella_JSON_Repair_-_2025,
230
230
  author = {Baccianella, Stefano},
231
- month = aug,
231
+ month = feb,
232
232
  title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
233
233
  url = {https://github.com/mangiucugna/json_repair},
234
- version = {0.28.3},
235
- year = {2024}
234
+ version = {0.39.0},
235
+ year = {2025}
236
236
  }
237
237
 
238
238
  Thank you for citing my work and please send me a link to the paper if you can!
@@ -126,6 +126,8 @@ def test_array_edge_cases():
126
126
  assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
127
127
  assert repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}') == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
128
128
  assert repair_json('[ "value", /* comment */ "value2" ]') == '["value", "value2"]'
129
+ assert repair_json('{"key": ["value" "value1" "value2"]}') == '{"key": ["value", "value1", "value2"]}'
130
+ assert repair_json('{"key": ["lorem "ipsum" dolor "sit" amet, "consectetur" ", "lorem "ipsum" dolor", "lorem"]}') == '{"key": ["lorem \\"ipsum\\" dolor \\"sit\\" amet, \\"consectetur\\" ", "lorem \\"ipsum\\" dolor", "lorem"]}'
129
131
 
130
132
  def test_escaping():
131
133
  assert repair_json("'\"'") == '""'
@@ -150,7 +152,7 @@ def test_object_edge_cases():
150
152
  assert repair_json('{"lorem": ipsum, sic, datum.",}') == '{"lorem": "ipsum, sic, datum."}'
151
153
  assert repair_json('{"lorem": sic tamet. "ipsum": sic tamet, quick brown fox. "sic": ipsum}') == '{"lorem": "sic tamet.", "ipsum": "sic tamet", "sic": "ipsum"}'
152
154
  assert repair_json('{"lorem_ipsum": "sic tamet, quick brown fox. }') == '{"lorem_ipsum": "sic tamet, quick brown fox."}'
153
- assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", "key2": "value2"}'
155
+ assert repair_json('{"key":value, " key2":"value2" }') == '{"key": "value", " key2": "value2"}'
154
156
  assert repair_json('{"key":value "key2":"value2" }') == '{"key": "value", "key2": "value2"}'
155
157
  assert repair_json("{'text': 'words{words in brackets}more words'}") == '{"text": "words{words in brackets}more words"}'
156
158
  assert repair_json('{text:words{words in brackets}}') == '{"text": "words{words in brackets}"}'
@@ -265,8 +267,8 @@ def test_repair_json_from_file():
265
267
  # Write content to the temporary file
266
268
  with os.fdopen(temp_fd, 'w') as tmp:
267
269
  tmp.write("{key:value}")
268
- assert from_file(filename=temp_path, logging=True) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
269
- assert from_file(filename=temp_path, logging=True, chunk_length=2) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'text': 'While parsing a string, we found no starting quote. Will add the quote back', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
270
+ assert from_file(filename=temp_path, logging=True) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
271
+ assert from_file(filename=temp_path, logging=True, chunk_length=2) == ({'key': 'value'}, [{'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object key context, we found a :, stopping here',}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}, {'text': 'While parsing a string, we found a literal instead of a quote', 'context': '{key:value}'}, {'context': '{key:value}', 'text': 'While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn\'t determine that a right delimiter was present. Stopping here'}, {'text': 'While parsing a string, we missed the closing quote, ignoring', 'context': '{key:value}'}])
270
272
  finally:
271
273
  # Clean up - delete the temporary file
272
274
  os.remove(temp_path)
File without changes
File without changes