json-repair 0.15.5__tar.gz → 0.15.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.15.5
3
+ Version: 0.15.6
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.15.5"
6
+ version = "0.15.6"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -174,6 +174,7 @@ class JSONParser:
174
174
  arr = []
175
175
  # Stop when you either find the closing parentheses or you have iterated over the entire string
176
176
  while (self.get_char_at() or "]") != "]":
177
+ self.skip_whitespaces_at()
177
178
  value = self.parse_json()
178
179
 
179
180
  # It is possible that parse_json() returns nothing valid, so we stop
@@ -218,6 +219,7 @@ class JSONParser:
218
219
 
219
220
  # Flag to manage corner cases related to missing starting quote
220
221
  fixed_quotes = False
222
+ doubled_quotes = False
221
223
  lstring_delimiter = rstring_delimiter = '"'
222
224
  if isinstance(string_quotes, list):
223
225
  lstring_delimiter = string_quotes[0]
@@ -239,6 +241,7 @@ class JSONParser:
239
241
  "While parsing a string, we found a valid starting doubled quote, ignoring it",
240
242
  "info",
241
243
  )
244
+ doubled_quotes = True
242
245
  self.index += 1
243
246
  char = self.get_char_at()
244
247
  if char != lstring_delimiter:
@@ -279,13 +282,9 @@ class JSONParser:
279
282
  self.remove_char_at(-1)
280
283
  self.index -= 1
281
284
  # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
282
- if (
283
- char == rstring_delimiter
284
- # Next character is not a delimiter
285
- and self.get_char_at(1) not in [",", ":", "]", "}"]
286
- ):
285
+ if char == rstring_delimiter:
287
286
  # Special case here, in case of double quotes one after another
288
- if self.get_char_at(1) == rstring_delimiter:
287
+ if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
289
288
  self.log(
290
289
  "While parsing a string, we found a doubled quote, ignoring it",
291
290
  "info",
@@ -294,13 +293,20 @@ class JSONParser:
294
293
  self.remove_char_at()
295
294
  else:
296
295
  # Check if eventually there is a rstring delimiter, otherwise we bail
297
- i = 2
296
+ i = 1
297
+ context = self.get_context()
298
298
  next_c = self.get_char_at(i)
299
299
  while next_c and next_c != rstring_delimiter:
300
+ # If we are in an object context, let's check for the right delimiters
301
+ if (
302
+ (context == "object_key" and next_c == ":")
303
+ or (context == "object_value" and next_c in ["}", ","])
304
+ or (context == "" and next_c in ["]", ","])
305
+ ):
306
+ break
300
307
  i += 1
301
308
  next_c = self.get_char_at(i)
302
- # In that case we ignore this rstring delimiter
303
- if next_c:
309
+ if next_c == rstring_delimiter:
304
310
  self.log(
305
311
  "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
306
312
  "info",
@@ -416,7 +422,7 @@ class JSONParser:
416
422
 
417
423
  def get_context(self) -> str:
418
424
  try:
419
- return self.context[0]
425
+ return self.context[-1]
420
426
  except Exception:
421
427
  return ""
422
428
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.15.5
3
+ Version: 0.15.6
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -190,6 +190,18 @@ def test_repair_json_with_objects():
190
190
  }
191
191
  ''', True) == {"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}
192
192
  assert repair_json('{\n"html": "<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>"}', True) == {'html': '<h3 id="aaa">Waarom meer dan 200 Technical Experts - "Passie voor techniek"?</h3>'}
193
+ assert repair_json("""
194
+ [
195
+ {
196
+ "foo": "Foo bar baz",
197
+ "tag": "#foo-bar-baz"
198
+ },
199
+ {
200
+ "foo": "foo bar "foobar" foo bar baz.",
201
+ "tag": "#foo-bar-foobar"
202
+ }
203
+ ]
204
+ """, True) == [{"foo": "Foo bar baz", "tag": "#foo-bar-baz"},{"foo": "foo bar \"foobar\" foo bar baz.", "tag": "#foo-bar-foobar" }]
193
205
 
194
206
 
195
207
  def test_repair_json_corner_cases_generate_by_gpt():
File without changes
File without changes
File without changes