json-repair 0.19.2__tar.gz → 0.20.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.19.2
3
+ Version: 0.20.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.19.2"
6
+ version = "0.20.1"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -131,10 +131,14 @@ class JSONParser:
131
131
  # Sometimes the string search might not move the index at all, which might lead us to an infinite loop
132
132
  self.index += 1
133
133
 
134
+ self.skip_whitespaces_at()
135
+
134
136
  # We reached the end here
135
137
  if (self.get_char_at() or "}") == "}":
136
138
  continue
137
139
 
140
+ self.skip_whitespaces_at()
141
+
138
142
  # An extreme case of missing ":" after a key
139
143
  if (self.get_char_at() or "") != ":":
140
144
  self.log(
@@ -178,7 +182,7 @@ class JSONParser:
178
182
  value = self.parse_json()
179
183
 
180
184
  # It is possible that parse_json() returns nothing valid, so we stop
181
- if not value:
185
+ if value == "":
182
186
  break
183
187
 
184
188
  if value == "..." and self.get_char_at(-1) == ".":
@@ -335,32 +339,50 @@ class JSONParser:
335
339
  # Check if eventually there is a rstring delimiter, otherwise we bail
336
340
  i = 1
337
341
  next_c = self.get_char_at(i)
338
- while next_c and next_c != rstring_delimiter:
342
+ check_comma_in_object_value = True
343
+ while next_c and next_c not in [
344
+ rstring_delimiter,
345
+ lstring_delimiter,
346
+ ]:
347
+ # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
348
+ # This is because the routine after will make sure to correct any bad guess and this solves a corner case
349
+ if next_c.isalpha():
350
+ check_comma_in_object_value = False
339
351
  # If we are in an object context, let's check for the right delimiters
340
352
  if (
341
- next_c == lstring_delimiter
342
- or ("object_key" in self.context and next_c == ":")
343
- or ("object_value" in self.context and next_c in ["}", ","])
353
+ ("object_key" in self.context and next_c in [":", "}"])
354
+ or ("object_value" in self.context and next_c == "}")
344
355
  or ("array" in self.context and next_c in ["]", ","])
356
+ or (
357
+ check_comma_in_object_value
358
+ and self.get_context() == "object_value"
359
+ and next_c == ","
360
+ )
345
361
  ):
346
362
  break
347
363
  i += 1
348
364
  next_c = self.get_char_at(i)
349
365
  if next_c == rstring_delimiter:
350
- # But this might not be it! This could be just a missing comma
351
- # We need to check if we find a rstring_delimiter and a colon after
352
- i += 1
353
- next_c = self.get_char_at(i)
354
- while next_c and next_c != rstring_delimiter:
366
+ if self.get_context() == "object_value":
367
+ # But this might not be it! This could be just a missing comma
368
+ # We found a delimiter and we need to check if this is a key
369
+ # so find a rstring_delimiter and a colon after
355
370
  i += 1
356
371
  next_c = self.get_char_at(i)
357
- i += 1
358
- next_c = self.get_char_at(i)
359
- while next_c and next_c != ":":
360
- if next_c in [lstring_delimiter, rstring_delimiter, ","]:
361
- break
372
+ while next_c and next_c != rstring_delimiter:
373
+ i += 1
374
+ next_c = self.get_char_at(i)
362
375
  i += 1
363
376
  next_c = self.get_char_at(i)
377
+ while next_c and next_c != ":":
378
+ if next_c in [
379
+ lstring_delimiter,
380
+ rstring_delimiter,
381
+ ",",
382
+ ]:
383
+ break
384
+ i += 1
385
+ next_c = self.get_char_at(i)
364
386
  # Only if we fail to find a ':' then we know this is a misplaced quote
365
387
  if next_c != ":":
366
388
  self.log(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.19.2
3
+ Version: 0.20.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -32,7 +32,7 @@ def test_valid_json():
32
32
  assert repair_json('{"key": "value\\nvalue"}') == '{"key": "value\\nvalue"}'
33
33
 
34
34
  def test_brackets_edge_cases():
35
- assert repair_json("[{]") == "[]"
35
+ assert repair_json("[{]") == "[{}]"
36
36
  assert repair_json(" { } ") == "{}"
37
37
  assert repair_json("[") == "[]"
38
38
  assert repair_json("]") == '""'
@@ -83,6 +83,7 @@ def test_array_edge_cases():
83
83
  assert repair_json("[1, 2, 3, ...]") == "[1, 2, 3]"
84
84
  assert repair_json("[1, 2, ... , 3]") == "[1, 2, 3]"
85
85
  assert repair_json("[1, 2, '...', 3]") == '[1, 2, "...", 3]'
86
+ assert repair_json("[true, false, null, ...]") == '[true, false, null]'
86
87
  assert (
87
88
  repair_json('{"employees":["John", "Anna",')
88
89
  == '{"employees": ["John", "Anna"]}'
@@ -119,8 +120,9 @@ def test_object_edge_cases():
119
120
  assert repair_json('{""answer"":[{""traits"":''Female aged 60+'',""answer1"":""5""}]}') == '{"answer": [{"traits": "Female aged 60+", "answer1": "5"}]}'
120
121
  assert repair_json('{ "words": abcdef", "numbers": 12345", "words2": ghijkl" }') == '{"words": "abcdef", "numbers": 12345, "words2": "ghijkl"}'
121
122
  assert repair_json('''{"number": 1,"reason": "According...""ans": "YES"}''') == '{"number": 1, "reason": "According...", "ans": "YES"}'
122
- assert repair_json('''{ "a": "{ b": {} }" }''') == '{"a": "{ b"}'
123
+ assert repair_json('''{ "a" : "{ b": {} }" }''') == '{"a": "{ b"}'
123
124
  assert repair_json("""{"b": "xxxxx" true}""") == '{"b": "xxxxx"}'
125
+ assert repair_json('{"key": "Lorem "ipsum" s,"}') == '{"key": "Lorem \\"ipsum\\" s,"}'
124
126
 
125
127
  def test_number_edge_cases():
126
128
  assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
File without changes
File without changes
File without changes