json-repair 0.24.0__py3-none-any.whl → 0.25.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,40 +119,38 @@ class JSONParser:
119
119
  def parse_json(
120
120
  self,
121
121
  ) -> JSONReturnType:
122
- char = self.get_char_at()
123
- # This parser will ignore any basic element (string or number) that is not inside an array or object
124
- is_in_context = len(self.context) > 0
125
- # False means that we are at the end of the string provided, is the base case for recursion
126
- if char is False:
127
- return ""
128
- # <object> starts with '{'
129
- # but an object key must be a string
130
- elif char == "{":
131
- self.index += 1
132
- return self.parse_object()
133
- # <array> starts with '['
134
- # but an object key must be a string
135
- elif char == "[":
136
- self.index += 1
137
- return self.parse_array()
138
- # there can be an edge case in which a key is empty and at the end of an object
139
- # like "key": }. We return an empty string here to close the object properly
140
- elif char == "}":
141
- self.log(
142
- "At the end of an object we found a key with missing value, skipping",
143
- "info",
144
- )
145
- return ""
146
- # <string> starts with a quote
147
- elif is_in_context and (char in ['"', "'", "“"] or char.isalpha()):
148
- return self.parse_string()
149
- # <number> starts with [0-9] or minus
150
- elif is_in_context and (char.isdigit() or char == "-" or char == "."):
151
- return self.parse_number()
152
- # If everything else fails, we just ignore and move on
153
- else:
154
- self.index += 1
155
- return self.parse_json()
122
+ while True:
123
+ char = self.get_char_at()
124
+ # This parser will ignore any basic element (string or number) that is not inside an array or object
125
+ is_in_context = len(self.context) > 0
126
+ # False means that we are at the end of the string provided
127
+ if char is False:
128
+ return ""
129
+ # <object> starts with '{'
130
+ elif char == "{":
131
+ self.index += 1
132
+ return self.parse_object()
133
+ # <array> starts with '['
134
+ elif char == "[":
135
+ self.index += 1
136
+ return self.parse_array()
137
+ # there can be an edge case in which a key is empty and at the end of an object
138
+ # like "key": }. We return an empty string here to close the object properly
139
+ elif char == "}":
140
+ self.log(
141
+ "At the end of an object we found a key with missing value, skipping",
142
+ "info",
143
+ )
144
+ return ""
145
+ # <string> starts with a quote
146
+ elif is_in_context and (char in ['"', "'", "“"] or char.isalpha()):
147
+ return self.parse_string()
148
+ # <number> starts with [0-9] or minus
149
+ elif is_in_context and (char.isdigit() or char == "-" or char == "."):
150
+ return self.parse_number()
151
+ # If everything else fails, we just ignore and move on
152
+ else:
153
+ self.index += 1
156
154
 
157
155
  def parse_object(self) -> Dict[str, Any]:
158
156
  # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
@@ -370,6 +368,10 @@ class JSONParser:
370
368
  if self.get_context() == "object_key" and (
371
369
  char == ":" or char.isspace()
372
370
  ):
371
+ self.log(
372
+ "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
373
+ "info",
374
+ )
373
375
  break
374
376
  elif self.get_context() == "object_value" and char in [",", "}"]:
375
377
  rstring_delimiter_missing = True
@@ -389,6 +391,10 @@ class JSONParser:
389
391
  if next_c and next_c in [",", "}"]:
390
392
  rstring_delimiter_missing = False
391
393
  if rstring_delimiter_missing:
394
+ self.log(
395
+ "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
396
+ "info",
397
+ )
392
398
  break
393
399
  string_acc += char
394
400
  self.index += 1
@@ -410,6 +416,33 @@ class JSONParser:
410
416
  "While parsing a string, we found a doubled quote, ignoring it",
411
417
  "info",
412
418
  )
419
+ elif missing_quotes and self.get_context() == "object_value":
420
+ # In case of a missing starting quote I need to check whether the delimiter ends this value or begins the next key
421
+ i = 1
422
+ next_c = self.get_char_at(i)
423
+ while next_c and next_c not in [
424
+ rstring_delimiter,
425
+ lstring_delimiter,
426
+ ]:
427
+ i += 1
428
+ next_c = self.get_char_at(i)
429
+ if next_c:
430
+ # We found a quote, now let's make sure there's a ":" following
431
+ i += 1
432
+ next_c = self.get_char_at(i)
433
+ # skip any whitespace after that quote; a ":" right after it means the quote belongs to the next key
434
+ while next_c and next_c.isspace():
435
+ i += 1
436
+ next_c = self.get_char_at(i)
437
+ if next_c and next_c == ":":
438
+ # Reset the cursor
439
+ self.index -= 1
440
+ char = self.get_char_at()
441
+ self.log(
442
+ "In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.",
443
+ "info",
444
+ )
445
+ break
413
446
  else:
414
447
  # Check if eventually there is a rstring delimiter, otherwise we bail
415
448
  i = 1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.24.0
3
+ Version: 0.25.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=PbfthTey1p04_9Q3QRGm8We1ueFVYwO5kQUShpfR10Y,30961
3
+ json_repair-0.25.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.25.1.dist-info/METADATA,sha256=QjbXE09AIfd70scALY8DPIBxg0TYTBlKg3KuJ7MAMRw,7596
5
+ json_repair-0.25.1.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
6
+ json_repair-0.25.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.25.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (70.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=jM509L0rg8AiWksnYdOqueopP8fmh0MJxpxx7LvaoiM,28954
3
- json_repair-0.24.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.24.0.dist-info/METADATA,sha256=kSxFAdkH_qxJMX85DKW605BOZrbh8VCWzsAvP_X80cM,7596
5
- json_repair-0.24.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.24.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.24.0.dist-info/RECORD,,