json-repair 0.16.0__py3-none-any.whl → 0.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@ This module will parse the JSON file following the BNF definition:
11
11
 
12
12
  <container> ::= <object> | <array>
13
13
  <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
14
- <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
14
+ <object> ::= '{' [ <string> *(', ' <member>) ] '}' ; A sequence of 'members'
15
15
  <member> ::= <string> ': ' <json> ; A pair consisting of a name, and a JSON value
16
16
 
17
17
  If something is wrong (missing parentheses or quotes, for example) it will use a few simple heuristics to fix the JSON string:
@@ -55,16 +55,18 @@ class JSONParser:
55
55
  if char is False:
56
56
  return ""
57
57
  # <object> starts with '{'
58
- elif char == "{":
58
+ # but an object key must be a string
59
+ elif self.get_context() != "object_key" and char == "{":
59
60
  self.index += 1
60
61
  return self.parse_object()
61
62
  # <array> starts with '['
62
- elif char == "[":
63
+ # but an object key must be a string
64
+ elif self.get_context() != "object_key" and char == "[":
63
65
  self.index += 1
64
66
  return self.parse_array()
65
67
  # there can be an edge case in which a key is empty and at the end of an object
66
68
  # like "key": }. We return an empty string here to close the object properly
67
- elif char == "}":
69
+ elif self.get_context() != "object_key" and char == "}":
68
70
  self.log(
69
71
  "At the end of an object we found a key with missing value, skipping",
70
72
  "info",
@@ -78,10 +80,20 @@ class JSONParser:
78
80
  elif char == "“":
79
81
  return self.parse_string(string_quotes=["“", "”"])
80
82
  # <number> starts with [0-9] or minus
81
- elif self.get_context() != "" and char.isdigit() or char == "-" or char == ".":
83
+ elif (
84
+ self.get_context() != ""
85
+ and self.get_context() != "object_key"
86
+ and char.isdigit()
87
+ or char == "-"
88
+ or char == "."
89
+ ):
82
90
  return self.parse_number()
83
91
  # <boolean> could be (T)rue or (F)alse or (N)ull
84
- elif self.get_context() != "" and char.lower() in ["t", "f", "n"]:
92
+ elif (
93
+ self.get_context() != ""
94
+ and self.get_context() != "object_key"
95
+ and char.lower() in ["t", "f", "n"]
96
+ ):
85
97
  return self.parse_boolean_or_null()
86
98
  # This might be a <string> that is missing the starting '"'
87
99
  elif self.get_context() != "" and char.isalpha():
@@ -302,7 +314,8 @@ class JSONParser:
302
314
  while next_c and next_c != rstring_delimiter:
303
315
  # If we are in an object context, let's check for the right delimiters
304
316
  if (
305
- ("object_key" in self.context and next_c == ":")
317
+ next_c == lstring_delimiter
318
+ or ("object_key" in self.context and next_c == ":")
306
319
  or ("object_value" in self.context and next_c in ["}", ","])
307
320
  or ("array" in self.context and next_c in ["]", ","])
308
321
  ):
@@ -310,12 +323,28 @@ class JSONParser:
310
323
  i += 1
311
324
  next_c = self.get_char_at(i)
312
325
  if next_c == rstring_delimiter:
313
- self.log(
314
- "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
315
- "info",
316
- )
317
- self.index += 1
318
- char = self.get_char_at()
326
+ # But this might not be it! This could be just a missing comma
327
+ # We need to check if we find a rstring_delimiter and a colon after
328
+ i += 1
329
+ next_c = self.get_char_at(i)
330
+ while next_c and next_c != rstring_delimiter:
331
+ i += 1
332
+ next_c = self.get_char_at(i)
333
+ i += 1
334
+ next_c = self.get_char_at(i)
335
+ while next_c and next_c != ":":
336
+ if next_c in [lstring_delimiter, rstring_delimiter, ","]:
337
+ break
338
+ i += 1
339
+ next_c = self.get_char_at(i)
340
+ # Only if we fail to find a ':' then we know this is misplaced quote
341
+ if next_c != ":":
342
+ self.log(
343
+ "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
344
+ "info",
345
+ )
346
+ self.index += 1
347
+ char = self.get_char_at()
319
348
 
320
349
  if (
321
350
  char
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.16.0
3
+ Version: 0.16.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
2
+ json_repair/json_repair.py,sha256=Z1BiZlCBWDGiZeARAMcQ-PYRJE5PHFeTDGLLTEVg4fs,21822
3
+ json_repair-0.16.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.16.2.dist-info/METADATA,sha256=8CXuXrM_3G5ti6wSef9waYRW3_ilW5lhQ6KIhrFRG80,7355
5
+ json_repair-0.16.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.16.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.16.2.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=AlNie5y6BZBioGi5fzTAUvum_y0U5aL5aNsuQ_68LQc,175
2
- json_repair/json_repair.py,sha256=XahLp82VVwg8KgyywNxMFBHIPIbvPDp6uWMP1VD_40w,20418
3
- json_repair-0.16.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.16.0.dist-info/METADATA,sha256=-OLuKPGwu4enrP7kP2947Gyz9l7JRVpGTRH1b1MB2ZY,7355
5
- json_repair-0.16.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.16.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.16.0.dist-info/RECORD,,