json-repair 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,7 +61,9 @@ class JSONParser:
61
61
  elif char == '"':
62
62
  return self.parse_string()
63
63
  elif char == "'":
64
- return self.parse_string(use_single_quotes=True)
64
+ return self.parse_string(string_quotes="'")
65
+ elif char == "“":
66
+ return self.parse_string(string_quotes=["“", "”"])
65
67
  # <number> starts with [0-9] or minus
66
68
  elif char.isdigit() or char == "-":
67
69
  return self.parse_number()
@@ -102,9 +104,7 @@ class JSONParser:
102
104
  # <member> starts with a <string>
103
105
  key = ""
104
106
  while key == "" and self.get_char_at():
105
- key = self.parse_string(
106
- use_single_quotes=(self.json_str[self.index] == "'")
107
- )
107
+ key = self.parse_json()
108
108
 
109
109
  # This can happen sometimes like { "": "value" }
110
110
  if key == "" and self.get_char_at() == ":":
@@ -112,7 +112,7 @@ class JSONParser:
112
112
  break
113
113
 
114
114
  # We reached the end here
115
- if key == "}":
115
+ if (self.get_char_at() or "}") == "}":
116
116
  continue
117
117
 
118
118
  # An extreme case of missing ":" after a key
@@ -170,19 +170,22 @@ class JSONParser:
170
170
  self.index += 1
171
171
  return arr
172
172
 
173
- def parse_string(self, use_single_quotes=False) -> str:
173
+ def parse_string(self, string_quotes=False) -> str:
174
174
  # <string> is a string of valid characters enclosed in quotes
175
175
  # i.e. { name: "John" }
176
176
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
177
177
 
178
178
  # Flag to manage corner cases related to missing starting quote
179
179
  fixed_quotes = False
180
- string_terminator = '"'
181
- if use_single_quotes:
182
- string_terminator = "'"
180
+ lstring_delimiter = rstring_delimiter = '"'
181
+ if isinstance(string_quotes, list):
182
+ lstring_delimiter = string_quotes[0]
183
+ rstring_delimiter = string_quotes[1]
184
+ elif isinstance(string_quotes, str):
185
+ lstring_delimiter = rstring_delimiter = string_quotes
183
186
  char = self.get_char_at()
184
- if char != string_terminator:
185
- self.insert_char_at(string_terminator)
187
+ if char != lstring_delimiter:
188
+ self.insert_char_at(lstring_delimiter)
186
189
  fixed_quotes = True
187
190
  else:
188
191
  self.index += 1
@@ -198,7 +201,7 @@ class JSONParser:
198
201
  # * If we are fixing missing quotes in an object, when it finds the special terminators
199
202
  char = self.get_char_at()
200
203
  fix_broken_markdown_link = False
201
- while char and char != string_terminator:
204
+ while char and char != rstring_delimiter:
202
205
  if fixed_quotes:
203
206
  if self.context == "object_key" and (char == ":" or char.isspace()):
204
207
  break
@@ -206,9 +209,13 @@ class JSONParser:
206
209
  break
207
210
  self.index += 1
208
211
  char = self.get_char_at()
212
+ # If the string contains escaped delimiters we should respect that
213
+ if char == rstring_delimiter and self.get_char_at(-1) == "\\":
214
+ self.index += 1
215
+ char = self.get_char_at()
209
216
  # ChatGPT sometimes forgets to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
210
217
  if (
211
- char == string_terminator
218
+ char == rstring_delimiter
212
219
  # Next character is not a comma
213
220
  and self.get_char_at(1) != ","
214
221
  and (
@@ -228,8 +235,8 @@ class JSONParser:
228
235
  end = self.index
229
236
 
230
237
  # A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
231
- if char != string_terminator:
232
- self.insert_char_at(string_terminator)
238
+ if char != rstring_delimiter:
239
+ self.insert_char_at(rstring_delimiter)
233
240
  else:
234
241
  self.index += 1
235
242
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.9.0
3
+ Version: 0.10.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -56,20 +56,20 @@ I searched for a lightweight python package that was able to reliably fix this p
56
56
  You can use this library to completely replace `json.loads()`:
57
57
 
58
58
  import json_repair
59
-
59
+
60
60
  decoded_object = json_repair.loads(json_string)
61
61
 
62
62
  or just
63
63
 
64
64
  import json_repair
65
-
65
+
66
66
  decoded_object = json_repair.repair_json(json_string, return_objects=True)
67
-
67
+
68
68
  ### Performance considerations
69
69
  If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`, like:
70
70
 
71
71
  from json_repair import repair_json
72
-
72
+
73
73
  good_json_string = repair_json(bad_json_string, skip_json_loads=True)
74
74
 
75
75
  I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
@@ -77,7 +77,7 @@ I made a choice of not using any fast json library to avoid having any external
77
77
  Some rules of thumb to use:
78
78
  - Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
79
79
  - `skip_json_loads` is faster only if you know with 100% certainty that the string is not valid JSON
80
-
80
+ - If you are having issues with escaping, pass the string as a **raw** string, like: `r"string with escaping\""`
81
81
  ## Adding to requirements
82
82
  **Please pin this library only on the major version!**
83
83
 
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
+ json_repair/json_repair.py,sha256=OIjr9L0CyysGrEEdfeeEkKoiFLaX2sGSnn2MYBqGHHo,13826
3
+ json_repair-0.10.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.10.1.dist-info/METADATA,sha256=kPI-1mCI6HuVeV0eVV98GR5BS6Tk04tkxDkxKCdKG7E,6404
5
+ json_repair-0.10.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
6
+ json_repair-0.10.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.10.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
- json_repair/json_repair.py,sha256=63d6OrbAW3dbJ0C06eGtxSB8x5EASRI3OvdsXvmotKk,13373
3
- json_repair-0.9.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.9.0.dist-info/METADATA,sha256=ueRJPKZhwXF66UmZXxcLNVwkOJbjGSbsRR8hhKo78MI,6311
5
- json_repair-0.9.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
6
- json_repair-0.9.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.9.0.dist-info/RECORD,,