json-repair 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,7 +62,7 @@ class JSONParser:
62
62
  # <number> starts with [0-9] or minus
63
63
  elif char.isdigit() or char == "-":
64
64
  return self.parse_number()
65
- # <boolean> could (T)rue or (F)alse or (N)ull
65
+ # <boolean> could be (T)rue or (F)alse or (N)ull
66
66
  elif char == "t" or char == "f" or char == "n":
67
67
  return self.parse_boolean_or_null()
68
68
  # This might be a <string> that is missing the starting '"'
@@ -70,6 +70,7 @@ class JSONParser:
70
70
  return self.parse_string()
71
71
  # Ignore whitespaces outside of strings
72
72
  elif char.isspace():
73
+ self.index += 1
73
74
  self.skip_whitespaces_at()
74
75
  return self.parse_json()
75
76
  # If everything else fails, then we give up and return an exception
@@ -104,6 +105,7 @@ class JSONParser:
104
105
  self.context = "object_key"
105
106
 
106
107
  # <member> starts with a <string>
108
+ self.skip_whitespaces_at()
107
109
  key = self.parse_string()
108
110
  while key == "":
109
111
  key = self.parse_string()
@@ -176,8 +178,6 @@ class JSONParser:
176
178
  # Flag to manage corner cases related to missing starting quote
177
179
  fixed_quotes = False
178
180
  # i.e. { name: "John" }
179
- # Remove any trailing space
180
- self.skip_whitespaces_at()
181
181
  if self.get_char_at() != '"':
182
182
  self.insert_char_at('"')
183
183
  fixed_quotes = True
@@ -218,11 +218,7 @@ class JSONParser:
218
218
  self.skip_whitespaces_at()
219
219
  # This string is invalid if there's no valid termination afterwards
220
220
 
221
- if (
222
- self.get_char_at() != ":"
223
- or self.get_char_at() != ","
224
- or self.get_char_at() != "}"
225
- ):
221
+ if self.get_char_at() not in [":", ","]:
226
222
  return ""
227
223
 
228
224
  end = self.index
@@ -236,11 +232,13 @@ class JSONParser:
236
232
 
237
233
  def parse_number(self) -> Union[float, int]:
238
234
  # <number> is a valid real number expressed in one of a number of given formats
239
- number_pattern = r"-?\d+(\.\d+)?([eE][+-]?\d+)?"
240
- match = re.match(number_pattern, self.json_str[self.index :])
241
- if match:
242
- number_str = match.group()
243
- self.index += len(number_str)
235
+ number_str = ""
236
+ char = self.get_char_at()
237
+ while char and (char.isdigit() or char in "-.eE"):
238
+ number_str += char
239
+ self.index += 1
240
+ char = self.get_char_at()
241
+ if number_str:
244
242
  if "." in number_str or "e" in number_str or "E" in number_str:
245
243
  return float(number_str)
246
244
  else:
@@ -264,24 +262,34 @@ class JSONParser:
264
262
  # This is a string then
265
263
  return self.parse_string()
266
264
 
267
- # This is a string then
268
- return self.parse_string()
269
-
270
265
  def insert_char_at(self, char: str) -> None:
271
266
  self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
272
267
  self.index += 1
273
268
 
274
269
  def get_char_at(self) -> Union[str, bool]:
275
270
  # Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
276
- return self.json_str[self.index] if self.index < len(self.json_str) else False
271
+ try:
272
+ return self.json_str[self.index]
273
+ except IndexError:
274
+ return False
277
275
 
278
276
  def remove_char_at(self) -> None:
279
277
  self.json_str = self.json_str[: self.index] + self.json_str[self.index + 1 :]
280
278
 
281
279
  def skip_whitespaces_at(self) -> None:
282
280
  # Remove trailing spaces
283
- while self.get_char_at() and self.get_char_at().isspace():
281
+ # I'd rather not do this BUT this method is called so many times that it makes sense to expand get_char_at
282
+ # At least this is what the profiler said and I believe in our lord and savior the profiler
283
+ try:
284
+ char = self.json_str[self.index]
285
+ except IndexError:
286
+ return
287
+ while char and char.isspace():
284
288
  self.index += 1
289
+ try:
290
+ char = self.json_str[self.index]
291
+ except IndexError:
292
+ return
285
293
 
286
294
 
287
295
  def repair_json(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json-repair
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -85,6 +85,10 @@ If you find this library too slow because is using `json.loads()` you can skip t
85
85
 
86
86
  I made a choice of not using any fast json library to avoid having any external dependency, so that anybody can use it regardless of their stack.
87
87
 
88
+ Some rules of thumb to use:
89
+ - Setting `return_objects=True` will always be faster because the parser returns an object already and it doesn't have to serialize that object to JSON
90
+ - `skip_json_loads` is faster only if you 100% know that the string is not valid JSON
91
+
88
92
  # How it works
89
93
  This module will parse the JSON file following the BNF definition:
90
94
 
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
+ json_repair/json_repair.py,sha256=Qxy8eQpkm9e1qVUDvhYOuID7flLHIzAsr5cB1NWb3Y4,12974
3
+ json_repair-0.4.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.4.1.dist-info/METADATA,sha256=MXc1lBLV9wr-DNTqqkzuH8W-VYiDNY9UVsvsHJGqMw4,6155
5
+ json_repair-0.4.1.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
6
+ json_repair-0.4.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.4.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
- json_repair/json_repair.py,sha256=FJf3c9FVBoI09zQXA6RE2sX0fDc0vTKWaOLW86fccf8,12709
3
- json_repair-0.4.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.4.0.dist-info/METADATA,sha256=55m7tDzhs_R7g19ecNGFoY4fChmfp6Ft1TzdK8MxN5Q,5889
5
- json_repair-0.4.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
6
- json_repair-0.4.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.4.0.dist-info/RECORD,,