json-repair 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,9 +100,10 @@ class JSONParser:
100
100
  # Context is used in the string parser to manage the lack of quotes
101
101
  self.context = "object_key"
102
102
 
103
- # <member> starts with a <string>
104
103
  self.skip_whitespaces_at()
105
- key = self.parse_string()
104
+
105
+ # <member> starts with a <string>
106
+ key = ""
106
107
  while key == "" and self.get_char_at():
107
108
  key = self.parse_string()
108
109
 
@@ -110,8 +111,6 @@ class JSONParser:
110
111
  if key == "}":
111
112
  continue
112
113
 
113
- # Reset context
114
- self.context = ""
115
114
  # An extreme case of missing ":" after a key
116
115
  if (self.get_char_at() or "") != ":":
117
116
  self.insert_char_at(":")
@@ -119,6 +118,8 @@ class JSONParser:
119
118
  self.context = "object_value"
120
119
  # The value can be any valid json
121
120
  value = self.parse_json()
121
+
122
+ # Reset context since our job is done
122
123
  self.context = ""
123
124
  obj[key] = value
124
125
 
@@ -140,6 +141,11 @@ class JSONParser:
140
141
  # Stop when you either find the closing parentheses or you have iterated over the entire string
141
142
  while (self.get_char_at() or "]") != "]":
142
143
  value = self.parse_json()
144
+
145
+ # It is possible that parse_json() returns nothing valid, so we stop
146
+ if not value:
147
+ break
148
+
143
149
  arr.append(value)
144
150
 
145
151
  # skip over whitespace after a value but before closing ]
@@ -149,9 +155,10 @@ class JSONParser:
149
155
  char = self.get_char_at()
150
156
 
151
157
  # Especially at the end of an LLM generated json you might miss the last "]"
152
- if (self.get_char_at() or "]") != "]":
158
+ char = self.get_char_at()
159
+ if char and char != "]":
153
160
  # Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
154
- if (self.get_char_at() or "") == ",":
161
+ if char == ",":
155
162
  # Remove trailing "," before adding the "]"
156
163
  self.remove_char_at()
157
164
  self.insert_char_at("]")
@@ -161,16 +168,20 @@ class JSONParser:
161
168
 
162
169
  def parse_string(self) -> str:
163
170
  # <string> is a string of valid characters enclosed in quotes
171
+ # i.e. { name: "John" }
164
172
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
173
+
165
174
  # Flag to manage corner cases related to missing starting quote
166
175
  fixed_quotes = False
167
- # i.e. { name: "John" }
168
- if (self.get_char_at() or '"') != '"':
176
+
177
+ char = self.get_char_at()
178
+ if char != '"':
169
179
  self.insert_char_at('"')
170
180
  fixed_quotes = True
171
181
  else:
172
182
  self.index += 1
173
- # Start position of the string
183
+
184
+ # Start position of the string (to use later in the return value)
174
185
  start = self.index
175
186
 
176
187
  # Here things get a bit hairy because a string missing the final quote can also be a key or a value in an object
@@ -180,38 +191,26 @@ class JSONParser:
180
191
  # * It iterated over the entire sequence
181
192
  # * If we are fixing missing quotes in an object, when it finds the special terminators
182
193
  char = self.get_char_at()
183
- while (
184
- char
185
- and char != '"'
186
- and (not fixed_quotes or self.context != "object_key" or char != ":")
187
- and (not fixed_quotes or self.context != "object_key" or not char.isspace())
188
- and (
189
- not fixed_quotes
190
- or self.context != "object_value"
191
- or (char != "," and char != "}")
192
- )
193
- ):
194
+ while char and char != '"':
195
+ if fixed_quotes:
196
+ if self.context == "object_key" and (char == ":" or char.isspace()):
197
+ break
198
+ elif self.context == "object_value" and (char == "," or char == "}"):
199
+ break
194
200
  self.index += 1
195
201
  char = self.get_char_at()
196
202
 
197
- # If the cycle stopped at a space we have some doubts on wheter this is a valid string, check one char ahead
198
- if (
199
- fixed_quotes
200
- and self.context == "object_key"
201
- and (self.get_char_at() or "").isspace()
202
- ):
203
- # skip whitespaces
203
+ if char and fixed_quotes and self.context == "object_key" and char.isspace():
204
204
  self.skip_whitespaces_at()
205
- # This string is invalid if there's no valid termination afterwards
206
-
207
- if (self.get_char_at() or "") not in [":", ","]:
205
+ if self.get_char_at() not in [":", ","]:
208
206
  return ""
209
207
 
210
208
  end = self.index
211
- if (self.get_char_at() or '"') != '"':
212
- self.insert_char_at('"')
209
+
213
210
  # A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
214
- if (self.get_char_at() or "") == '"':
211
+ if char != '"':
212
+ self.insert_char_at('"')
213
+ else:
215
214
  self.index += 1
216
215
 
217
216
  return self.json_str[start:end]
@@ -219,8 +218,9 @@ class JSONParser:
219
218
  def parse_number(self) -> Union[float, int]:
220
219
  # <number> is a valid real number expressed in one of a number of given formats
221
220
  number_str = ""
221
+ number_chars = set("0123456789-.eE")
222
222
  char = self.get_char_at()
223
- while char and (char.isdigit() or char in "-.eE"):
223
+ while char and char in number_chars:
224
224
  number_str += char
225
225
  self.index += 1
226
226
  char = self.get_char_at()
@@ -235,18 +235,14 @@ class JSONParser:
235
235
 
236
236
  def parse_boolean_or_null(self) -> Union[bool, None]:
237
237
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
238
- if self.json_str.startswith("true", self.index):
239
- self.index += 4
240
- return True
241
- elif self.json_str.startswith("false", self.index):
242
- self.index += 5
243
- return False
244
- elif self.json_str.startswith("null", self.index):
245
- self.index += 4
246
- return None
247
- else:
248
- # This is a string then
249
- return self.parse_string()
238
+ boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
239
+ for key, (value, length) in boolean_map.items():
240
+ if self.json_str.startswith(key, self.index):
241
+ self.index += length
242
+ return value
243
+
244
+ # This is a string then
245
+ return self.parse_string()
250
246
 
251
247
  def insert_char_at(self, char: str) -> None:
252
248
  self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json-repair
3
- Version: 0.4.3
3
+ Version: 0.4.5
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
+ json_repair/json_repair.py,sha256=ezQraeyIucpwRgnh12S5m7Mn2eR5i0rKdwAkLrkAhEA,12136
3
+ json_repair-0.4.5.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.4.5.dist-info/METADATA,sha256=C6GUgX3IMBJfr_kecH3ZD6jkjdgfgdI0S0vFnivHaK4,6011
5
+ json_repair-0.4.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
6
+ json_repair-0.4.5.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.4.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.3)
2
+ Generator: bdist_wheel (0.42.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
- json_repair/json_repair.py,sha256=duu-4stxqeJ1ZrVY02wU8I0y7W4q3lh0ocTT4Cov69U,12512
3
- json_repair-0.4.3.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.4.3.dist-info/METADATA,sha256=BtpWejgqXSxlO-vnnlzdAf9QWjztTexudWnKvUUiO9g,6011
5
- json_repair-0.4.3.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
6
- json_repair-0.4.3.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.4.3.dist-info/RECORD,,