json-repair 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -61,6 +61,8 @@ class JSONParser:
61
61
  # <string> starts with '"'
62
62
  elif char == '"':
63
63
  return self.parse_string()
64
+ elif char == "'":
65
+ return self.parse_string(use_single_quotes=True)
64
66
  # <number> starts with [0-9] or minus
65
67
  elif char.isdigit() or char == "-":
66
68
  return self.parse_number()
@@ -100,18 +102,19 @@ class JSONParser:
100
102
  # Context is used in the string parser to manage the lack of quotes
101
103
  self.context = "object_key"
102
104
 
103
- # <member> starts with a <string>
104
105
  self.skip_whitespaces_at()
105
- key = self.parse_string()
106
+
107
+ # <member> starts with a <string>
108
+ key = ""
106
109
  while key == "" and self.get_char_at():
107
- key = self.parse_string()
110
+ key = self.parse_string(
111
+ use_single_quotes=(self.json_str[self.index] == "'")
112
+ )
108
113
 
109
114
  # We reached the end here
110
115
  if key == "}":
111
116
  continue
112
117
 
113
- # Reset context
114
- self.context = ""
115
118
  # An extreme case of missing ":" after a key
116
119
  if (self.get_char_at() or "") != ":":
117
120
  self.insert_char_at(":")
@@ -119,6 +122,8 @@ class JSONParser:
119
122
  self.context = "object_value"
120
123
  # The value can be any valid json
121
124
  value = self.parse_json()
125
+
126
+ # Reset context since our job is done
122
127
  self.context = ""
123
128
  obj[key] = value
124
129
 
@@ -141,6 +146,7 @@ class JSONParser:
141
146
  while (self.get_char_at() or "]") != "]":
142
147
  value = self.parse_json()
143
148
 
149
+ # It is possible that parse_json() returns nothing valid, so we stop
144
150
  if not value:
145
151
  break
146
152
 
@@ -153,9 +159,10 @@ class JSONParser:
153
159
  char = self.get_char_at()
154
160
 
155
161
  # Especially at the end of an LLM generated json you might miss the last "]"
156
- if (self.get_char_at() or "]") != "]":
162
+ char = self.get_char_at()
163
+ if char and char != "]":
157
164
  # Sometimes when you fix a missing "]" you'll have a trailing "," there that makes the JSON invalid
158
- if (self.get_char_at() or "") == ",":
165
+ if char == ",":
159
166
  # Remove trailing "," before adding the "]"
160
167
  self.remove_char_at()
161
168
  self.insert_char_at("]")
@@ -163,18 +170,24 @@ class JSONParser:
163
170
  self.index += 1
164
171
  return arr
165
172
 
166
- def parse_string(self) -> str:
173
+ def parse_string(self, use_single_quotes=False) -> str:
167
174
  # <string> is a string of valid characters enclosed in quotes
175
+ # i.e. { name: "John" }
168
176
  # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here
177
+
169
178
  # Flag to manage corner cases related to missing starting quote
170
179
  fixed_quotes = False
171
- # i.e. { name: "John" }
172
- if (self.get_char_at() or '"') != '"':
173
- self.insert_char_at('"')
180
+ string_terminator = '"'
181
+ if use_single_quotes:
182
+ string_terminator = "'"
183
+ char = self.get_char_at()
184
+ if char != string_terminator:
185
+ self.insert_char_at(string_terminator)
174
186
  fixed_quotes = True
175
187
  else:
176
188
  self.index += 1
177
- # Start position of the string
189
+
190
+ # Start position of the string (to use later in the return value)
178
191
  start = self.index
179
192
 
180
193
  # Here things get a bit hairy because a string missing the final quote can also be a key or a value in an object
@@ -184,38 +197,26 @@ class JSONParser:
184
197
  # * It iterated over the entire sequence
185
198
  # * If we are fixing missing quotes in an object, when it finds the special terminators
186
199
  char = self.get_char_at()
187
- while (
188
- char
189
- and char != '"'
190
- and (not fixed_quotes or self.context != "object_key" or char != ":")
191
- and (not fixed_quotes or self.context != "object_key" or not char.isspace())
192
- and (
193
- not fixed_quotes
194
- or self.context != "object_value"
195
- or (char != "," and char != "}")
196
- )
197
- ):
200
+ while char and char != string_terminator:
201
+ if fixed_quotes:
202
+ if self.context == "object_key" and (char == ":" or char.isspace()):
203
+ break
204
+ elif self.context == "object_value" and (char == "," or char == "}"):
205
+ break
198
206
  self.index += 1
199
207
  char = self.get_char_at()
200
208
 
201
- # If the cycle stopped at a space we have some doubts on wheter this is a valid string, check one char ahead
202
- if (
203
- fixed_quotes
204
- and self.context == "object_key"
205
- and (self.get_char_at() or "").isspace()
206
- ):
207
- # skip whitespaces
209
+ if char and fixed_quotes and self.context == "object_key" and char.isspace():
208
210
  self.skip_whitespaces_at()
209
- # This string is invalid if there's no valid termination afterwards
210
-
211
- if (self.get_char_at() or "") not in [":", ","]:
211
+ if self.get_char_at() not in [":", ","]:
212
212
  return ""
213
213
 
214
214
  end = self.index
215
- if (self.get_char_at() or '"') != '"':
216
- self.insert_char_at('"')
215
+
217
216
  # A fallout of the previous special case in the while loop, we need to update the index only if we had a closing quote
218
- if (self.get_char_at() or "") == '"':
217
+ if char != string_terminator:
218
+ self.insert_char_at(string_terminator)
219
+ else:
219
220
  self.index += 1
220
221
 
221
222
  return self.json_str[start:end]
@@ -223,8 +224,9 @@ class JSONParser:
223
224
  def parse_number(self) -> Union[float, int]:
224
225
  # <number> is a valid real number expressed in one of a number of given formats
225
226
  number_str = ""
227
+ number_chars = set("0123456789-.eE")
226
228
  char = self.get_char_at()
227
- while char and (char.isdigit() or char in "-.eE"):
229
+ while char and char in number_chars:
228
230
  number_str += char
229
231
  self.index += 1
230
232
  char = self.get_char_at()
@@ -239,18 +241,14 @@ class JSONParser:
239
241
 
240
242
  def parse_boolean_or_null(self) -> Union[bool, None]:
241
243
  # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
242
- if self.json_str.startswith("true", self.index):
243
- self.index += 4
244
- return True
245
- elif self.json_str.startswith("false", self.index):
246
- self.index += 5
247
- return False
248
- elif self.json_str.startswith("null", self.index):
249
- self.index += 4
250
- return None
251
- else:
252
- # This is a string then
253
- return self.parse_string()
244
+ boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
245
+ for key, (value, length) in boolean_map.items():
246
+ if self.json_str.startswith(key, self.index):
247
+ self.index += length
248
+ return value
249
+
250
+ # This is a string then
251
+ return self.parse_string()
254
252
 
255
253
  def insert_char_at(self, char: str) -> None:
256
254
  self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
- Name: json-repair
3
- Version: 0.4.4
2
+ Name: json_repair
3
+ Version: 0.5.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
+ json_repair/json_repair.py,sha256=Hdevgq1_s8JaHCALBd7gTxp8_DexKezz8g6Cm0LcJTc,12505
3
+ json_repair-0.5.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.5.0.dist-info/METADATA,sha256=6_UiFGc3X93VQ8tFU1y02b16fbMyRybrQu1B7wwBmL8,6011
5
+ json_repair-0.5.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
6
+ json_repair-0.5.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.5.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=p9mZnte8Bg18NcxqgJ7vopH2gQv_XbZ0dRnk686QuRE,92
2
- json_repair/json_repair.py,sha256=b02twJkjYPlKBU8bCMud3ReIATlb2Tn_CkP1u4e4tN4,12562
3
- json_repair-0.4.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.4.4.dist-info/METADATA,sha256=JgJrJ_qMBYQS06W_O2PfJoMT-ngjlvx4aLg3AYVdQ18,6011
5
- json_repair-0.4.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
6
- json_repair-0.4.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.4.4.dist-info/RECORD,,