json-repair 0.19.0__py3-none-any.whl → 0.19.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,7 @@ If something is wrong (a missing parantheses or quotes for example) it will use
22
22
  All supported use cases are in the unit tests
23
23
  """
24
24
 
25
+ import os
25
26
  import json
26
27
  from typing import Any, Dict, List, Union, TextIO
27
28
 
@@ -31,7 +32,9 @@ class JSONParser:
31
32
  # The string to parse
32
33
  self.json_str = json_str
33
34
  # Alternatively, the file description with a json file in it
34
- self.json_fd = json_fd
35
+ if json_fd:
36
+ # This is a trick we do to treat the file wrapper as an array
37
+ self.json_str = StringFileWrapper(json_fd)
35
38
  # Index is our iterator that will keep track of which character we are looking at right now
36
39
  self.index = 0
37
40
  # This is used in the object member parsing to manage the special cases of missing quotes in key or value
@@ -246,7 +249,8 @@ class JSONParser:
246
249
  rstring_delimiter = "”"
247
250
  elif char.isalpha():
248
251
  # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
249
- if char.lower() in ["t", "f", "n"]:
252
+ # But remember, object keys are only of type string
253
+ if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key":
250
254
  value = self.parse_boolean_or_null()
251
255
  if value != "":
252
256
  return value
@@ -263,7 +267,8 @@ class JSONParser:
263
267
  self.index += 1
264
268
  return self.parse_json()
265
269
  self.log(
266
- "While parsing a string, we found no starting quote, ignoring", "info"
270
+ "While parsing a string, we found no starting quote. Will add the quote back",
271
+ "info",
267
272
  )
268
273
  missing_quotes = True
269
274
 
@@ -309,6 +314,15 @@ class JSONParser:
309
314
  string_acc += char
310
315
  self.index += 1
311
316
  char = self.get_char_at()
317
+ if len(string_acc) > 1 and string_acc[-1] == "\\":
318
+ # This is a special case, if people use real strings this might happen
319
+ self.log("Found a stray escape sequence, normalizing it", "info")
320
+ string_acc = string_acc[:-1]
321
+ if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
322
+ escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
323
+ string_acc += escape_seqs.get(char, char)
324
+ self.index += 1
325
+ char = self.get_char_at()
312
326
  # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
313
327
  if char == rstring_delimiter:
314
328
  # Special case here, in case of double quotes one after another
@@ -442,36 +456,22 @@ class JSONParser:
442
456
  try:
443
457
  return self.json_str[self.index + count]
444
458
  except IndexError:
445
- if self.json_fd:
446
- self.json_fd.seek(self.index + count)
447
- char = self.json_fd.read(1)
448
- if char == "":
449
- return False
450
- return char
451
- else:
452
- return False
459
+ return False
453
460
 
454
461
  def skip_whitespaces_at(self) -> None:
455
462
  """
456
463
  This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
457
464
  """
458
- if self.json_fd:
459
- char = self.get_char_at()
460
- while char and char.isspace():
461
- self.index += 1
462
- char = self.get_char_at()
463
- else:
464
- # If this is not a file stream, we do this monster here to make this function much much faster
465
+ try:
466
+ char = self.json_str[self.index]
467
+ except IndexError:
468
+ return
469
+ while char.isspace():
470
+ self.index += 1
465
471
  try:
466
472
  char = self.json_str[self.index]
467
473
  except IndexError:
468
474
  return
469
- while char.isspace():
470
- self.index += 1
471
- try:
472
- char = self.json_str[self.index]
473
- except IndexError:
474
- return
475
475
 
476
476
  def set_context(self, value: str) -> None:
477
477
  # If a value is provided update the context variable and save in stack
@@ -493,22 +493,9 @@ class JSONParser:
493
493
  def log(self, text: str, level: str) -> None:
494
494
  if level == self.logger["log_level"]:
495
495
  context = ""
496
- if self.json_fd:
497
- self.json_fd.seek(self.index - self.logger["window"])
498
- context = self.json_fd.read(self.logger["window"] * 2)
499
- self.json_fd.seek(self.index)
500
- else:
501
- start = (
502
- self.index - self.logger["window"]
503
- if (self.index - self.logger["window"]) >= 0
504
- else 0
505
- )
506
- end = (
507
- self.index + self.logger["window"]
508
- if (self.index + self.logger["window"]) <= len(self.json_str)
509
- else len(self.json_str)
510
- )
511
- context = self.json_str[start:end]
496
+ start = max(self.index - self.logger["window"], 0)
497
+ end = min(self.index + self.logger["window"], len(self.json_str))
498
+ context = self.json_str[start:end]
512
499
  self.logger["log"].append(
513
500
  {
514
501
  "text": text,
@@ -584,3 +571,31 @@ def from_file(
584
571
  fd.close()
585
572
 
586
573
  return jsonobj
574
+
575
+
576
class StringFileWrapper:
    """Read-only, string-like view over a seekable text file object.

    This is a trick to simplify the parser: instead of special-casing file
    descriptors everywhere, the file is exposed through ``obj[i]``,
    ``obj[a:b]`` and ``len(obj)`` so it can be used where a ``str`` is
    indexed.

    NOTE(review): indices are handed directly to ``fd.seek()``. For
    ``io.StringIO`` that is a character offset; for text files with
    multi-byte encodings seek positions are not guaranteed to coincide with
    character offsets -- confirm against the intended inputs.
    """

    def __init__(self, fd: TextIO) -> None:
        """Store the file object; its length is computed lazily by ``len()``."""
        self.fd = fd
        # None means "not measured yet"; 0 is a valid cached length.
        self.length = None

    def __getitem__(self, index: Union[int, slice]) -> str:
        """Return the character at ``index`` or the text covered by a slice.

        Slices follow ``str`` semantics: missing bounds, negative bounds and
        steps are resolved with ``slice.indices(len(self))``, and an empty
        range yields ``""``.

        For an integer index, a position outside the file yields ``""``
        rather than raising ``IndexError`` (callers treat the falsy result
        as "no more input"). Negative integers count from the end.
        """
        if isinstance(index, slice):
            positions = range(*index.indices(len(self)))
            if not positions:
                # Never call read() with a non-positive size: a negative
                # size would read all the way to the end of the file.
                return ""
            low, high = sorted((positions[0], positions[-1]))
            self.fd.seek(low)
            chunk = self.fd.read(high - low + 1)
            # Leave the file positioned at the start of the span just read.
            self.fd.seek(low)
            if positions.step == 1:
                return chunk
            return "".join(
                chunk[pos - low] for pos in positions if pos - low < len(chunk)
            )

        if index < 0:
            index += len(self)
        if index < 0:
            # Still before the start of the file: nothing to read there.
            return ""
        self.fd.seek(index)
        return self.fd.read(1)

    def __len__(self) -> int:
        """Return the end-of-file position reported by ``fd.tell()``.

        The value is measured once and cached; the file position in effect
        before the measurement is restored afterwards.
        """
        if self.length is None:
            current_position = self.fd.tell()
            self.fd.seek(0, os.SEEK_END)
            self.length = self.fd.tell()
            self.fd.seek(current_position)
        return self.length

    def __setitem__(self, index: Any, value: Any) -> None:
        """Reject item assignment: the wrapper is read-only.

        Raises:
            TypeError: always, mirroring what ``str`` does on item
                assignment.
        """
        raise TypeError("This is read-only!")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.19.0
3
+ Version: 0.19.2
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -0,0 +1,7 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/json_repair.py,sha256=qNkCy5by3jFhCAC3pDNKzDECOJr1Crh-P71j6N1CBBg,24405
3
+ json_repair-0.19.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
+ json_repair-0.19.2.dist-info/METADATA,sha256=UdTpknO6GMk16WlPV-YxSFd_QbY25WiNkqj2IE3B_NA,7333
5
+ json_repair-0.19.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ json_repair-0.19.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
+ json_repair-0.19.2.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/json_repair.py,sha256=ORf1wm6wTXriTJBtCJtoFU4rEw4daAoqV0ktdyhcOT0,23775
3
- json_repair-0.19.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
4
- json_repair-0.19.0.dist-info/METADATA,sha256=Mj5le5SqwFzYmWGZWu5JbjZNqX4cYPP_h1XpQDYNeOI,7333
5
- json_repair-0.19.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- json_repair-0.19.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
7
- json_repair-0.19.0.dist-info/RECORD,,