json-repair 0.19.1__py3-none-any.whl → 0.19.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +46 -40
- {json_repair-0.19.1.dist-info → json_repair-0.19.2.dist-info}/METADATA +1 -1
- json_repair-0.19.2.dist-info/RECORD +7 -0
- json_repair-0.19.1.dist-info/RECORD +0 -7
- {json_repair-0.19.1.dist-info → json_repair-0.19.2.dist-info}/LICENSE +0 -0
- {json_repair-0.19.1.dist-info → json_repair-0.19.2.dist-info}/WHEEL +0 -0
- {json_repair-0.19.1.dist-info → json_repair-0.19.2.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -22,6 +22,7 @@ If something is wrong (a missing parentheses or quotes for example) it will use
|
|
22
22
|
All supported use cases are in the unit tests
|
23
23
|
"""
|
24
24
|
|
25
|
+
import os
|
25
26
|
import json
|
26
27
|
from typing import Any, Dict, List, Union, TextIO
|
27
28
|
|
@@ -31,7 +32,9 @@ class JSONParser:
|
|
31
32
|
# The string to parse
|
32
33
|
self.json_str = json_str
|
33
34
|
# Alternatively, the file description with a json file in it
|
34
|
-
|
35
|
+
if json_fd:
|
36
|
+
# This is a trick we do to treat the file wrapper as an array
|
37
|
+
self.json_str = StringFileWrapper(json_fd)
|
35
38
|
# Index is our iterator that will keep track of which character we are looking at right now
|
36
39
|
self.index = 0
|
37
40
|
# This is used in the object member parsing to manage the special cases of missing quotes in key or value
|
@@ -246,7 +249,8 @@ class JSONParser:
|
|
246
249
|
rstring_delimiter = "”"
|
247
250
|
elif char.isalpha():
|
248
251
|
# This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
|
249
|
-
|
252
|
+
# But remember, object keys are only of type string
|
253
|
+
if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key":
|
250
254
|
value = self.parse_boolean_or_null()
|
251
255
|
if value != "":
|
252
256
|
return value
|
@@ -263,7 +267,8 @@ class JSONParser:
|
|
263
267
|
self.index += 1
|
264
268
|
return self.parse_json()
|
265
269
|
self.log(
|
266
|
-
"While parsing a string, we found no starting quote
|
270
|
+
"While parsing a string, we found no starting quote. Will add the quote back",
|
271
|
+
"info",
|
267
272
|
)
|
268
273
|
missing_quotes = True
|
269
274
|
|
@@ -451,36 +456,22 @@ class JSONParser:
|
|
451
456
|
try:
|
452
457
|
return self.json_str[self.index + count]
|
453
458
|
except IndexError:
|
454
|
-
|
455
|
-
self.json_fd.seek(self.index + count)
|
456
|
-
char = self.json_fd.read(1)
|
457
|
-
if char == "":
|
458
|
-
return False
|
459
|
-
return char
|
460
|
-
else:
|
461
|
-
return False
|
459
|
+
return False
|
462
460
|
|
463
461
|
def skip_whitespaces_at(self) -> None:
|
464
462
|
"""
|
465
463
|
This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
|
466
464
|
"""
|
467
|
-
|
468
|
-
char = self.
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
# If this is not a file stream, we do this monster here to make this function much much faster
|
465
|
+
try:
|
466
|
+
char = self.json_str[self.index]
|
467
|
+
except IndexError:
|
468
|
+
return
|
469
|
+
while char.isspace():
|
470
|
+
self.index += 1
|
474
471
|
try:
|
475
472
|
char = self.json_str[self.index]
|
476
473
|
except IndexError:
|
477
474
|
return
|
478
|
-
while char.isspace():
|
479
|
-
self.index += 1
|
480
|
-
try:
|
481
|
-
char = self.json_str[self.index]
|
482
|
-
except IndexError:
|
483
|
-
return
|
484
475
|
|
485
476
|
def set_context(self, value: str) -> None:
|
486
477
|
# If a value is provided update the context variable and save in stack
|
@@ -502,22 +493,9 @@ class JSONParser:
|
|
502
493
|
def log(self, text: str, level: str) -> None:
|
503
494
|
if level == self.logger["log_level"]:
|
504
495
|
context = ""
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
self.json_fd.seek(self.index)
|
509
|
-
else:
|
510
|
-
start = (
|
511
|
-
self.index - self.logger["window"]
|
512
|
-
if (self.index - self.logger["window"]) >= 0
|
513
|
-
else 0
|
514
|
-
)
|
515
|
-
end = (
|
516
|
-
self.index + self.logger["window"]
|
517
|
-
if (self.index + self.logger["window"]) <= len(self.json_str)
|
518
|
-
else len(self.json_str)
|
519
|
-
)
|
520
|
-
context = self.json_str[start:end]
|
496
|
+
start = max(self.index - self.logger["window"], 0)
|
497
|
+
end = min(self.index + self.logger["window"], len(self.json_str))
|
498
|
+
context = self.json_str[start:end]
|
521
499
|
self.logger["log"].append(
|
522
500
|
{
|
523
501
|
"text": text,
|
@@ -593,3 +571,31 @@ def from_file(
|
|
593
571
|
fd.close()
|
594
572
|
|
595
573
|
return jsonobj
|
574
|
+
|
575
|
+
|
576
|
+
class StringFileWrapper:
|
577
|
+
# This is a trick to simplify the code above, transform the filedescriptor handling into an array handling
|
578
|
+
def __init__(self, fd: TextIO) -> None:
|
579
|
+
self.fd = fd
|
580
|
+
self.length = None
|
581
|
+
|
582
|
+
def __getitem__(self, index: int) -> Any:
|
583
|
+
if isinstance(index, slice):
|
584
|
+
self.fd.seek(index.start)
|
585
|
+
value = self.fd.read(index.stop - index.start)
|
586
|
+
self.fd.seek(index.start)
|
587
|
+
return value
|
588
|
+
else:
|
589
|
+
self.fd.seek(index)
|
590
|
+
return self.fd.read(1)
|
591
|
+
|
592
|
+
def __len__(self) -> int:
|
593
|
+
if not self.length:
|
594
|
+
current_position = self.fd.tell()
|
595
|
+
self.fd.seek(0, os.SEEK_END)
|
596
|
+
self.length = self.fd.tell()
|
597
|
+
self.fd.seek(current_position)
|
598
|
+
return self.length
|
599
|
+
|
600
|
+
def __setitem__(self):
|
601
|
+
raise Exception("This is read-only!")
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=qNkCy5by3jFhCAC3pDNKzDECOJr1Crh-P71j6N1CBBg,24405
|
3
|
+
json_repair-0.19.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.19.2.dist-info/METADATA,sha256=UdTpknO6GMk16WlPV-YxSFd_QbY25WiNkqj2IE3B_NA,7333
|
5
|
+
json_repair-0.19.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.19.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.19.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=5viwB7G8T9EyYRsJpCxZPZChmgDkaT1WQ7oxFUjm5Lg,24349
|
3
|
-
json_repair-0.19.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.19.1.dist-info/METADATA,sha256=FlAtvarNR4sXKlNYr7VDedyScl7t8UgqyDyNZ2NIS10,7333
|
5
|
-
json_repair-0.19.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.19.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.19.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|