json-repair 0.19.0__py3-none-any.whl → 0.19.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +55 -40
- {json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/METADATA +1 -1
- json_repair-0.19.2.dist-info/RECORD +7 -0
- json_repair-0.19.0.dist-info/RECORD +0 -7
- {json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/LICENSE +0 -0
- {json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/WHEEL +0 -0
- {json_repair-0.19.0.dist-info → json_repair-0.19.2.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -22,6 +22,7 @@ If something is wrong (a missing parenthesis or quotes for example) it will use
|
|
22
22
|
All supported use cases are in the unit tests
|
23
23
|
"""
|
24
24
|
|
25
|
+
import os
|
25
26
|
import json
|
26
27
|
from typing import Any, Dict, List, Union, TextIO
|
27
28
|
|
@@ -31,7 +32,9 @@ class JSONParser:
|
|
31
32
|
# The string to parse
|
32
33
|
self.json_str = json_str
|
33
34
|
# Alternatively, the file descriptor with a json file in it
|
34
|
-
|
35
|
+
if json_fd:
|
36
|
+
# This is a trick we do to treat the file wrapper as an array
|
37
|
+
self.json_str = StringFileWrapper(json_fd)
|
35
38
|
# Index is our iterator that will keep track of which character we are looking at right now
|
36
39
|
self.index = 0
|
37
40
|
# This is used in the object member parsing to manage the special cases of missing quotes in key or value
|
@@ -246,7 +249,8 @@ class JSONParser:
|
|
246
249
|
rstring_delimiter = "”"
|
247
250
|
elif char.isalpha():
|
248
251
|
# This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
|
249
|
-
|
252
|
+
# But remember, object keys are only of type string
|
253
|
+
if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key":
|
250
254
|
value = self.parse_boolean_or_null()
|
251
255
|
if value != "":
|
252
256
|
return value
|
@@ -263,7 +267,8 @@ class JSONParser:
|
|
263
267
|
self.index += 1
|
264
268
|
return self.parse_json()
|
265
269
|
self.log(
|
266
|
-
"While parsing a string, we found no starting quote
|
270
|
+
"While parsing a string, we found no starting quote. Will add the quote back",
|
271
|
+
"info",
|
267
272
|
)
|
268
273
|
missing_quotes = True
|
269
274
|
|
@@ -309,6 +314,15 @@ class JSONParser:
|
|
309
314
|
string_acc += char
|
310
315
|
self.index += 1
|
311
316
|
char = self.get_char_at()
|
317
|
+
if len(string_acc) > 1 and string_acc[-1] == "\\":
|
318
|
+
# This is a special case, if people use real strings this might happen
|
319
|
+
self.log("Found a stray escape sequence, normalizing it", "info")
|
320
|
+
string_acc = string_acc[:-1]
|
321
|
+
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
|
322
|
+
escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
|
323
|
+
string_acc += escape_seqs.get(char, char)
|
324
|
+
self.index += 1
|
325
|
+
char = self.get_char_at()
|
312
326
|
# ChatGPT sometimes forgets to quote stuff in html tags or markdown, so we do this whole thing here
|
313
327
|
if char == rstring_delimiter:
|
314
328
|
# Special case here, in case of double quotes one after another
|
@@ -442,36 +456,22 @@ class JSONParser:
|
|
442
456
|
try:
|
443
457
|
return self.json_str[self.index + count]
|
444
458
|
except IndexError:
|
445
|
-
|
446
|
-
self.json_fd.seek(self.index + count)
|
447
|
-
char = self.json_fd.read(1)
|
448
|
-
if char == "":
|
449
|
-
return False
|
450
|
-
return char
|
451
|
-
else:
|
452
|
-
return False
|
459
|
+
return False
|
453
460
|
|
454
461
|
def skip_whitespaces_at(self) -> None:
|
455
462
|
"""
|
456
463
|
This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
|
457
464
|
"""
|
458
|
-
|
459
|
-
char = self.
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
# If this is not a file stream, we do this monster here to make this function much much faster
|
465
|
+
try:
|
466
|
+
char = self.json_str[self.index]
|
467
|
+
except IndexError:
|
468
|
+
return
|
469
|
+
while char.isspace():
|
470
|
+
self.index += 1
|
465
471
|
try:
|
466
472
|
char = self.json_str[self.index]
|
467
473
|
except IndexError:
|
468
474
|
return
|
469
|
-
while char.isspace():
|
470
|
-
self.index += 1
|
471
|
-
try:
|
472
|
-
char = self.json_str[self.index]
|
473
|
-
except IndexError:
|
474
|
-
return
|
475
475
|
|
476
476
|
def set_context(self, value: str) -> None:
|
477
477
|
# If a value is provided update the context variable and save in stack
|
@@ -493,22 +493,9 @@ class JSONParser:
|
|
493
493
|
def log(self, text: str, level: str) -> None:
|
494
494
|
if level == self.logger["log_level"]:
|
495
495
|
context = ""
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
self.json_fd.seek(self.index)
|
500
|
-
else:
|
501
|
-
start = (
|
502
|
-
self.index - self.logger["window"]
|
503
|
-
if (self.index - self.logger["window"]) >= 0
|
504
|
-
else 0
|
505
|
-
)
|
506
|
-
end = (
|
507
|
-
self.index + self.logger["window"]
|
508
|
-
if (self.index + self.logger["window"]) <= len(self.json_str)
|
509
|
-
else len(self.json_str)
|
510
|
-
)
|
511
|
-
context = self.json_str[start:end]
|
496
|
+
start = max(self.index - self.logger["window"], 0)
|
497
|
+
end = min(self.index + self.logger["window"], len(self.json_str))
|
498
|
+
context = self.json_str[start:end]
|
512
499
|
self.logger["log"].append(
|
513
500
|
{
|
514
501
|
"text": text,
|
@@ -584,3 +571,31 @@ def from_file(
|
|
584
571
|
fd.close()
|
585
572
|
|
586
573
|
return jsonobj
|
574
|
+
|
575
|
+
|
576
|
+
class StringFileWrapper:
|
577
|
+
# This is a trick to simplify the code above: it transforms the file descriptor handling into array handling
|
578
|
+
def __init__(self, fd: TextIO) -> None:
|
579
|
+
self.fd = fd
|
580
|
+
self.length = None
|
581
|
+
|
582
|
+
def __getitem__(self, index: int) -> Any:
|
583
|
+
if isinstance(index, slice):
|
584
|
+
self.fd.seek(index.start)
|
585
|
+
value = self.fd.read(index.stop - index.start)
|
586
|
+
self.fd.seek(index.start)
|
587
|
+
return value
|
588
|
+
else:
|
589
|
+
self.fd.seek(index)
|
590
|
+
return self.fd.read(1)
|
591
|
+
|
592
|
+
def __len__(self) -> int:
|
593
|
+
if not self.length:
|
594
|
+
current_position = self.fd.tell()
|
595
|
+
self.fd.seek(0, os.SEEK_END)
|
596
|
+
self.length = self.fd.tell()
|
597
|
+
self.fd.seek(current_position)
|
598
|
+
return self.length
|
599
|
+
|
600
|
+
def __setitem__(self):
|
601
|
+
raise Exception("This is read-only!")
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=qNkCy5by3jFhCAC3pDNKzDECOJr1Crh-P71j6N1CBBg,24405
|
3
|
+
json_repair-0.19.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.19.2.dist-info/METADATA,sha256=UdTpknO6GMk16WlPV-YxSFd_QbY25WiNkqj2IE3B_NA,7333
|
5
|
+
json_repair-0.19.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.19.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.19.2.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=ORf1wm6wTXriTJBtCJtoFU4rEw4daAoqV0ktdyhcOT0,23775
|
3
|
-
json_repair-0.19.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.19.0.dist-info/METADATA,sha256=Mj5le5SqwFzYmWGZWu5JbjZNqX4cYPP_h1XpQDYNeOI,7333
|
5
|
-
json_repair-0.19.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.19.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.19.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|