json-repair 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_repair/json_repair.py +78 -54
- {json_repair-0.19.1.dist-info → json_repair-0.20.0.dist-info}/METADATA +1 -1
- json_repair-0.20.0.dist-info/RECORD +7 -0
- json_repair-0.19.1.dist-info/RECORD +0 -7
- {json_repair-0.19.1.dist-info → json_repair-0.20.0.dist-info}/LICENSE +0 -0
- {json_repair-0.19.1.dist-info → json_repair-0.20.0.dist-info}/WHEEL +0 -0
- {json_repair-0.19.1.dist-info → json_repair-0.20.0.dist-info}/top_level.txt +0 -0
json_repair/json_repair.py
CHANGED
@@ -22,6 +22,7 @@ If something is wrong (a missing parantheses or quotes for example) it will use
|
|
22
22
|
All supported use cases are in the unit tests
|
23
23
|
"""
|
24
24
|
|
25
|
+
import os
|
25
26
|
import json
|
26
27
|
from typing import Any, Dict, List, Union, TextIO
|
27
28
|
|
@@ -31,7 +32,9 @@ class JSONParser:
|
|
31
32
|
# The string to parse
|
32
33
|
self.json_str = json_str
|
33
34
|
# Alternatively, the file description with a json file in it
|
34
|
-
|
35
|
+
if json_fd:
|
36
|
+
# This is a trick we do to treat the file wrapper as an array
|
37
|
+
self.json_str = StringFileWrapper(json_fd)
|
35
38
|
# Index is our iterator that will keep track of which character we are looking at right now
|
36
39
|
self.index = 0
|
37
40
|
# This is used in the object member parsing to manage the special cases of missing quotes in key or value
|
@@ -246,7 +249,8 @@ class JSONParser:
|
|
246
249
|
rstring_delimiter = "”"
|
247
250
|
elif char.isalpha():
|
248
251
|
# This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
|
249
|
-
|
252
|
+
# But remember, object keys are only of type string
|
253
|
+
if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key":
|
250
254
|
value = self.parse_boolean_or_null()
|
251
255
|
if value != "":
|
252
256
|
return value
|
@@ -263,7 +267,8 @@ class JSONParser:
|
|
263
267
|
self.index += 1
|
264
268
|
return self.parse_json()
|
265
269
|
self.log(
|
266
|
-
"While parsing a string, we found no starting quote
|
270
|
+
"While parsing a string, we found no starting quote. Will add the quote back",
|
271
|
+
"info",
|
267
272
|
)
|
268
273
|
missing_quotes = True
|
269
274
|
|
@@ -330,32 +335,50 @@ class JSONParser:
|
|
330
335
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
331
336
|
i = 1
|
332
337
|
next_c = self.get_char_at(i)
|
333
|
-
|
338
|
+
check_comma_in_object_value = True
|
339
|
+
while next_c and next_c not in [
|
340
|
+
rstring_delimiter,
|
341
|
+
lstring_delimiter,
|
342
|
+
]:
|
343
|
+
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
|
344
|
+
# This is because the routine after will make sure to correct any bad guess and this solves a corner case
|
345
|
+
if next_c.isalpha():
|
346
|
+
check_comma_in_object_value = False
|
334
347
|
# If we are in an object context, let's check for the right delimiters
|
335
348
|
if (
|
336
|
-
next_c
|
337
|
-
or ("
|
338
|
-
or ("object_value" in self.context and next_c in ["}", ","])
|
349
|
+
("object_key" in self.context and next_c in [":", "}"])
|
350
|
+
or ("object_value" in self.context and next_c == "}")
|
339
351
|
or ("array" in self.context and next_c in ["]", ","])
|
352
|
+
or (
|
353
|
+
check_comma_in_object_value
|
354
|
+
and self.get_context() == "object_value"
|
355
|
+
and next_c == ","
|
356
|
+
)
|
340
357
|
):
|
341
358
|
break
|
342
359
|
i += 1
|
343
360
|
next_c = self.get_char_at(i)
|
344
361
|
if next_c == rstring_delimiter:
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
while next_c and next_c != rstring_delimiter:
|
362
|
+
if self.get_context() == "object_value":
|
363
|
+
# But this might not be it! This could be just a missing comma
|
364
|
+
# We found a delimiter and we need to check if this is a key
|
365
|
+
# so find a rstring_delimiter and a colon after
|
350
366
|
i += 1
|
351
367
|
next_c = self.get_char_at(i)
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
if next_c in [lstring_delimiter, rstring_delimiter, ","]:
|
356
|
-
break
|
368
|
+
while next_c and next_c != rstring_delimiter:
|
369
|
+
i += 1
|
370
|
+
next_c = self.get_char_at(i)
|
357
371
|
i += 1
|
358
372
|
next_c = self.get_char_at(i)
|
373
|
+
while next_c and next_c != ":":
|
374
|
+
if next_c in [
|
375
|
+
lstring_delimiter,
|
376
|
+
rstring_delimiter,
|
377
|
+
",",
|
378
|
+
]:
|
379
|
+
break
|
380
|
+
i += 1
|
381
|
+
next_c = self.get_char_at(i)
|
359
382
|
# Only if we fail to find a ':' then we know this is misplaced quote
|
360
383
|
if next_c != ":":
|
361
384
|
self.log(
|
@@ -451,36 +474,22 @@ class JSONParser:
|
|
451
474
|
try:
|
452
475
|
return self.json_str[self.index + count]
|
453
476
|
except IndexError:
|
454
|
-
|
455
|
-
self.json_fd.seek(self.index + count)
|
456
|
-
char = self.json_fd.read(1)
|
457
|
-
if char == "":
|
458
|
-
return False
|
459
|
-
return char
|
460
|
-
else:
|
461
|
-
return False
|
477
|
+
return False
|
462
478
|
|
463
479
|
def skip_whitespaces_at(self) -> None:
|
464
480
|
"""
|
465
481
|
This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
|
466
482
|
"""
|
467
|
-
|
468
|
-
char = self.
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
# If this is not a file stream, we do this monster here to make this function much much faster
|
483
|
+
try:
|
484
|
+
char = self.json_str[self.index]
|
485
|
+
except IndexError:
|
486
|
+
return
|
487
|
+
while char.isspace():
|
488
|
+
self.index += 1
|
474
489
|
try:
|
475
490
|
char = self.json_str[self.index]
|
476
491
|
except IndexError:
|
477
492
|
return
|
478
|
-
while char.isspace():
|
479
|
-
self.index += 1
|
480
|
-
try:
|
481
|
-
char = self.json_str[self.index]
|
482
|
-
except IndexError:
|
483
|
-
return
|
484
493
|
|
485
494
|
def set_context(self, value: str) -> None:
|
486
495
|
# If a value is provided update the context variable and save in stack
|
@@ -502,22 +511,9 @@ class JSONParser:
|
|
502
511
|
def log(self, text: str, level: str) -> None:
|
503
512
|
if level == self.logger["log_level"]:
|
504
513
|
context = ""
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
self.json_fd.seek(self.index)
|
509
|
-
else:
|
510
|
-
start = (
|
511
|
-
self.index - self.logger["window"]
|
512
|
-
if (self.index - self.logger["window"]) >= 0
|
513
|
-
else 0
|
514
|
-
)
|
515
|
-
end = (
|
516
|
-
self.index + self.logger["window"]
|
517
|
-
if (self.index + self.logger["window"]) <= len(self.json_str)
|
518
|
-
else len(self.json_str)
|
519
|
-
)
|
520
|
-
context = self.json_str[start:end]
|
514
|
+
start = max(self.index - self.logger["window"], 0)
|
515
|
+
end = min(self.index + self.logger["window"], len(self.json_str))
|
516
|
+
context = self.json_str[start:end]
|
521
517
|
self.logger["log"].append(
|
522
518
|
{
|
523
519
|
"text": text,
|
@@ -593,3 +589,31 @@ def from_file(
|
|
593
589
|
fd.close()
|
594
590
|
|
595
591
|
return jsonobj
|
592
|
+
|
593
|
+
|
594
|
+
class StringFileWrapper:
    """Read-only, string-like view over a seekable text file object.

    The parser indexes its input as if it were a ``str`` (``obj[i]``,
    ``obj[a:b]``, ``len(obj)``). This wrapper maps those operations onto
    ``seek``/``read`` calls so a file can be parsed without loading it
    entirely into memory.

    NOTE(review): indices are passed straight to ``fd.seek`` and the length
    comes from ``fd.tell``. For real text-mode files these are opaque
    offsets, not character counts, so the mapping is only exact for content
    where one character is one offset unit (e.g. ASCII, ``io.StringIO``).
    Confirm against the inputs this is used with.
    """

    def __init__(self, fd: TextIO) -> None:
        """Wrap *fd*, an open, seekable text file object."""
        self.fd = fd
        # Cached result of __len__; None means "not measured yet".
        self.length: Union[int, None] = None

    def __getitem__(self, index: Union[int, slice]) -> str:
        """Return the character at *index*, or the substring for a slice.

        Mirrors ``str`` semantics: an integer index outside the content
        raises ``IndexError`` (callers in this module rely on that to detect
        the end of input), negative indices count from the end, and slices
        are clamped to the available content.
        """
        if isinstance(index, slice):
            # slice.indices resolves None/negative bounds and clamps them.
            start, stop, step = index.indices(len(self))
            if step != 1:
                # Rare path: strided or reversed slices cannot be served by
                # one contiguous read, so slice the full content instead.
                position = self.fd.tell()
                self.fd.seek(0)
                content = self.fd.read()
                self.fd.seek(position)
                return content[index]
            self.fd.seek(start)
            # Guard against a negative size: read(-n) would read to EOF.
            value = self.fd.read(max(stop - start, 0))
            # Leave the file positioned at the start of the slice.
            self.fd.seek(start)
            return value

        if index < 0:
            index += len(self)
            if index < 0:
                raise IndexError("StringFileWrapper index out of range")
        self.fd.seek(index)
        char = self.fd.read(1)
        if not char:
            # read() returns "" at end of file; a str would raise here.
            raise IndexError("StringFileWrapper index out of range")
        return char

    def __len__(self) -> int:
        """Return the end-of-file offset, measured once and then cached."""
        # Compare with None so an empty file (length 0) is cached as well.
        if self.length is None:
            current_position = self.fd.tell()
            self.fd.seek(0, os.SEEK_END)
            self.length = self.fd.tell()
            # Restore the position so measuring has no visible side effect.
            self.fd.seek(current_position)
        return self.length

    def __setitem__(self, index: Union[int, slice], value: str) -> None:
        """Reject item assignment: the wrapper is read-only.

        Raises:
            TypeError: always, matching what ``str`` does on assignment.
        """
        raise TypeError("This is read-only!")
|
@@ -0,0 +1,7 @@
|
|
1
|
+
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
+
json_repair/json_repair.py,sha256=zYg4tIwZ4rdVkCQ5XVceNQaOz2MT50O7jHJbJ1EpKhk,25446
|
3
|
+
json_repair-0.20.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
+
json_repair-0.20.0.dist-info/METADATA,sha256=2WrsjORPx37e4CqdkdiARwB-VoP5EyjsZaS0hcVnVBo,7333
|
5
|
+
json_repair-0.20.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
json_repair-0.20.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
+
json_repair-0.20.0.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
|
2
|
-
json_repair/json_repair.py,sha256=5viwB7G8T9EyYRsJpCxZPZChmgDkaT1WQ7oxFUjm5Lg,24349
|
3
|
-
json_repair-0.19.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
|
4
|
-
json_repair-0.19.1.dist-info/METADATA,sha256=FlAtvarNR4sXKlNYr7VDedyScl7t8UgqyDyNZ2NIS10,7333
|
5
|
-
json_repair-0.19.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
json_repair-0.19.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
|
7
|
-
json_repair-0.19.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|