json-repair 0.29.2__py3-none-any.whl → 0.29.4__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,98 @@
1
+ import os
2
+ from typing import TextIO, Union
3
+
4
+
5
+ class StringFileWrapper:
6
+ # This is a trick to simplify the code, transform the filedescriptor handling into a string handling
7
+ def __init__(self, fd: TextIO, CHUNK_LENGTH: int) -> None:
8
+ """
9
+ Initialize the StringFileWrapper with a file descriptor and chunk length.
10
+
11
+ Args:
12
+ fd (TextIO): The file descriptor to wrap.
13
+ CHUNK_LENGTH (int): The length of each chunk to read from the file.
14
+
15
+ Attributes:
16
+ fd (TextIO): The wrapped file descriptor.
17
+ length (int): The total length of the file content.
18
+ buffers (dict[int, str]): Dictionary to store chunks of file content.
19
+ buffer_length (int): The length of each buffer chunk.
20
+ """
21
+ self.fd = fd
22
+ self.length: int = 0
23
+ # Buffers are 1MB strings that are read from the file
24
+ # and kept in memory to keep reads low
25
+ self.buffers: dict[int, str] = {}
26
+ # CHUNK_LENGTH is in bytes
27
+ if not CHUNK_LENGTH or CHUNK_LENGTH < 2:
28
+ CHUNK_LENGTH = 1_000_000
29
+ self.buffer_length = CHUNK_LENGTH
30
+
31
+ def get_buffer(self, index: int) -> str:
32
+ """
33
+ Retrieve or load a buffer chunk from the file.
34
+
35
+ Args:
36
+ index (int): The index of the buffer chunk to retrieve.
37
+
38
+ Returns:
39
+ str: The buffer chunk at the specified index.
40
+ """
41
+ if self.buffers.get(index) is None:
42
+ self.fd.seek(index * self.buffer_length)
43
+ self.buffers[index] = self.fd.read(self.buffer_length)
44
+ # Save memory by keeping max 2MB buffer chunks and min 2 chunks
45
+ if len(self.buffers) > max(2, 2_000_000 / self.buffer_length):
46
+ oldest_key = next(iter(self.buffers))
47
+ if oldest_key != index:
48
+ self.buffers.pop(oldest_key)
49
+ return self.buffers[index]
50
+
51
+ def __getitem__(self, index: Union[int, slice]) -> str:
52
+ """
53
+ Retrieve a character or a slice of characters from the file.
54
+
55
+ Args:
56
+ index (Union[int, slice]): The index or slice of characters to retrieve.
57
+
58
+ Returns:
59
+ str: The character(s) at the specified index or slice.
60
+ """
61
+ # The buffers are addressed like rows and columns of RAM:
62
+ # self.buffers[index]: the row, a chunk of length buffer_length (1MB by default); index is `i` floor-divided by buffer_length
63
+ # self.buffers[index][j]: the column within that row; j is `i` modulo buffer_length
64
+ if isinstance(index, slice):
65
+ buffer_index = index.start // self.buffer_length
66
+ buffer_end = index.stop // self.buffer_length
67
+ if buffer_index == buffer_end:
68
+ return self.get_buffer(buffer_index)[
69
+ index.start % self.buffer_length : index.stop % self.buffer_length
70
+ ]
71
+ else:
72
+ start_slice = self.get_buffer(buffer_index)[
73
+ index.start % self.buffer_length :
74
+ ]
75
+ end_slice = self.get_buffer(buffer_end)[
76
+ : index.stop % self.buffer_length
77
+ ]
78
+ middle_slices = [
79
+ self.get_buffer(i) for i in range(buffer_index + 1, buffer_end)
80
+ ]
81
+ return start_slice + "".join(middle_slices) + end_slice
82
+ else:
83
+ buffer_index = index // self.buffer_length
84
+ return self.get_buffer(buffer_index)[index % self.buffer_length]
85
+
86
+ def __len__(self) -> int:
87
+ """
88
+ Get the total length of the file.
89
+
90
+ Returns:
91
+ int: The total number of characters in the file.
92
+ """
93
+ if self.length < 1:
94
+ current_position = self.fd.tell()
95
+ self.fd.seek(0, os.SEEK_END)
96
+ self.length = self.fd.tell()
97
+ self.fd.seek(current_position)
98
+ return self.length
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.29.2
3
+ Version: 0.29.4
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -45,21 +45,6 @@ This simple package can be used to fix an invalid json string. To know all cases
45
45
 
46
46
  Inspired by https://github.com/josdejong/jsonrepair
47
47
 
48
- ---
49
- # How to cite
50
- If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
51
-
52
- @software{Baccianella_JSON_Repair_-_2024,
53
- author = {Baccianella, Stefano},
54
- month = aug,
55
- title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
56
- url = {https://github.com/mangiucugna/json_repair},
57
- version = {0.28.3},
58
- year = {2024}
59
- }
60
-
61
- Thank you for citing my work and please send me a link to the paper if you can!
62
-
63
48
  ---
64
49
  # Offer me a beer
65
50
  If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
@@ -79,7 +64,30 @@ I searched for a lightweight python package that was able to reliably fix this p
79
64
 
80
65
  *So I wrote one*
81
66
 
67
+ # Supported use cases
68
+
69
+ ### Fixing Syntax Errors in JSON
70
+
71
+ - Missing quotes, misplaced commas, unescaped characters, and incomplete key-value pairs.
72
+ - Missing quotation marks, improperly formatted values (true, false, null), and corrupted key-value structures.
73
+
74
+ ### Repairing Malformed JSON Arrays and Objects
75
+
76
+ - Incomplete or broken arrays/objects by adding necessary elements (e.g., commas, brackets) or default values (null, "").
77
+ - The library can process JSON that includes extra non-JSON characters like comments or improperly placed characters, cleaning them up while maintaining valid structure.
78
+
79
+ ### Auto-Completion for Missing JSON Values
80
+
81
+ - Automatically completes missing values in JSON fields with reasonable defaults (like empty strings or null), ensuring validity.
82
+
82
83
  # How to use
84
+
85
+ Install the library with pip
86
+
87
+ pip install json-repair
88
+
89
+ then you can use it in your code like this
90
+
83
91
  from json_repair import repair_json
84
92
 
85
93
  good_json_string = repair_json(bad_json_string)
@@ -185,6 +193,23 @@ To ensure that you only pin the major version of this library in your `requireme
185
193
 
186
194
  In this example, any version that starts with `0.` will be acceptable, allowing for updates on minor and patch versions.
187
195
 
196
+ ---
197
+ # How to cite
198
+ If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
199
+
200
+ @software{Baccianella_JSON_Repair_-_2024,
201
+ author = {Baccianella, Stefano},
202
+ month = aug,
203
+ title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
204
+ url = {https://github.com/mangiucugna/json_repair},
205
+ version = {0.28.3},
206
+ year = {2024}
207
+ }
208
+
209
+ Thank you for citing my work and please send me a link to the paper if you can!
210
+
211
+ ---
212
+
188
213
  # How it works
189
214
  This module will parse the JSON file following the BNF definition:
190
215
 
@@ -0,0 +1,13 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=DdJu3DJR-ANvr8KrWfJqdtOE3uI6_B0VQidKvE3PjJA,1080
4
+ json_repair/json_parser.py,sha256=BUPyAsb7wzkjNrBmsZgxgoOM9JhksCN-8cHcbJQpcPU,25525
5
+ json_repair/json_repair.py,sha256=GTg3OAXRbAJAHWs8oiQDqUHh4h6qKDVvWPXcrqafzLY,6100
6
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
8
+ json_repair-0.29.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.29.4.dist-info/METADATA,sha256=dBmPfg4wBTxOFXklH4V38aiO4pUks5FS7HcvQlZ4NIg,10686
10
+ json_repair-0.29.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
+ json_repair-0.29.4.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.29.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.29.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.1.2)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_repair.py,sha256=anGQI5RxauBnZUO9QKoPU7JgN_sUaIddyiR4ecpMmm8,34060
4
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- json_repair-0.29.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
6
- json_repair-0.29.2.dist-info/METADATA,sha256=Jtwl047L79Xj0CmA363Xc2EemzttgMWqYW0abi4a7fA,9787
7
- json_repair-0.29.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
8
- json_repair-0.29.2.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
9
- json_repair-0.29.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
10
- json_repair-0.29.2.dist-info/RECORD,,