json-repair 0.29.2__py3-none-any.whl → 0.29.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,98 @@
1
+ import os
2
+ from typing import TextIO, Union
3
+
4
+
5
class StringFileWrapper:
    """
    Read-only, string-like view over a text file, loaded lazily in chunks.

    The wrapper lets code written against ``str`` (integer indexing, slicing,
    ``len``) run against a file object without reading the whole file into
    memory: characters are fetched in fixed-size chunks that are cached in a
    small, bounded dictionary.
    """

    # This is a trick to simplify the code: transform the file descriptor
    # handling into string handling.
    def __init__(self, fd: TextIO, CHUNK_LENGTH: int) -> None:
        """
        Initialize the StringFileWrapper with a file descriptor and chunk length.

        Args:
            fd (TextIO): The seekable text stream to wrap.
            CHUNK_LENGTH (int): Number of characters per chunk. Falsy values
                and values below 2 fall back to 1_000_000.

        Attributes:
            fd (TextIO): The wrapped file descriptor.
            length (int): Cached number of characters in the file (0 until
                ``len()`` is first called).
            buffers (dict[int, str]): Cache of loaded chunks keyed by chunk index.
            buffer_length (int): The length, in characters, of each chunk.
        """
        self.fd = fd
        self.length: int = 0
        # Chunks read from the file and kept in memory to keep reads low.
        self.buffers: dict[int, str] = {}
        if not CHUNK_LENGTH or CHUNK_LENGTH < 2:
            CHUNK_LENGTH = 1_000_000
        self.buffer_length = CHUNK_LENGTH
        # Seek position of the start of every chunk discovered so far.
        # Text streams only accept seek offsets that are zero or were returned
        # by tell(), and read(n) counts characters rather than bytes, so a
        # chunk's file offset cannot be computed as index * buffer_length; it
        # has to be recorded while reading sequentially.
        self._chunk_positions: list[int] = [0]

    def _read_chunk(self, index: int) -> str:
        """Read chunk ``index`` from the file; return "" when it lies past EOF."""
        positions = self._chunk_positions
        size = self.buffer_length
        # Walk forward from the last known chunk start until the requested
        # chunk's position is known.
        while len(positions) <= index:
            self.fd.seek(positions[-1])
            if len(self.fd.read(size)) < size:
                # The file ends before the requested chunk begins.
                return ""
            positions.append(self.fd.tell())
        self.fd.seek(positions[index])
        chunk = self.fd.read(size)
        # Remember where the next chunk starts so that sequential access never
        # has to re-read data.
        if index + 1 == len(positions) and len(chunk) == size:
            positions.append(self.fd.tell())
        return chunk

    def get_buffer(self, index: int) -> str:
        """
        Retrieve or load a buffer chunk from the file.

        Args:
            index (int): The index of the buffer chunk to retrieve.

        Returns:
            str: The buffer chunk at the specified index ("" past end of file).

        Raises:
            ValueError: If ``index`` is negative.
        """
        if index < 0:
            raise ValueError(f"negative chunk index {index}")
        chunk = self.buffers.get(index)
        if chunk is None:
            chunk = self._read_chunk(index)
            self.buffers[index] = chunk
            # Save memory by keeping max 2MB of buffer chunks and min 2 chunks.
            if len(self.buffers) > max(2, 2_000_000 // self.buffer_length):
                # dicts preserve insertion order, so the first key is the oldest.
                oldest_key = next(iter(self.buffers))
                if oldest_key != index:
                    self.buffers.pop(oldest_key)
        return chunk

    def _read_span(self, start: int, stop: int) -> str:
        """Return characters ``[start, stop)``; both bounds must be >= 0."""
        if start >= stop:
            return ""
        size = self.buffer_length
        first, offset = divmod(start, size)
        last, end_offset = divmod(stop, size)
        if end_offset == 0:
            # The span ends exactly on a chunk boundary: the final chunk is the
            # previous one, taken in full, so do not load an extra chunk.
            last -= 1
            end_offset = size
        parts = []
        for chunk_index in range(first, last + 1):
            chunk = self.get_buffer(chunk_index)
            low = offset if chunk_index == first else 0
            high = end_offset if chunk_index == last else size
            parts.append(chunk[low:high])
            if len(chunk) < size:
                # A short chunk means end of file; nothing more to read.
                break
        return "".join(parts)

    def __getitem__(self, index: Union[int, slice]) -> str:
        """
        Retrieve a character or a slice of characters from the file.

        Behaves like ``str`` indexing: negative indices count from the end,
        omitted slice bounds default to the ends, and slice steps are honoured.

        Args:
            index (Union[int, slice]): The index or slice of characters to retrieve.

        Returns:
            str: The character(s) at the specified index or slice.

        Raises:
            IndexError: If an integer index is out of range.
        """
        # The buffers behave like rows of a table:
        # self.buffers[i // buffer_length] is the row (chunk) holding character i
        # and i % buffer_length is the column of that character inside the row.
        if isinstance(index, slice):
            start, stop, step = index.start, index.stop, index.step
            if (
                step in (None, 1)
                and stop is not None
                and stop >= 0
                and (start is None or start >= 0)
            ):
                # Common case: no need to know the total length, reading past
                # the end of the file simply yields nothing.
                return self._read_span(start or 0, stop)
            start, stop, step = index.indices(len(self))
            if step > 0:
                return self._read_span(start, stop)[::step]
            # Negative step: read the covered span forwards, then reverse it.
            return self._read_span(stop + 1, start + 1)[::-1][::-step]
        if index < 0:
            index += len(self)
            if index < 0:
                raise IndexError("string index out of range")
        chunk_index, offset = divmod(index, self.buffer_length)
        # Indexing a short or empty chunk raises IndexError, like str does.
        return self.get_buffer(chunk_index)[offset]

    def __len__(self) -> int:
        """
        Get the total length of the file.

        The length is counted in characters by reading the not-yet-visited part
        of the file once, chunk by chunk; the result is cached. The position of
        the file descriptor is restored afterwards.

        Returns:
            int: The total number of characters in the file.
        """
        if self.length < 1:
            current_position = self.fd.tell()
            positions = self._chunk_positions
            size = self.buffer_length
            # Every chunk before the last known start is full.
            total = (len(positions) - 1) * size
            self.fd.seek(positions[-1])
            while True:
                read_count = len(self.fd.read(size))
                total += read_count
                if read_count < size:
                    break
                positions.append(self.fd.tell())
            self.fd.seek(current_position)
            self.length = total
        return self.length
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.29.2
3
+ Version: 0.29.4
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -45,21 +45,6 @@ This simple package can be used to fix an invalid json string. To know all cases
45
45
 
46
46
  Inspired by https://github.com/josdejong/jsonrepair
47
47
 
48
- ---
49
- # How to cite
50
- If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
51
-
52
- @software{Baccianella_JSON_Repair_-_2024,
53
- author = {Baccianella, Stefano},
54
- month = aug,
55
- title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
56
- url = {https://github.com/mangiucugna/json_repair},
57
- version = {0.28.3},
58
- year = {2024}
59
- }
60
-
61
- Thank you for citing my work and please send me a link to the paper if you can!
62
-
63
48
  ---
64
49
  # Offer me a beer
65
50
  If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
@@ -79,7 +64,30 @@ I searched for a lightweight python package that was able to reliably fix this p
79
64
 
80
65
  *So I wrote one*
81
66
 
67
+ # Supported use cases
68
+
69
+ ### Fixing Syntax Errors in JSON
70
+
71
+ - Missing quotes, misplaced commas, unescaped characters, and incomplete key-value pairs.
72
+ - Missing quotation marks, improperly formatted values (true, false, null), and corrupted key-value structures.
73
+
74
+ ### Repairing Malformed JSON Arrays and Objects
75
+
76
+ - Incomplete or broken arrays/objects by adding necessary elements (e.g., commas, brackets) or default values (null, "").
77
+ - The library can process JSON that includes extra non-JSON characters like comments or improperly placed characters, cleaning them up while maintaining valid structure.
78
+
79
+ ### Auto-Completion for Missing JSON Values
80
+
81
+ - Automatically completes missing values in JSON fields with reasonable defaults (like empty strings or null), ensuring validity.
82
+
82
83
  # How to use
84
+
85
+ Install the library with pip
86
+
87
+ pip install json-repair
88
+
89
+ then you can use it in your code like this
90
+
83
91
  from json_repair import repair_json
84
92
 
85
93
  good_json_string = repair_json(bad_json_string)
@@ -185,6 +193,23 @@ To ensure that you only pin the major version of this library in your `requireme
185
193
 
186
194
  In this example, any version that starts with `0.` will be acceptable, allowing for updates on minor and patch versions.
187
195
 
196
+ ---
197
+ # How to cite
198
+ If you are using this library in your academic work (as I know many folks are) please find the BibTex here:
199
+
200
+ @software{Baccianella_JSON_Repair_-_2024,
201
+ author = {Baccianella, Stefano},
202
+ month = aug,
203
+ title = {{JSON Repair - A python module to repair invalid JSON, commonly used to parse the output of LLMs}},
204
+ url = {https://github.com/mangiucugna/json_repair},
205
+ version = {0.28.3},
206
+ year = {2024}
207
+ }
208
+
209
+ Thank you for citing my work and please send me a link to the paper if you can!
210
+
211
+ ---
212
+
188
213
  # How it works
189
214
  This module will parse the JSON file following the BNF definition:
190
215
 
@@ -0,0 +1,13 @@
1
+ json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=DdJu3DJR-ANvr8KrWfJqdtOE3uI6_B0VQidKvE3PjJA,1080
4
+ json_repair/json_parser.py,sha256=BUPyAsb7wzkjNrBmsZgxgoOM9JhksCN-8cHcbJQpcPU,25525
5
+ json_repair/json_repair.py,sha256=GTg3OAXRbAJAHWs8oiQDqUHh4h6qKDVvWPXcrqafzLY,6100
6
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
8
+ json_repair-0.29.4.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.29.4.dist-info/METADATA,sha256=dBmPfg4wBTxOFXklH4V38aiO4pUks5FS7HcvQlZ4NIg,10686
10
+ json_repair-0.29.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
+ json_repair-0.29.4.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.29.4.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.29.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.1.2)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- json_repair/__init__.py,sha256=IIzSm1DsCRrr8seF3UeMZXwxcq-tE3j-8d1WBxvEJvE,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_repair.py,sha256=anGQI5RxauBnZUO9QKoPU7JgN_sUaIddyiR4ecpMmm8,34060
4
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- json_repair-0.29.2.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
6
- json_repair-0.29.2.dist-info/METADATA,sha256=Jtwl047L79Xj0CmA363Xc2EemzttgMWqYW0abi4a7fA,9787
7
- json_repair-0.29.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
8
- json_repair-0.29.2.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
9
- json_repair-0.29.2.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
10
- json_repair-0.29.2.dist-info/RECORD,,