json-repair 0.12.3__tar.gz → 0.13.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.12.3
3
+ Version: 0.13.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -65,6 +65,33 @@ or just
65
65
 
66
66
  decoded_object = json_repair.repair_json(json_string, return_objects=True)
67
67
 
68
+ ### Read json from a file or file descriptor
69
+
70
+ JSON repair also provides a drop-in replacement for `json.load()`:
71
+
72
+ import json_repair
73
+
74
+ try:
75
+ file_descriptor = open(fname, 'rb')
76
+ except OSError:
77
+ ...
78
+
79
+ with file_descriptor:
80
+ decoded_object = json_repair.load(file_descriptor)
81
+
82
+ and another method to read from a file:
83
+
84
+ import json_repair
85
+
86
+ try:
87
+ decoded_object = json_repair.from_file(json_file)
88
+ except OSError:
89
+ ...
90
+ except IOError:
91
+ ...
92
+
93
+ Keep in mind that the library will not catch any IO-related exceptions; you will need to handle those yourself
94
+
68
95
  ### Performance considerations
69
96
  If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
70
97
 
@@ -28,6 +28,33 @@ or just
28
28
 
29
29
  decoded_object = json_repair.repair_json(json_string, return_objects=True)
30
30
 
31
+ ### Read json from a file or file descriptor
32
+
33
+ JSON repair also provides a drop-in replacement for `json.load()`:
34
+
35
+ import json_repair
36
+
37
+ try:
38
+ file_descriptor = open(fname, 'rb')
39
+ except OSError:
40
+ ...
41
+
42
+ with file_descriptor:
43
+ decoded_object = json_repair.load(file_descriptor)
44
+
45
+ and another method to read from a file:
46
+
47
+ import json_repair
48
+
49
+ try:
50
+ decoded_object = json_repair.from_file(json_file)
51
+ except OSError:
52
+ ...
53
+ except IOError:
54
+ ...
55
+
56
+ Keep in mind that the library will not catch any IO-related exceptions; you will need to handle those yourself
57
+
31
58
  ### Performance considerations
32
59
  If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
33
60
 
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.12.3"
6
+ version = "0.13.1"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -1,2 +1,4 @@
1
1
  from .json_repair import repair_json as repair_json
2
2
  from .json_repair import loads as loads
3
+ from .json_repair import loads as load
4
+ from .json_repair import loads as from_file
@@ -23,7 +23,7 @@ All supported use cases are in the unit tests
23
23
  """
24
24
 
25
25
  import json
26
- from typing import Any, Dict, List, Union
26
+ from typing import Any, Dict, List, Union, TextIO
27
27
 
28
28
 
29
29
  class JSONParser:
@@ -34,6 +34,7 @@ class JSONParser:
34
34
  self.index = 0
35
35
  # This is used in the object member parsing to manage the special cases of missing quotes in key or value
36
36
  self.context = ""
37
+ self.context_stack = []
37
38
 
38
39
  def parse(self) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
39
40
  return self.parse_json()
@@ -97,7 +98,7 @@ class JSONParser:
97
98
 
98
99
  # We are now searching for the string key
99
100
  # Context is used in the string parser to manage the lack of quotes
100
- self.context = "object_key"
101
+ self.update_context("object_key")
101
102
 
102
103
  self.skip_whitespaces_at()
103
104
 
@@ -119,12 +120,13 @@ class JSONParser:
119
120
  if (self.get_char_at() or "") != ":":
120
121
  self.insert_char_at(":")
121
122
  self.index += 1
122
- self.context = "object_value"
123
+ self.update_context("")
124
+ self.update_context("object_value")
123
125
  # The value can be any valid json
124
126
  value = self.parse_json()
125
127
 
126
128
  # Reset context since our job is done
127
- self.context = ""
129
+ self.update_context("")
128
130
  obj[key] = value
129
131
 
130
132
  if (self.get_char_at() or "") in [",", "'", '"']:
@@ -157,6 +159,9 @@ class JSONParser:
157
159
  while char and (char.isspace() or char == ","):
158
160
  self.index += 1
159
161
  char = self.get_char_at()
162
+ # If this is the right value of an object and we are closing the object, it means the array is over
163
+ if self.context == "object_value" and char == "}":
164
+ break
160
165
 
161
166
  # Especially at the end of an LLM generated json you might miss the last "]"
162
167
  char = self.get_char_at()
@@ -166,6 +171,7 @@ class JSONParser:
166
171
  # Remove trailing "," before adding the "]"
167
172
  self.remove_char_at()
168
173
  self.insert_char_at("]")
174
+ self.index -= 1
169
175
 
170
176
  self.index += 1
171
177
  return arr
@@ -319,6 +325,19 @@ class JSONParser:
319
325
  except IndexError:
320
326
  return
321
327
 
328
+ def update_context(self, value: str) -> None:
329
+ # If a value is provided update the context variable and save in stack
330
+ if value:
331
+ if self.context:
332
+ self.context_stack.append(self.context)
333
+ self.context = value
334
+ # Otherwise pop and update the context, or empty if the stack is empty
335
+ else:
336
+ try:
337
+ self.context = self.context_stack.pop()
338
+ except Exception:
339
+ self.context = ""
340
+
322
341
 
323
342
  def repair_json(
324
343
  json_str: str, return_objects: bool = False, skip_json_loads: bool = False
@@ -351,3 +370,17 @@ def loads(
351
370
  It is a wrapper around the `repair_json()` function with `return_objects=True`.
352
371
  """
353
372
  return repair_json(json_str, True)
373
+
374
+
375
+ def load(fp: TextIO) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
376
+ return loads(fp.read())
377
+
378
+
379
+ def from_file(
380
+ filename: str,
381
+ ) -> Union[Dict[str, Any], List[Any], str, float, int, bool, None]:
382
+ fd = open(filename)
383
+ jsonobj = load(fd)
384
+ fd.close()
385
+
386
+ return jsonobj
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.12.3
3
+ Version: 0.13.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -65,6 +65,33 @@ or just
65
65
 
66
66
  decoded_object = json_repair.repair_json(json_string, return_objects=True)
67
67
 
68
+ ### Read json from a file or file descriptor
69
+
70
+ JSON repair also provides a drop-in replacement for `json.load()`:
71
+
72
+ import json_repair
73
+
74
+ try:
75
+ file_descriptor = open(fname, 'rb')
76
+ except OSError:
77
+ ...
78
+
79
+ with file_descriptor:
80
+ decoded_object = json_repair.load(file_descriptor)
81
+
82
+ and another method to read from a file:
83
+
84
+ import json_repair
85
+
86
+ try:
87
+ decoded_object = json_repair.from_file(json_file)
88
+ except OSError:
89
+ ...
90
+ except IOError:
91
+ ...
92
+
93
+ Keep in mind that the library will not catch any IO-related exceptions; you will need to handle those yourself
94
+
68
95
  ### Performance considerations
69
96
  If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
70
97
 
@@ -1,4 +1,4 @@
1
- from src.json_repair.json_repair import repair_json
1
+ from src.json_repair.json_repair import from_file, repair_json
2
2
 
3
3
 
4
4
  def test_repair_json():
@@ -169,6 +169,25 @@ def test_repair_json_with_objects():
169
169
 
170
170
  #Test markdown stupidities from ChatGPT
171
171
  assert repair_json('{ "content": "[LINK]("https://google.com")" }', True) == { "content": "[LINK](\"https://google.com\")"}
172
+ assert repair_json('''
173
+ {
174
+ "resourceType": "Bundle",
175
+ "id": "1",
176
+ "type": "collection",
177
+ "entry": [
178
+ {
179
+ "resource": {
180
+ "resourceType": "Patient",
181
+ "id": "1",
182
+ "name": [
183
+ {"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."},
184
+ {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}
185
+ ]
186
+ }
187
+ }
188
+ ]
189
+ }
190
+ ''', True) == {"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}
172
191
 
173
192
 
174
193
  def test_repair_json_corner_cases_generate_by_gpt():
@@ -240,4 +259,21 @@ def test_repair_json_skip_json_loads():
240
259
  assert repair_json('{"key": true, "key2": false, "key3": null}', skip_json_loads=True) == '{"key": true, "key2": false, "key3": null}'
241
260
  assert repair_json('{"key": true, "key2": false, "key3": null}', return_objects=True, skip_json_loads=True) == {"key": True, "key2": False, "key3": None}
242
261
  assert repair_json('{"key": true, "key2": false, "key3": }', skip_json_loads=True) == '{"key": true, "key2": false, "key3": ""}'
243
- assert repair_json('{"key": true, "key2": false, "key3": }', return_objects=True, skip_json_loads=True) == {"key": True, "key2": False, "key3": ""}
262
+ assert repair_json('{"key": true, "key2": false, "key3": }', return_objects=True, skip_json_loads=True) == {"key": True, "key2": False, "key3": ""}
263
+
264
+ def test_repair_json_from_file():
265
+ import os
266
+ import tempfile
267
+
268
+ # Create a temporary file
269
+ temp_fd, temp_path = tempfile.mkstemp(suffix=".json")
270
+ try:
271
+ # Write content to the temporary file
272
+ with os.fdopen(temp_fd, 'w') as tmp:
273
+ tmp.write("{")
274
+
275
+ assert(from_file(temp_path)) == {}
276
+
277
+ finally:
278
+ # Clean up - delete the temporary file
279
+ os.remove(temp_path)
@@ -580,7 +580,7 @@ def test_true_false_correct(benchmark):
580
580
  mean_time = benchmark.stats.get("median")
581
581
 
582
582
  # Define your time threshold in seconds
583
- max_time = 23 * (1 / 10 ** 6) # 23 microsecond
583
+ max_time = 24 * (1 / 10 ** 6) # 24 microsecond
584
584
 
585
585
  # Assert that the average time is below the threshold
586
586
  assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
File without changes
File without changes