json-repair 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -24,11 +24,9 @@ class JsonContext:
24
24
  Returns:
25
25
  None
26
26
  """
27
- # If a value is provided update the context variable and save in stack
28
- if value:
29
- self.context.append(value)
30
- self.current = value
31
- self.empty = False
27
+ self.context.append(value)
28
+ self.current = value
29
+ self.empty = False
32
30
 
33
31
  def reset(self) -> None:
34
32
  """
@@ -314,10 +314,19 @@ class JSONParser:
314
314
  if next_c:
315
315
  i += 1
316
316
  # found a delimiter, now we need to check that is followed strictly by a comma or brace
317
+ # or the string ended
317
318
  i = self.skip_whitespaces_at(idx=i, move_main_index=False)
318
319
  next_c = self.get_char_at(i)
319
- if next_c and next_c in [",", "}"]:
320
+ if not next_c or next_c in [",", "}"]:
320
321
  rstring_delimiter_missing = False
322
+ else:
323
+ # OK but this could still be some garbage at the end of the string
324
+ # So we need to check if we find a new lstring_delimiter afterwards
325
+ # If we do, this is a missing delimiter
326
+ i = self.skip_to_character(character=lstring_delimiter, idx=i)
327
+ next_c = self.get_char_at(i)
328
+ if not next_c:
329
+ rstring_delimiter_missing = False
321
330
  else:
322
331
  # skip any whitespace first
323
332
  i = self.skip_whitespaces_at(idx=1, move_main_index=False)
@@ -41,10 +41,18 @@ def repair_json(
41
41
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
42
42
  """
43
43
  Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
44
- It will return the fixed string by default.
45
- When `return_objects=True` is passed, it will return the decoded data structure instead.
46
- When `skip_json_loads=True` is passed, it will not call the built-in json.loads() function
47
- When `logging=True` is passed, it will return a tuple with the repaired json and a log of all repair actions
44
+
45
+ Args:
46
+ json_str (str, optional): The JSON string to repair. Defaults to an empty string.
47
+ return_objects (bool, optional): If True, return the decoded data structure. Defaults to False.
48
+ skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
49
+ logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
50
+ json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
51
+ ensure_ascii (bool, optional): Set to False to avoid converting non-latin characters to ascii (for example when using chinese characters). Defaults to True. Ignored if `skip_json_loads` is True.
52
+ chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 1MB.
53
+
54
+ Returns:
55
+ Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON or a tuple with the repaired JSON and repair log.
48
56
  """
49
57
  parser = JSONParser(json_str, json_fd, logging, chunk_length)
50
58
  if skip_json_loads:
@@ -72,6 +80,14 @@ def loads(
72
80
  """
73
81
  This function works like `json.loads()` except that it will fix your JSON in the process.
74
82
  It is a wrapper around the `repair_json()` function with `return_objects=True`.
83
+
84
+ Args:
85
+ json_str (str): The JSON string to load and repair.
86
+ skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
87
+ logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
88
+
89
+ Returns:
90
+ Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
75
91
  """
76
92
  return repair_json(
77
93
  json_str=json_str,
@@ -90,6 +106,15 @@ def load(
90
106
  """
91
107
  This function works like `json.load()` except that it will fix your JSON in the process.
92
108
  It is a wrapper around the `repair_json()` function with `json_fd=fd` and `return_objects=True`.
109
+
110
+ Args:
111
+ fd (TextIO): File descriptor for JSON input.
112
+ skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
113
+ logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
114
+ chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
115
+
116
+ Returns:
117
+ Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
93
118
  """
94
119
  return repair_json(
95
120
  json_fd=fd,
@@ -108,20 +133,48 @@ def from_file(
108
133
  ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
109
134
  """
110
135
  This function is a wrapper around `load()` so you can pass the filename as string
136
+
137
+ Args:
138
+ filename (str): The name of the file containing JSON data to load and repair.
139
+ skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
140
+ logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False.
141
+ chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
142
+
143
+ Returns:
144
+ Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object or a tuple with the repaired JSON object and repair log.
111
145
  """
112
- fd = open(filename)
113
- jsonobj = load(
114
- fd=fd,
115
- skip_json_loads=skip_json_loads,
116
- logging=logging,
117
- chunk_length=chunk_length,
118
- )
119
- fd.close()
146
+ with open(filename) as fd:
147
+ jsonobj = load(
148
+ fd=fd,
149
+ skip_json_loads=skip_json_loads,
150
+ logging=logging,
151
+ chunk_length=chunk_length,
152
+ )
120
153
 
121
154
  return jsonobj
122
155
 
123
156
 
124
157
  def cli(inline_args: Optional[List[str]] = None) -> int:
158
+ """
159
+ Command-line interface for repairing and parsing JSON files.
160
+
161
+ Args:
162
+ inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
163
+ - filename (str): The JSON file to repair
164
+ - -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
165
+ - -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
166
+ - --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
167
+ - --indent INDENT (int): Number of spaces for indentation (Default 2).
168
+
169
+ Returns:
170
+ int: Exit code of the CLI operation.
171
+
172
+ Raises:
173
+ Exception: Any exception that occurs during file processing.
174
+
175
+ Example:
176
+ >>> cli(['example.json', '--indent', '4'])
177
+ """
125
178
  parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
126
179
  parser.add_argument("filename", help="The JSON file to repair")
127
180
  parser.add_argument(
@@ -167,14 +220,13 @@ def cli(inline_args: Optional[List[str]] = None) -> int:
167
220
  result = from_file(args.filename)
168
221
 
169
222
  if args.inline or args.output:
170
- fd = open(args.output or args.filename, mode="w")
171
- json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
172
- fd.close()
223
+ with open(args.output or args.filename, mode="w") as fd:
224
+ json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
173
225
  else:
174
226
  print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
175
227
  except Exception as e: # pragma: no cover
176
228
  print(f"Error: {str(e)}", file=sys.stderr)
177
- sys.exit(1)
229
+ return 1
178
230
 
179
231
  return 0 # Success
180
232
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: json_repair
3
- Version: 0.30.0
3
+ Version: 0.30.1
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -32,19 +32,23 @@ Keywords: JSON,REPAIR,LLM,PARSER
32
32
  Classifier: Programming Language :: Python :: 3
33
33
  Classifier: License :: OSI Approved :: MIT License
34
34
  Classifier: Operating System :: OS Independent
35
- Requires-Python: >=3.8
35
+ Requires-Python: >=3.9
36
36
  Description-Content-Type: text/markdown
37
37
  License-File: LICENSE
38
38
 
39
39
  [![PyPI](https://img.shields.io/pypi/v/json-repair)](https://pypi.org/project/json-repair/)
40
- ![Python version](https://img.shields.io/badge/python-3.8+-important)
40
+ ![Python version](https://img.shields.io/badge/python-3.9+-important)
41
41
  [![PyPI downloads](https://img.shields.io/pypi/dm/json-repair)](https://pypi.org/project/json-repair/)
42
42
  [![Github Sponsors](https://img.shields.io/github/sponsors/mangiucugna)](https://github.com/sponsors/mangiucugna)
43
+ [![GitHub Repo stars](https://img.shields.io/github/stars/mangiucugna/json_repair?style=flat)](https://github.com/mangiucugna/json_repair/stargazers)
44
+
43
45
 
44
46
  This simple package can be used to fix an invalid json string. To know all cases in which this package will work, check out the unit test.
45
47
 
46
48
  Inspired by https://github.com/josdejong/jsonrepair
47
49
 
50
+ ![banner](banner.png)
51
+
48
52
  ---
49
53
  # Offer me a beer
50
54
  If you find this library useful, you can help me by donating toward my monthly beer budget here: https://github.com/sponsors/mangiucugna
@@ -54,6 +58,8 @@ If you find this library useful, you can help me by donating toward my monthly b
54
58
  # Demo
55
59
  If you are unsure if this library will fix your specific problem, or simply want your json validated online, you can visit the demo site on GitHub pages: https://mangiucugna.github.io/json_repair/
56
60
 
61
+ Or hear an [audio deep dive generated by Google's NotebookLM](https://notebooklm.google.com/notebook/05312bb3-f6f3-4e49-a99b-bd51db64520b/audio) for an introduction to the module
62
+
57
63
  ---
58
64
 
59
65
  # Motivation
@@ -64,6 +70,11 @@ I searched for a lightweight python package that was able to reliably fix this p
64
70
 
65
71
  *So I wrote one*
66
72
 
73
+ ### Wouldn't GPT-4o Structured Output make this library outdated?
74
+
75
+ As part of my job we use OpenAI APIs and we noticed that even with structured output sometimes the result isn't a fully valid json.
76
+ So we still use this library to cover those outliers.
77
+
67
78
  # Supported use cases
68
79
 
69
80
  ### Fixing Syntax Errors in JSON
@@ -144,6 +155,26 @@ and another method to read from a file:
144
155
 
145
156
  Keep in mind that the library will not catch any IO-related exception and those will need to be managed by you
146
157
 
158
+ ### Non-Latin characters
159
+
160
+ When working with non-Latin characters (such as Chinese, Japanese, or Korean), you need to pass `ensure_ascii=False` to `repair_json()` in order to preserve the non-Latin characters in the output.
161
+
162
+ Here's an example using Chinese characters:
163
+
164
+ repair_json("{'test_chinese_ascii':'统一码'}")
165
+
166
+ will return
167
+
168
+ {"test_chinese_ascii": "\u7edf\u4e00\u7801"}
169
+
170
+ Passing `ensure_ascii=False` instead:
171
+
172
+ repair_json("{'test_chinese_ascii':'统一码'}", ensure_ascii=False)
173
+
174
+ will return
175
+
176
+ {"test_chinese_ascii": "统一码"}
177
+
147
178
  ### Performance considerations
148
179
  If you find this library too slow because it is using `json.loads()`, you can skip that by passing `skip_json_loads=True` to `repair_json`. Like:
149
180
 
@@ -226,7 +257,7 @@ This module will parse the JSON file following the BNF definition:
226
257
  <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
227
258
  <member> ::= <string> ': ' <json> ; A pair consisting of a name, and a JSON value
228
259
 
229
- If something is wrong (a missing parantheses or quotes for example) it will use a few simple heuristics to fix the JSON string:
260
+ If something is wrong (missing parentheses or quotes, for example) it will use a few simple heuristics to fix the JSON string:
230
261
  - Add the missing parentheses if the parser believes that the array or object should be closed
231
262
  - Quote strings or add missing single quotes
232
263
  - Adjust whitespaces and remove line breaks
@@ -0,0 +1,13 @@
1
+ json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
+ json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
+ json_repair/json_context.py,sha256=mm6dOyrPJ1sDskTORZSXCW7W9-5veMlUKqXQ3Hw3EG4,971
4
+ json_repair/json_parser.py,sha256=yZ3UHy5xVTJolhtuthiKEkQGhJg0186GeynIWieoejU,28086
5
+ json_repair/json_repair.py,sha256=LINLSJBs3cJMfs1YRDaIpfWR5PJLs87Oe06G5yQjY18,9729
6
+ json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
8
+ json_repair-0.30.1.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
+ json_repair-0.30.1.dist-info/METADATA,sha256=Tijwlxi1zqpsWwhTmUYiPmeaI0YdOgQ4S2yqcmh9BDk,11847
10
+ json_repair-0.30.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
11
+ json_repair-0.30.1.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
+ json_repair-0.30.1.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
+ json_repair-0.30.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,13 +0,0 @@
1
- json_repair/__init__.py,sha256=c4L2kZrHvWEKfj_ODU2naliNuvU6FlFVxtF0hbLe6s8,178
2
- json_repair/__main__.py,sha256=EsJb-y89uZEvGQQg1GdIDWzfDwfOMvVekKEtdguQXCM,67
3
- json_repair/json_context.py,sha256=DdJu3DJR-ANvr8KrWfJqdtOE3uI6_B0VQidKvE3PjJA,1080
4
- json_repair/json_parser.py,sha256=UQgXtXTRo0oLb4N7GhPAELWtS0E9zGSPBXfnnGOCgfo,27527
5
- json_repair/json_repair.py,sha256=Er6klw5GgmdnLmNM9GXD9gfTi8Mn9cvvTUiVITFA-1E,6101
6
- json_repair/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- json_repair/string_file_wrapper.py,sha256=EHLhNBWoyUitzT08thytYJiNZh_klEFwfT8zutPSdb4,3905
8
- json_repair-0.30.0.dist-info/LICENSE,sha256=wrjQo8MhNrNCicXtMe3MHmS-fx8AmQk1ue8AQwiiFV8,1076
9
- json_repair-0.30.0.dist-info/METADATA,sha256=72pBek4v2f-1zqIwWXjaPcG8reqdX9zpy1dkjT9yspI,10686
10
- json_repair-0.30.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
- json_repair-0.30.0.dist-info/entry_points.txt,sha256=SNfge3zPSP-ASqriYU9r3NAPaXdseYr7ciPMKdV2uSw,57
12
- json_repair-0.30.0.dist-info/top_level.txt,sha256=7-VZwZN2CgB_n0NlSLk-rEUFh8ug21lESbsblOYuZqw,12
13
- json_repair-0.30.0.dist-info/RECORD,,