json-repair 0.37.0__tar.gz → 0.38.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (20) hide show
  1. {json_repair-0.37.0/src/json_repair.egg-info → json_repair-0.38.0}/PKG-INFO +1 -1
  2. {json_repair-0.37.0 → json_repair-0.38.0}/pyproject.toml +1 -1
  3. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair/json_parser.py +80 -6
  4. {json_repair-0.37.0 → json_repair-0.38.0/src/json_repair.egg-info}/PKG-INFO +1 -1
  5. {json_repair-0.37.0 → json_repair-0.38.0}/tests/test_json_repair.py +5 -16
  6. {json_repair-0.37.0 → json_repair-0.38.0}/LICENSE +0 -0
  7. {json_repair-0.37.0 → json_repair-0.38.0}/README.md +0 -0
  8. {json_repair-0.37.0 → json_repair-0.38.0}/setup.cfg +0 -0
  9. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair/__init__.py +0 -0
  10. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair/__main__.py +0 -0
  11. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair/json_context.py +0 -0
  12. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair/json_repair.py +0 -0
  13. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair/py.typed +0 -0
  14. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair/string_file_wrapper.py +0 -0
  15. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair.egg-info/SOURCES.txt +0 -0
  16. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair.egg-info/dependency_links.txt +0 -0
  17. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair.egg-info/entry_points.txt +0 -0
  18. {json_repair-0.37.0 → json_repair-0.38.0}/src/json_repair.egg-info/top_level.txt +0 -0
  19. {json_repair-0.37.0 → json_repair-0.38.0}/tests/test_coverage.py +0 -0
  20. {json_repair-0.37.0 → json_repair-0.38.0}/tests/test_performance.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: json_repair
3
- Version: 0.37.0
3
+ Version: 0.38.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "json_repair"
6
- version = "0.37.0"
6
+ version = "0.38.0"
7
7
  license = {file = "LICENSE"}
8
8
  authors = [
9
9
  { name="Stefano Baccianella", email="4247706+mangiucugna@users.noreply.github.com" },
@@ -101,6 +101,8 @@ class JSONParser:
101
101
  char.isdigit() or char == "-" or char == "."
102
102
  ):
103
103
  return self.parse_number()
104
+ elif char in ["#", "/"]:
105
+ return self.parse_comment()
104
106
  # If everything else fails, we just ignore and move on
105
107
  else:
106
108
  self.index += 1
@@ -138,8 +140,9 @@ class JSONParser:
138
140
  # The rollback index needs to be updated here in case the key is empty
139
141
  rollback_index = self.index
140
142
  key = str(self.parse_string())
141
-
142
- if key != "" or (key == "" and self.get_char_at() == ":"):
143
+ if key == "":
144
+ self.skip_whitespaces_at()
145
+ if key != "" or (key == "" and self.get_char_at() in [":", "}"]):
143
146
  # If the string is empty but there is a object divider, we are done here
144
147
  break
145
148
  if ContextValues.ARRAY in self.context.context and key in obj:
@@ -199,11 +202,10 @@ class JSONParser:
199
202
  self.skip_whitespaces_at()
200
203
  value = self.parse_json()
201
204
 
202
- # It is possible that parse_json() returns nothing valid, so we stop
205
+ # It is possible that parse_json() returns nothing valid, so we increase by 1
203
206
  if value == "":
204
- break
205
-
206
- if value == "..." and self.get_char_at(-1) == ".":
207
+ self.index += 1
208
+ elif value == "..." and self.get_char_at(-1) == ".":
207
209
  self.log(
208
210
  "While parsing an array, found a stray '...'; ignoring it",
209
211
  )
@@ -243,6 +245,8 @@ class JSONParser:
243
245
  lstring_delimiter = rstring_delimiter = '"'
244
246
 
245
247
  char = self.get_char_at()
248
+ if char in ["#", "/"]:
249
+ return self.parse_comment()
246
250
  # A valid string can only start with a valid quote or, in our case, with a literal
247
251
  while char and char not in self.STRING_DELIMITERS and not char.isalnum():
248
252
  self.index += 1
@@ -753,6 +757,76 @@ class JSONParser:
753
757
  return self.skip_to_character(character=character, idx=idx + 1)
754
758
  return idx
755
759
 
760
+ def parse_comment(self) -> str:
761
+ """
762
+ Parse code-like comments:
763
+
764
+ - "# comment": A line comment that continues until a newline.
765
+ - "// comment": A line comment that continues until a newline.
766
+ - "/* comment */": A block comment that continues until the closing delimiter "*/".
767
+
768
+ The comment is skipped over and an empty string is returned so that comments do not interfere
769
+ with the actual JSON elements.
770
+ """
771
+ char = self.get_char_at()
772
+ termination_characters = ["\n", "\r"]
773
+ if ContextValues.ARRAY in self.context.context:
774
+ termination_characters.append("]")
775
+ if ContextValues.OBJECT_VALUE in self.context.context:
776
+ termination_characters.append("}")
777
+ if ContextValues.OBJECT_KEY in self.context.context:
778
+ termination_characters.append(":")
779
+ # Line comment starting with #
780
+ if char == "#":
781
+ comment = ""
782
+ while char and char not in termination_characters:
783
+ comment += char
784
+ self.index += 1
785
+ char = self.get_char_at()
786
+ self.log(f"Found line comment: {comment}")
787
+ return ""
788
+
789
+ # Comments starting with '/'
790
+ elif char == "/":
791
+ next_char = self.get_char_at(1)
792
+ # Handle line comment starting with //
793
+ if next_char == "/":
794
+ comment = "//"
795
+ self.index += 2 # Skip both slashes.
796
+ char = self.get_char_at()
797
+ while char and char not in termination_characters:
798
+ comment += char
799
+ self.index += 1
800
+ char = self.get_char_at()
801
+ self.log(f"Found line comment: {comment}")
802
+ return ""
803
+ # Handle block comment starting with /*
804
+ elif next_char == "*":
805
+ comment = "/*"
806
+ self.index += 2 # Skip '/*'
807
+ while True:
808
+ char = self.get_char_at()
809
+ if not char:
810
+ self.log(
811
+ "Reached end-of-string while parsing block comment; unclosed block comment."
812
+ )
813
+ break
814
+ comment += char
815
+ self.index += 1
816
+ if comment.endswith("*/"):
817
+ break
818
+ self.log(f"Found block comment: {comment}")
819
+ return ""
820
+ else:
821
+ # Not a recognized comment pattern, skip the slash.
822
+ self.index += 1
823
+ return ""
824
+
825
+ else:
826
+ # Should not be reached: if for some reason the current character does not start a comment, skip it.
827
+ self.index += 1
828
+ return ""
829
+
756
830
  def _log(self, text: str) -> None:
757
831
  window: int = 10
758
832
  start: int = max(self.index - window, 0)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: json_repair
3
- Version: 0.37.0
3
+ Version: 0.38.0
4
4
  Summary: A package to repair broken json strings
5
5
  Author-email: Stefano Baccianella <4247706+mangiucugna@users.noreply.github.com>
6
6
  License: MIT License
@@ -124,6 +124,7 @@ def test_array_edge_cases():
124
124
  assert repair_json('{"key": ["value]}') == '{"key": ["value"]}'
125
125
  assert repair_json('["lorem "ipsum" sic"]') == '["lorem \\"ipsum\\" sic"]'
126
126
  assert repair_json('{"key1": ["value1", "value2"}, "key2": ["value3", "value4"]}') == '{"key1": ["value1", "value2"], "key2": ["value3", "value4"]}'
127
+ assert repair_json('[ "value", /* comment */ "value2" ]') == '["value", "value2"]'
127
128
 
128
129
  def test_escaping():
129
130
  assert repair_json("'\"'") == '""'
@@ -158,6 +159,9 @@ def test_object_edge_cases():
158
159
  assert repair_json('{"key:"value"}') == '{"key": "value"}'
159
160
  assert repair_json('{"key:value}') == '{"key": "value"}'
160
161
  assert repair_json('[{"lorem": {"ipsum": "sic"}, """" "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, {"lorem": {"ipsum": "sic"}}]'
162
+ assert repair_json('{ "key": { "key2": "value2" // comment }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
163
+ assert repair_json('{ "key": { "key2": "value2" # comment }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
164
+ assert repair_json('{ "key": { "key2": "value2" /* comment */ }, "key3": "value3" }') == '{"key": {"key2": "value2"}, "key3": "value3"}'
161
165
 
162
166
  def test_number_edge_cases():
163
167
  assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
@@ -312,19 +316,4 @@ def test_cli(capsys):
312
316
  finally:
313
317
  # Clean up - delete the temporary file
314
318
  os.remove(temp_path)
315
- os.remove(tempout_path)
316
-
317
- """
318
- def test_cli_inline(sample_json_file):
319
- with patch('sys.argv', ['json_repair', sample_json_file, '-i']):
320
- cli()
321
- with open(sample_json_file, 'r') as f:
322
- assert json.load(f) == {"key": "value"}
323
-
324
- def test_cli_output_file(sample_json_file, tmp_path):
325
- output_file = tmp_path / "output.json"
326
- with patch('sys.argv', ['json_repair', sample_json_file, '-o', str(output_file)]):
327
- cli()
328
- with open(output_file, 'r') as f:
329
- assert json.load(f) == {"key": "value"}
330
- """
319
+ os.remove(tempout_path)
File without changes
File without changes
File without changes