json-repair 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -0
- data/CLAUDE.md +67 -0
- data/README.md +16 -1
- data/Steepfile +6 -0
- data/exe/json-repair +6 -0
- data/lib/json/repair/cli.rb +133 -0
- data/lib/json/repair/string_utils.rb +45 -20
- data/lib/json/repair/version.rb +1 -1
- data/lib/json/repairer.rb +295 -174
- data/sig/json/repair/cli.rbs +16 -0
- data/sig/json/repair/string_utils.rbs +165 -0
- data/sig/json/repair.rbs +5 -2
- data/sig/json/repairer.rbs +103 -0
- metadata +12 -7
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
module JSON
|
|
2
|
+
module Repair
|
|
3
|
+
module StringUtils
|
|
4
|
+
@output: untyped
|
|
5
|
+
|
|
6
|
+
@index: untyped
|
|
7
|
+
|
|
8
|
+
# Constants for character chars
|
|
9
|
+
BACKSLASH: "\\"
|
|
10
|
+
|
|
11
|
+
SLASH: "/"
|
|
12
|
+
|
|
13
|
+
ASTERISK: "*"
|
|
14
|
+
|
|
15
|
+
OPENING_BRACE: "{"
|
|
16
|
+
|
|
17
|
+
CLOSING_BRACE: "}"
|
|
18
|
+
|
|
19
|
+
OPENING_BRACKET: "["
|
|
20
|
+
|
|
21
|
+
CLOSING_BRACKET: "]"
|
|
22
|
+
|
|
23
|
+
OPEN_PARENTHESIS: "("
|
|
24
|
+
|
|
25
|
+
CLOSE_PARENTHESIS: ")"
|
|
26
|
+
|
|
27
|
+
SPACE: " "
|
|
28
|
+
|
|
29
|
+
NEWLINE: "\n"
|
|
30
|
+
|
|
31
|
+
TAB: "\t"
|
|
32
|
+
|
|
33
|
+
RETURN: "\r"
|
|
34
|
+
|
|
35
|
+
BACKSPACE: "\b"
|
|
36
|
+
|
|
37
|
+
FORM_FEED: "\f"
|
|
38
|
+
|
|
39
|
+
DOUBLE_QUOTE: "\""
|
|
40
|
+
|
|
41
|
+
PLUS: "+"
|
|
42
|
+
|
|
43
|
+
MINUS: "-"
|
|
44
|
+
|
|
45
|
+
QUOTE: "'"
|
|
46
|
+
|
|
47
|
+
ZERO: "0"
|
|
48
|
+
|
|
49
|
+
NINE: "9"
|
|
50
|
+
|
|
51
|
+
COMMA: ","
|
|
52
|
+
|
|
53
|
+
DOT: "."
|
|
54
|
+
|
|
55
|
+
COLON: ":"
|
|
56
|
+
|
|
57
|
+
SEMICOLON: ";"
|
|
58
|
+
|
|
59
|
+
UPPERCASE_A: "A"
|
|
60
|
+
|
|
61
|
+
LOWERCASE_A: "a"
|
|
62
|
+
|
|
63
|
+
UPPERCASE_E: "E"
|
|
64
|
+
|
|
65
|
+
LOWERCASE_E: "e"
|
|
66
|
+
|
|
67
|
+
UPPERCASE_F: "F"
|
|
68
|
+
|
|
69
|
+
LOWERCASE_F: "f"
|
|
70
|
+
|
|
71
|
+
NON_BREAKING_SPACE: ::String
|
|
72
|
+
|
|
73
|
+
MONGOLIAN_VOWEL_SEPARATOR: ::String
|
|
74
|
+
|
|
75
|
+
EN_QUAD: ::String
|
|
76
|
+
|
|
77
|
+
ZERO_WIDTH_SPACE: ::String
|
|
78
|
+
|
|
79
|
+
NARROW_NO_BREAK_SPACE: ::String
|
|
80
|
+
|
|
81
|
+
MEDIUM_MATHEMATICAL_SPACE: ::String
|
|
82
|
+
|
|
83
|
+
IDEOGRAPHIC_SPACE: ::String
|
|
84
|
+
|
|
85
|
+
ZERO_WIDTH_NO_BREAK_SPACE: ::String
|
|
86
|
+
|
|
87
|
+
DOUBLE_QUOTE_LEFT: ::String
|
|
88
|
+
|
|
89
|
+
DOUBLE_QUOTE_RIGHT: ::String
|
|
90
|
+
|
|
91
|
+
QUOTE_LEFT: ::String
|
|
92
|
+
|
|
93
|
+
QUOTE_RIGHT: ::String
|
|
94
|
+
|
|
95
|
+
GRAVE_ACCENT: "`"
|
|
96
|
+
|
|
97
|
+
ACUTE_ACCENT: ::String
|
|
98
|
+
|
|
99
|
+
REGEX_DELIMITER: ::Regexp
|
|
100
|
+
|
|
101
|
+
REGEX_UNQUOTED_STRING_DELIMITER: ::Regexp
|
|
102
|
+
|
|
103
|
+
REGEX_START_OF_VALUE: ::Regexp
|
|
104
|
+
|
|
105
|
+
REGEX_URL_START: ::Regexp
|
|
106
|
+
|
|
107
|
+
REGEX_URL_CHAR: ::Regexp
|
|
108
|
+
|
|
109
|
+
REGEX_FUNCTION_NAME_CHAR_START: ::Regexp
|
|
110
|
+
|
|
111
|
+
REGEX_FUNCTION_NAME_CHAR: ::Regexp
|
|
112
|
+
|
|
113
|
+
# Functions to check character chars
|
|
114
|
+
def hex?: (untyped char) -> untyped
|
|
115
|
+
|
|
116
|
+
def digit?: (untyped char) -> untyped
|
|
117
|
+
|
|
118
|
+
def valid_string_character?: (untyped char) -> untyped
|
|
119
|
+
|
|
120
|
+
def delimiter?: (untyped char) -> untyped
|
|
121
|
+
|
|
122
|
+
def unquoted_string_delimiter?: (untyped char) -> untyped
|
|
123
|
+
|
|
124
|
+
def function_name_char_start?: (untyped char) -> untyped
|
|
125
|
+
|
|
126
|
+
def function_name_char?: (untyped char) -> untyped
|
|
127
|
+
|
|
128
|
+
def start_of_value?: (untyped char) -> untyped
|
|
129
|
+
|
|
130
|
+
def control_character?: (untyped char) -> untyped
|
|
131
|
+
|
|
132
|
+
def whitespace?: (untyped char) -> untyped
|
|
133
|
+
|
|
134
|
+
def whitespace_except_newline?: (untyped char) -> untyped
|
|
135
|
+
|
|
136
|
+
def special_whitespace?: (untyped char) -> untyped
|
|
137
|
+
|
|
138
|
+
def quote?: (untyped char) -> untyped
|
|
139
|
+
|
|
140
|
+
def double_quote?: (untyped char) -> untyped
|
|
141
|
+
|
|
142
|
+
def single_quote?: (untyped char) -> untyped
|
|
143
|
+
|
|
144
|
+
def double_quote_like?: (untyped char) -> untyped
|
|
145
|
+
|
|
146
|
+
def single_quote_like?: (untyped char) -> untyped
|
|
147
|
+
|
|
148
|
+
# Strip last occurrence of text_to_strip from text
|
|
149
|
+
def strip_last_occurrence: (untyped text, untyped text_to_strip, ?strip_remaining_text: bool) -> untyped
|
|
150
|
+
|
|
151
|
+
def insert_before_last_whitespace: (untyped text, untyped text_to_insert) -> untyped
|
|
152
|
+
|
|
153
|
+
# Parse keywords true, false, null
|
|
154
|
+
# Repair Python keywords True, False, None
|
|
155
|
+
# Repair Ruby keyword nil
|
|
156
|
+
def parse_keywords: () -> untyped
|
|
157
|
+
|
|
158
|
+
def parse_keyword: (untyped name, untyped value) -> (true | false)
|
|
159
|
+
|
|
160
|
+
def remove_at_index: (untyped text, untyped start, untyped count) -> untyped
|
|
161
|
+
|
|
162
|
+
def ends_with_comma_or_newline?: (untyped text) -> untyped
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
data/sig/json/repairer.rbs
CHANGED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
module JSON
|
|
2
|
+
class Repairer
|
|
3
|
+
@json: ::String
|
|
4
|
+
|
|
5
|
+
@index: Integer
|
|
6
|
+
|
|
7
|
+
@output: ::String
|
|
8
|
+
|
|
9
|
+
include Repair::StringUtils
|
|
10
|
+
|
|
11
|
+
CONTROL_CHARACTERS: ::Hash[::String, "\\b" | "\\f" | "\\n" | "\\r" | "\\t"]
|
|
12
|
+
|
|
13
|
+
ESCAPE_CHARACTERS: ::Hash[::String, "\"" | "\\" | "/" | "\b" | "\f" | "\n" | "\r" | "\t"]
|
|
14
|
+
|
|
15
|
+
MARKDOWN_OPEN_BLOCKS: ::Array[::String]
|
|
16
|
+
|
|
17
|
+
MARKDOWN_CLOSE_BLOCKS: ::Array[::String]
|
|
18
|
+
|
|
19
|
+
def initialize: (::String json) -> void
|
|
20
|
+
|
|
21
|
+
def repair: () -> ::String
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def parse_value: () -> untyped
|
|
26
|
+
|
|
27
|
+
def parse_whitespace: (?skip_newline: bool) -> (true | false)
|
|
28
|
+
|
|
29
|
+
def parse_comment: () -> (true | false)
|
|
30
|
+
|
|
31
|
+
# Find and skip over a Markdown fenced code block
|
|
32
|
+
def parse_markdown_code_block: (::Array[::String] blocks) -> (true | false)
|
|
33
|
+
|
|
34
|
+
def skip_markdown_code_block: (::Array[::String] blocks) -> (true | false)
|
|
35
|
+
|
|
36
|
+
# Parse an object like '{"key": "value"}'
|
|
37
|
+
def parse_object: () -> (false | true)
|
|
38
|
+
|
|
39
|
+
def skip_character: (untyped char) -> (true | false)
|
|
40
|
+
|
|
41
|
+
# Skip ellipsis like "[1,2,3,...]" or "[1,2,3,...,9]" or "[...,7,8,9]"
|
|
42
|
+
# or a similar construct in objects.
|
|
43
|
+
def skip_ellipsis: () -> untyped
|
|
44
|
+
|
|
45
|
+
# Parse a string enclosed by double quotes "...". Can contain escaped quotes
|
|
46
|
+
# Repair strings enclosed in single quotes or special quotes
|
|
47
|
+
# Repair an escaped string
|
|
48
|
+
#
|
|
49
|
+
# The function can run in two stages:
|
|
50
|
+
# - First, it assumes the string has a valid end quote
|
|
51
|
+
# - If it turns out that the string does not have a valid end quote followed
|
|
52
|
+
# by a delimiter (which should be the case), the function runs again in a
|
|
53
|
+
# more conservative way, stopping the string at the first next delimiter
|
|
54
|
+
# and fixing the string by inserting a quote there, or stopping at a
|
|
55
|
+
# stop index detected in the first iteration.
|
|
56
|
+
def parse_string: (?stop_at_delimiter: bool, ?stop_at_index: ::Integer) -> (untyped | true | false)
|
|
57
|
+
|
|
58
|
+
# Repair an unquoted string by adding quotes around it
|
|
59
|
+
# Repair a MongoDB function call like NumberLong("2")
|
|
60
|
+
# Repair a JSONP function call like callback({...});
|
|
61
|
+
def parse_unquoted_string: (bool is_key) -> (false | true)
|
|
62
|
+
|
|
63
|
+
# Parse a regular expression literal like /foo/ or /foo\/bar/
|
|
64
|
+
def parse_regex: () -> (false | true)
|
|
65
|
+
|
|
66
|
+
def parse_character: (untyped char) -> (true | false)
|
|
67
|
+
|
|
68
|
+
def parse_whitespace_and_skip_comments: (?skip_newline: bool) -> untyped
|
|
69
|
+
|
|
70
|
+
# Parse a number like 2.4 or 2.4e6
|
|
71
|
+
def parse_number: () -> (true | false)
|
|
72
|
+
|
|
73
|
+
def at_end_of_number?: () -> untyped
|
|
74
|
+
|
|
75
|
+
# Parse an array like '["item1", "item2", ...]'
|
|
76
|
+
def parse_array: () -> (true | false)
|
|
77
|
+
|
|
78
|
+
def prev_non_whitespace_index: (untyped start) -> untyped
|
|
79
|
+
|
|
80
|
+
# Repair concatenated strings like "hello" + "world", change this into "helloworld"
|
|
81
|
+
def parse_concatenated_string: () -> untyped
|
|
82
|
+
|
|
83
|
+
def repair_number_ending_with_numeric_symbol: (untyped start) -> untyped
|
|
84
|
+
|
|
85
|
+
# Parse and repair Newline Delimited JSON (NDJSON):
|
|
86
|
+
# multiple JSON objects separated by a newline character
|
|
87
|
+
def parse_newline_delimited_json: () -> untyped
|
|
88
|
+
|
|
89
|
+
def skip_escape_character: () -> untyped
|
|
90
|
+
|
|
91
|
+
def throw_invalid_character: (untyped char) -> untyped
|
|
92
|
+
|
|
93
|
+
def throw_unexpected_character: () -> untyped
|
|
94
|
+
|
|
95
|
+
def throw_unexpected_end: () -> untyped
|
|
96
|
+
|
|
97
|
+
def throw_object_key_expected: () -> untyped
|
|
98
|
+
|
|
99
|
+
def throw_colon_expected: () -> untyped
|
|
100
|
+
|
|
101
|
+
def throw_invalid_unicode_character: () -> untyped
|
|
102
|
+
end
|
|
103
|
+
end
|
metadata
CHANGED
|
@@ -1,34 +1,41 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json-repair
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Aleksandr Zykov
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies: []
|
|
13
12
|
description: This is a simple gem that repairs broken JSON strings.
|
|
14
13
|
email:
|
|
15
14
|
- alexandrz@gmail.com
|
|
16
|
-
executables: []
|
|
15
|
+
executables:
|
|
16
|
+
- json-repair
|
|
17
17
|
extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
|
19
19
|
files:
|
|
20
20
|
- ".rspec"
|
|
21
21
|
- ".rubocop.yml"
|
|
22
22
|
- CHANGELOG.md
|
|
23
|
+
- CLAUDE.md
|
|
23
24
|
- CODE_OF_CONDUCT.md
|
|
24
25
|
- LICENSE.txt
|
|
25
26
|
- README.md
|
|
26
27
|
- Rakefile
|
|
28
|
+
- Steepfile
|
|
29
|
+
- exe/json-repair
|
|
27
30
|
- lib/json/repair.rb
|
|
31
|
+
- lib/json/repair/cli.rb
|
|
28
32
|
- lib/json/repair/string_utils.rb
|
|
29
33
|
- lib/json/repair/version.rb
|
|
30
34
|
- lib/json/repairer.rb
|
|
31
35
|
- sig/json/repair.rbs
|
|
36
|
+
- sig/json/repair/cli.rbs
|
|
37
|
+
- sig/json/repair/string_utils.rbs
|
|
38
|
+
- sig/json/repairer.rbs
|
|
32
39
|
homepage: https://github.com/sashazykov/json-repair-rb
|
|
33
40
|
licenses:
|
|
34
41
|
- ISC
|
|
@@ -37,7 +44,6 @@ metadata:
|
|
|
37
44
|
homepage_uri: https://github.com/sashazykov/json-repair-rb
|
|
38
45
|
source_code_uri: https://github.com/sashazykov/json-repair-rb
|
|
39
46
|
changelog_uri: https://github.com/sashazykov/json-repair-rb/blob/main/CHANGELOG.md
|
|
40
|
-
post_install_message:
|
|
41
47
|
rdoc_options: []
|
|
42
48
|
require_paths:
|
|
43
49
|
- lib
|
|
@@ -52,8 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
52
58
|
- !ruby/object:Gem::Version
|
|
53
59
|
version: '0'
|
|
54
60
|
requirements: []
|
|
55
|
-
rubygems_version: 3.
|
|
56
|
-
signing_key:
|
|
61
|
+
rubygems_version: 3.6.9
|
|
57
62
|
specification_version: 4
|
|
58
63
|
summary: Repairs broken JSON strings.
|
|
59
64
|
test_files: []
|