json-repair 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -7
- data/CHANGELOG.md +0 -0
- data/README.md +10 -3
- data/Steepfile +6 -0
- data/lib/json/repair/string_utils.rb +45 -20
- data/lib/json/repair/version.rb +1 -1
- data/lib/json/repair.rb +4 -6
- data/lib/json/repairer.rb +766 -0
- data/sig/json/repair/string_utils.rbs +165 -0
- data/sig/json/repair.rbs +6 -3
- data/sig/json/repairer.rbs +103 -0
- metadata +7 -7
- data/lib/json/repair/repairer.rb +0 -647
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: db2b6fb7849a2e75329405c1f85fa7de836b0fa2f079623032571f42d359514d
|
|
4
|
+
data.tar.gz: 1c845714c4c443bad3c9277a2ceae6cef8ff346125f52f89473aaa50b9ff2132
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 53929154af31033e2f380ed89979430f4339c97c94c088b6f85da27ac251d658b98840e44085c4ba9b4972bab75c1bb0f8ad750beddd4bb79e439efb135e0386
|
|
7
|
+
data.tar.gz: b4b5150aee81c518eaee8847bb2f5d8d8131a15719bb93badce465a2d447ddc361888155b2d33125fdd69d2568424c08440772c61a7f7f5b35922a4d1270adf8
|
data/.rubocop.yml
CHANGED
|
@@ -10,32 +10,32 @@ Style/Documentation:
|
|
|
10
10
|
|
|
11
11
|
Metrics/ClassLength:
|
|
12
12
|
Exclude:
|
|
13
|
-
- lib/json/
|
|
13
|
+
- lib/json/repairer.rb
|
|
14
14
|
|
|
15
15
|
Metrics/AbcSize:
|
|
16
16
|
Exclude:
|
|
17
|
-
- lib/json/
|
|
17
|
+
- lib/json/repairer.rb
|
|
18
18
|
|
|
19
19
|
Metrics/MethodLength:
|
|
20
20
|
Exclude:
|
|
21
|
-
- lib/json/
|
|
21
|
+
- lib/json/repairer.rb
|
|
22
22
|
|
|
23
23
|
Metrics/CyclomaticComplexity:
|
|
24
24
|
Exclude:
|
|
25
|
-
- lib/json/
|
|
25
|
+
- lib/json/repairer.rb
|
|
26
26
|
|
|
27
27
|
Metrics/PerceivedComplexity:
|
|
28
28
|
Exclude:
|
|
29
|
-
- lib/json/
|
|
29
|
+
- lib/json/repairer.rb
|
|
30
30
|
|
|
31
31
|
Metrics/BlockLength:
|
|
32
32
|
Exclude:
|
|
33
|
-
- lib/json/
|
|
33
|
+
- lib/json/repairer.rb
|
|
34
34
|
- spec/**/*
|
|
35
35
|
|
|
36
36
|
Metrics/BlockNesting:
|
|
37
37
|
Exclude:
|
|
38
|
-
- lib/json/
|
|
38
|
+
- lib/json/repairer.rb
|
|
39
39
|
|
|
40
40
|
Metrics/ModuleLength:
|
|
41
41
|
Exclude:
|
data/CHANGELOG.md
CHANGED
|
Binary file
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
# JSON::Repair
|
|
1
|
+
# JSON::Repair [Gem Version](https://badge.fury.io/rb/json-repair) [Build Status](https://github.com/sashazykov/json-repair-rb/actions) [Stand With Ukraine](https://stand-with-ukraine.pp.ua)
|
|
2
2
|
|
|
3
|
-
This is a Ruby gem designed to repair broken JSON strings. Inspired by the [jsonrepair js library](https://github.com/josdejong/jsonrepair/). It efficiently handles and corrects malformed JSON data, making it especially useful in scenarios where JSON output from LLMs might not strictly adhere to JSON standards. Whether it's missing quotes, misplaced commas, or unexpected characters, it ensures that the JSON data is valid and can be parsed correctly.
|
|
3
|
+
This is a Ruby gem designed to repair broken JSON strings. Inspired by and based on the [jsonrepair js library](https://github.com/josdejong/jsonrepair/). It efficiently handles and corrects malformed JSON data, making it especially useful in scenarios where JSON output from LLMs might not strictly adhere to JSON standards. Whether it's missing quotes, misplaced commas, or unexpected characters, it ensures that the JSON data is valid and can be parsed correctly.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -25,7 +25,7 @@ require 'json/repair'
|
|
|
25
25
|
|
|
26
26
|
# Example of repairing a JSON string
|
|
27
27
|
broken_json = '{name: Alice, "age": 25,}'
|
|
28
|
-
repaired_json = JSON
|
|
28
|
+
repaired_json = JSON.repair(broken_json)
|
|
29
29
|
puts repaired_json # Outputs: {"name": "Alice", "age": 25}
|
|
30
30
|
```
|
|
31
31
|
|
|
@@ -48,3 +48,10 @@ The gem is available as open source under the terms of the [ISC License](https:/
|
|
|
48
48
|
## Code of Conduct
|
|
49
49
|
|
|
50
50
|
Everyone interacting in the JSON::Repair project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [code of conduct](https://github.com/sashazykov/json-repair-rb/blob/main/CODE_OF_CONDUCT.md).
|
|
51
|
+
|
|
52
|
+
## Similar libraries in other languages
|
|
53
|
+
|
|
54
|
+
- Typescript: https://github.com/josdejong/jsonrepair
|
|
55
|
+
- Go: https://github.com/RealAlexandreAI/json-repair
|
|
56
|
+
- JavaScript: https://github.com/RyanMarcus/dirty-json
|
|
57
|
+
- Python: https://github.com/mangiucugna/json_repair
|
data/Steepfile
ADDED
data/lib/json/repair/string_utils.rb
CHANGED
|
@@ -35,21 +35,28 @@ module JSON
|
|
|
35
35
|
LOWERCASE_E = 'e' # 0x65
|
|
36
36
|
UPPERCASE_F = 'F' # 0x46
|
|
37
37
|
LOWERCASE_F = 'f' # 0x66
|
|
38
|
-
NON_BREAKING_SPACE =
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
38
|
+
NON_BREAKING_SPACE = ' ' # 0xa0
|
|
39
|
+
MONGOLIAN_VOWEL_SEPARATOR = '' # 0x180e
|
|
40
|
+
EN_QUAD = ' ' # 0x2000
|
|
41
|
+
ZERO_WIDTH_SPACE = '' # 0x200b
|
|
42
|
+
NARROW_NO_BREAK_SPACE = ' ' # 0x202f
|
|
43
|
+
MEDIUM_MATHEMATICAL_SPACE = ' ' # 0x205f
|
|
44
|
+
IDEOGRAPHIC_SPACE = ' ' # 0x3000
|
|
45
|
+
ZERO_WIDTH_NO_BREAK_SPACE = '' # 0xfeff
|
|
46
|
+
DOUBLE_QUOTE_LEFT = '“' # 0x201c
|
|
47
|
+
DOUBLE_QUOTE_RIGHT = '”' # 0x201d
|
|
48
|
+
QUOTE_LEFT = '‘' # 0x2018
|
|
49
|
+
QUOTE_RIGHT = '’' # 0x2019
|
|
48
50
|
GRAVE_ACCENT = '`' # 0x0060
|
|
49
|
-
ACUTE_ACCENT =
|
|
51
|
+
ACUTE_ACCENT = '´' # 0x00b4
|
|
50
52
|
|
|
51
53
|
REGEX_DELIMITER = %r{^[,:\[\]/{}()\n+]+$}
|
|
54
|
+
REGEX_UNQUOTED_STRING_DELIMITER = %r{^[,\[\]/{}\n+]+$}
|
|
52
55
|
REGEX_START_OF_VALUE = /^[\[{\w-]$/
|
|
56
|
+
# matches "https://" and other schemas
|
|
57
|
+
REGEX_URL_START = %r{^(http|https|ftp|mailto|file|data|irc)://$}
|
|
58
|
+
# matches all valid URL characters EXCEPT "[", "]", and "," (important JSON delimiters)
|
|
59
|
+
REGEX_URL_CHAR = %r{^[A-Za-z0-9\-._~:/?#@!$&'()*+;=]$}
|
|
53
60
|
|
|
54
61
|
# Functions to check character chars
|
|
55
62
|
def hex?(char)
|
|
@@ -70,8 +77,19 @@ module JSON
|
|
|
70
77
|
REGEX_DELIMITER.match?(char)
|
|
71
78
|
end
|
|
72
79
|
|
|
73
|
-
def
|
|
74
|
-
|
|
80
|
+
def unquoted_string_delimiter?(char)
|
|
81
|
+
REGEX_UNQUOTED_STRING_DELIMITER.match?(char)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
REGEX_FUNCTION_NAME_CHAR_START = /\A[a-zA-Z_$]\z/
|
|
85
|
+
REGEX_FUNCTION_NAME_CHAR = /\A[a-zA-Z0-9_$]\z/
|
|
86
|
+
|
|
87
|
+
def function_name_char_start?(char)
|
|
88
|
+
!char.nil? && REGEX_FUNCTION_NAME_CHAR_START.match?(char)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def function_name_char?(char)
|
|
92
|
+
!char.nil? && REGEX_FUNCTION_NAME_CHAR.match?(char)
|
|
75
93
|
end
|
|
76
94
|
|
|
77
95
|
def start_of_value?(char)
|
|
@@ -86,11 +104,22 @@ module JSON
|
|
|
86
104
|
[SPACE, NEWLINE, TAB, RETURN].include?(char)
|
|
87
105
|
end
|
|
88
106
|
|
|
107
|
+
def whitespace_except_newline?(char)
|
|
108
|
+
[SPACE, TAB, RETURN].include?(char)
|
|
109
|
+
end
|
|
110
|
+
|
|
89
111
|
def special_whitespace?(char)
|
|
112
|
+
return false unless char
|
|
113
|
+
|
|
90
114
|
[
|
|
91
|
-
NON_BREAKING_SPACE,
|
|
115
|
+
NON_BREAKING_SPACE,
|
|
116
|
+
MONGOLIAN_VOWEL_SEPARATOR,
|
|
117
|
+
NARROW_NO_BREAK_SPACE,
|
|
118
|
+
MEDIUM_MATHEMATICAL_SPACE,
|
|
119
|
+
IDEOGRAPHIC_SPACE,
|
|
120
|
+
ZERO_WIDTH_NO_BREAK_SPACE
|
|
92
121
|
].include?(char) ||
|
|
93
|
-
(char >= EN_QUAD && char <=
|
|
122
|
+
(char >= EN_QUAD && char <= ZERO_WIDTH_SPACE)
|
|
94
123
|
end
|
|
95
124
|
|
|
96
125
|
def quote?(char)
|
|
@@ -149,7 +178,7 @@ module JSON
|
|
|
149
178
|
|
|
150
179
|
def parse_keyword(name, value)
|
|
151
180
|
if @json[@index, name.length] == name
|
|
152
|
-
@output
|
|
181
|
+
@output << value
|
|
153
182
|
@index += name.length
|
|
154
183
|
true
|
|
155
184
|
else
|
|
@@ -161,10 +190,6 @@ module JSON
|
|
|
161
190
|
text[0...start] + text[start + count..]
|
|
162
191
|
end
|
|
163
192
|
|
|
164
|
-
def function_name?(text)
|
|
165
|
-
/^\w+$/.match?(text)
|
|
166
|
-
end
|
|
167
|
-
|
|
168
193
|
def ends_with_comma_or_newline?(text)
|
|
169
194
|
/[,\n][ \t\r]*$/.match?(text)
|
|
170
195
|
end
|
data/lib/json/repair/version.rb
CHANGED
data/lib/json/repair.rb
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative 'repair/version'
|
|
4
|
-
require_relative '
|
|
4
|
+
require_relative 'repairer'
|
|
5
5
|
|
|
6
6
|
module JSON
|
|
7
|
-
|
|
8
|
-
class JSONRepairError < StandardError; end
|
|
7
|
+
class JSONRepairError < StandardError; end
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
end
|
|
9
|
+
def self.repair(json)
|
|
10
|
+
Repairer.new(json).repair
|
|
13
11
|
end
|
|
14
12
|
end
|