json_mend 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -5
- data/benchmark_comparison.rb +144 -0
- data/lib/json_mend/parser.rb +36 -10
- data/lib/json_mend/version.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7aae532930afd3eabb3485f83995b7e1ee22763c4f17f1c7940af3b270f33b60
|
|
4
|
+
data.tar.gz: 52e364f66106063b8fec6335eff7171031cb4ac2e0f92a55cedae9d4febd205f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 62803c4814d02e3850e23bb64fba0f31fbbfc5d6fa73b84dd2e6dee7bf670709f7c778506e64a65d0c170e26c54c239fb6f27063183199dc760e18906832630c
|
|
7
|
+
data.tar.gz: 7657395e2d7fed895bdc2f405ac9cb019497f7445c64e9bc3900b188124910c353d4ee3673bee90009ccc6e133d7d2aad2552971e8e1fdf12e7c3671d31d1ca7
|
data/.rubocop.yml
CHANGED
|
@@ -7,19 +7,19 @@ AllCops:
|
|
|
7
7
|
SuggestExtensions: false
|
|
8
8
|
|
|
9
9
|
Metrics/AbcSize:
|
|
10
|
-
Max:
|
|
10
|
+
Max: 65
|
|
11
11
|
|
|
12
12
|
Metrics/ClassLength:
|
|
13
|
-
Max:
|
|
13
|
+
Max: 820
|
|
14
14
|
|
|
15
15
|
Metrics/CyclomaticComplexity:
|
|
16
|
-
Max:
|
|
16
|
+
Max: 35
|
|
17
17
|
|
|
18
18
|
Metrics/MethodLength:
|
|
19
|
-
Max:
|
|
19
|
+
Max: 80
|
|
20
20
|
|
|
21
21
|
Metrics/PerceivedComplexity:
|
|
22
|
-
Max:
|
|
22
|
+
Max: 35
|
|
23
23
|
|
|
24
24
|
Metrics/BlockNesting:
|
|
25
25
|
Max: 5
|
data/benchmark_comparison.rb
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# gem install benchmark-ips json-repair json_mend
|
|
4
|
+
# ruby -Ilib benchmark_comparison.rb
|
|
5
|
+
require 'benchmark/ips'
|
|
6
|
+
require 'json'
|
|
7
|
+
|
|
8
|
+
# --- Load Libraries ---
|
|
9
|
+
begin
|
|
10
|
+
require 'json_mend'
|
|
11
|
+
rescue LoadError
|
|
12
|
+
abort "❌ Could not load 'json_mend'. Make sure you are in the gem root or have it installed."
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
begin
|
|
16
|
+
require 'json/repair'
|
|
17
|
+
rescue LoadError
|
|
18
|
+
puts "❌ Could not load 'json-repair'. Benchmarks for it will be skipped."
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
puts '========================================================='
|
|
22
|
+
puts ' 🚀 JSON Repair Benchmark (IPS) '
|
|
23
|
+
puts " JsonMend (v#{JsonMend::VERSION}) vs json-repair-rb (v#{JSON::Repair::VERSION})"
|
|
24
|
+
puts '========================================================='
|
|
25
|
+
puts
|
|
26
|
+
|
|
27
|
+
# --- Test Data ---
|
|
28
|
+
|
|
29
|
+
json_object = '{"id": 1, "name": "Test", "active": true, "tags": ["a", "b"]}'
|
|
30
|
+
|
|
31
|
+
TEST_CASES = {
|
|
32
|
+
valid_single: {
|
|
33
|
+
label: 'Valid Single JSON',
|
|
34
|
+
input: json_object
|
|
35
|
+
},
|
|
36
|
+
concatenated: {
|
|
37
|
+
label: 'Concatenated JSON (x10)',
|
|
38
|
+
input: json_object * 10
|
|
39
|
+
},
|
|
40
|
+
simple_fix: {
|
|
41
|
+
label: 'Simple Fix (Missing Quotes)',
|
|
42
|
+
input: '{name: "Alice", age: 30, city: "Wonderland"}'
|
|
43
|
+
},
|
|
44
|
+
trailing_comma: {
|
|
45
|
+
label: 'Trailing Commas',
|
|
46
|
+
input: '{"items": [1, 2, 3,], "active": true,}'
|
|
47
|
+
},
|
|
48
|
+
comments: {
|
|
49
|
+
label: 'Comments (// and #)',
|
|
50
|
+
input: <<~JSON
|
|
51
|
+
{
|
|
52
|
+
"key": "value", // This is a comment
|
|
53
|
+
"config": {
|
|
54
|
+
"timeout": 100 # Another comment
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
JSON
|
|
58
|
+
},
|
|
59
|
+
complex: {
|
|
60
|
+
label: 'Complex & Mixed Errors',
|
|
61
|
+
input: <<~JSON
|
|
62
|
+
{
|
|
63
|
+
name: "Broken",
|
|
64
|
+
"nested": [
|
|
65
|
+
{id: 1,},
|
|
66
|
+
{id: 2}
|
|
67
|
+
],
|
|
68
|
+
"dangling": [1, 2, 3
|
|
69
|
+
JSON
|
|
70
|
+
},
|
|
71
|
+
garbage: {
|
|
72
|
+
label: 'Heavy Garbage / Hallucinations',
|
|
73
|
+
input: 'Here is the JSON: ```json {"a": 1} ``` and some other text.'
|
|
74
|
+
},
|
|
75
|
+
python_style: {
|
|
76
|
+
label: 'Python Literals (True/None)',
|
|
77
|
+
input: '{"is_valid": True, "missing": None, "wrong_bool": False}'
|
|
78
|
+
},
|
|
79
|
+
single_quotes: {
|
|
80
|
+
label: 'Single Quotes (JS Style)',
|
|
81
|
+
input: "{'id': 123, 'status': 'pending', 'meta': {'active': true}}"
|
|
82
|
+
},
|
|
83
|
+
deep_nesting: {
|
|
84
|
+
label: 'Deeply Nested (Stack Test)',
|
|
85
|
+
input: "#{'{"a":' * 50}1#{'}' * 50}"
|
|
86
|
+
},
|
|
87
|
+
unbalanced: {
|
|
88
|
+
label: 'Truncated / Unbalanced',
|
|
89
|
+
input: '{"users": [{"id": 1, "name": "Alice"}, {"id": 2'
|
|
90
|
+
},
|
|
91
|
+
unescaped_control: {
|
|
92
|
+
label: 'Unescaped Newlines/Tabs',
|
|
93
|
+
input: "{\"bio\": \"This is a \n multi-line string \t with tabs.\"}"
|
|
94
|
+
},
|
|
95
|
+
large_array: {
|
|
96
|
+
label: 'Large Single Array (Throughput)',
|
|
97
|
+
input: "[#{(1..1000).map { |i| %({"id": #{i}, "val": "item_#{i}"}) }.join(',')}]"
|
|
98
|
+
},
|
|
99
|
+
concatenated_complex: {
|
|
100
|
+
label: 'Concatenated + Broken (LLM Stream)',
|
|
101
|
+
input: '{"part": 1} {part: 2, "broken": true} {"part": 3}'
|
|
102
|
+
}
|
|
103
|
+
}.freeze
|
|
104
|
+
|
|
105
|
+
# Helper to check if a library supports the input before benchmarking
|
|
106
|
+
def supported?(library_proc, input)
|
|
107
|
+
library_proc.call(input)
|
|
108
|
+
true
|
|
109
|
+
rescue StandardError
|
|
110
|
+
false
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# --- Run Benchmarks ---
|
|
114
|
+
|
|
115
|
+
TEST_CASES.each_value do |data|
|
|
116
|
+
puts "\n\n🔸 Scenario: #{data[:label]}"
|
|
117
|
+
puts '-' * 40
|
|
118
|
+
|
|
119
|
+
Benchmark.ips do |x|
|
|
120
|
+
x.config(time: 2, warmup: 1) # Short duration for quick checks
|
|
121
|
+
|
|
122
|
+
# 1. JsonMend
|
|
123
|
+
if supported?(->(i) { JsonMend.repair(i) }, data[:input])
|
|
124
|
+
x.report('JsonMend') do
|
|
125
|
+
JsonMend.repair(data[:input])
|
|
126
|
+
end
|
|
127
|
+
else
|
|
128
|
+
puts ' JsonMend: ❌ Not Supported'
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# 2. json-repair
|
|
132
|
+
if defined?(JSON::Repair)
|
|
133
|
+
if supported?(->(i) { JSON.repair(i) }, data[:input])
|
|
134
|
+
x.report('json-repair') do
|
|
135
|
+
JSON.repair(data[:input])
|
|
136
|
+
end
|
|
137
|
+
else
|
|
138
|
+
puts ' json-repair: ❌ Not Supported'
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
x.compare!
|
|
143
|
+
end
|
|
144
|
+
end
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -222,8 +222,10 @@ module JsonMend
|
|
|
222
222
|
# Parses the key of an object, including the special logic for merging dangling arrays.
|
|
223
223
|
# Returns [key, was_array_merged_flag]
|
|
224
224
|
def parse_object_key(object)
|
|
225
|
+
char = peek_char
|
|
226
|
+
|
|
225
227
|
# First, check for and handle the dangling array merge logic.
|
|
226
|
-
if try_to_merge_dangling_array(object)
|
|
228
|
+
if char == '[' && try_to_merge_dangling_array(object)
|
|
227
229
|
return [nil, true, false] # Signal that an array was merged.
|
|
228
230
|
end
|
|
229
231
|
|
|
@@ -231,7 +233,7 @@ module JsonMend
|
|
|
231
233
|
@context.push(:object_key)
|
|
232
234
|
is_bracketed = false
|
|
233
235
|
|
|
234
|
-
if
|
|
236
|
+
if char == '['
|
|
235
237
|
@scanner.getch # Consume '['
|
|
236
238
|
arr = parse_array
|
|
237
239
|
key = arr.first.to_s
|
|
@@ -355,7 +357,7 @@ module JsonMend
|
|
|
355
357
|
char = prepare_string_parsing
|
|
356
358
|
|
|
357
359
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
|
358
|
-
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9]/)
|
|
360
|
+
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9$_-]/)
|
|
359
361
|
return '' if TERMINATORS_STRING_GUESSED.include?(char)
|
|
360
362
|
|
|
361
363
|
@scanner.getch
|
|
@@ -434,7 +436,7 @@ module JsonMend
|
|
|
434
436
|
when '“'
|
|
435
437
|
lstring_delimiter = '“'
|
|
436
438
|
rstring_delimiter = '”'
|
|
437
|
-
when /[\p{L}0-9]/
|
|
439
|
+
when /[\p{L}0-9$_-]/
|
|
438
440
|
# Could be a boolean/null, but not if it's an object key.
|
|
439
441
|
if BOOLEAN_OR_NULL_CHARS.include?(char.downcase) && !current_context?(:object_key)
|
|
440
442
|
# parse_literal is non-destructive if it fails to match.
|
|
@@ -509,6 +511,17 @@ module JsonMend
|
|
|
509
511
|
unmatched_delimiter = false
|
|
510
512
|
# --- Main Parsing Loop ---
|
|
511
513
|
while !@scanner.eos? && char != rstring_delimiter
|
|
514
|
+
# Fast-path for unquoted keys (e.g. { key: val })
|
|
515
|
+
# consumes a chunk of valid identifier characters at once
|
|
516
|
+
if missing_quotes && current_context?(:object_key)
|
|
517
|
+
chunk = @scanner.scan(/[a-zA-Z0-9_$-]+/)
|
|
518
|
+
if chunk
|
|
519
|
+
string_parts << chunk
|
|
520
|
+
char = peek_char
|
|
521
|
+
next
|
|
522
|
+
end
|
|
523
|
+
end
|
|
524
|
+
|
|
512
525
|
break if context_termination_reached?(
|
|
513
526
|
char:,
|
|
514
527
|
missing_quotes:
|
|
@@ -1170,16 +1183,29 @@ module JsonMend
|
|
|
1170
1183
|
|
|
1171
1184
|
# Peeks the next character without advancing the scanner
|
|
1172
1185
|
def peek_char(offset = 0)
|
|
1173
|
-
|
|
1186
|
+
# Handle the common 0-offset case
|
|
1187
|
+
if offset.zero?
|
|
1188
|
+
# peek(1) returns the next BYTE, not character
|
|
1189
|
+
byte_str = @scanner.peek(1)
|
|
1190
|
+
return nil if byte_str.empty?
|
|
1191
|
+
|
|
1192
|
+
# Fast path: If it's a standard ASCII char (0-127), return it directly.
|
|
1193
|
+
# This avoids the regex overhead for standard JSON characters ({, [, ", etc).
|
|
1194
|
+
return byte_str if byte_str.getbyte(0) < 128
|
|
1195
|
+
|
|
1196
|
+
# Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
|
|
1197
|
+
return @scanner.check(/./m)
|
|
1198
|
+
end
|
|
1174
1199
|
|
|
1200
|
+
# For offsets > 0, we must scan to skip correctly (as characters can be variable width)
|
|
1175
1201
|
saved_pos = @scanner.pos
|
|
1176
|
-
|
|
1202
|
+
res = nil
|
|
1177
1203
|
(offset + 1).times do
|
|
1178
|
-
|
|
1179
|
-
break if
|
|
1204
|
+
res = @scanner.getch
|
|
1205
|
+
break if res.nil?
|
|
1180
1206
|
end
|
|
1181
1207
|
@scanner.pos = saved_pos
|
|
1182
|
-
|
|
1208
|
+
res
|
|
1183
1209
|
end
|
|
1184
1210
|
|
|
1185
1211
|
def current_context?(value)
|
|
@@ -1192,7 +1218,7 @@ module JsonMend
|
|
|
1192
1218
|
|
|
1193
1219
|
# Checks if the character signifies the start of a string or literal
|
|
1194
1220
|
def string_start?(char)
|
|
1195
|
-
STRING_DELIMITERS.include?(char) || char&.match?(
|
|
1221
|
+
STRING_DELIMITERS.include?(char) || char&.match?(/[\p{L}$_]/)
|
|
1196
1222
|
end
|
|
1197
1223
|
|
|
1198
1224
|
# Checks if the character signifies the start of a number
|
data/lib/json_mend/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json_mend
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Oleksii Vasyliev
|
|
@@ -15,28 +15,28 @@ dependencies:
|
|
|
15
15
|
requirements:
|
|
16
16
|
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version: '
|
|
18
|
+
version: '2.3'
|
|
19
19
|
type: :runtime
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version: '
|
|
25
|
+
version: '2.3'
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: strscan
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
29
29
|
requirements:
|
|
30
30
|
- - ">="
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '
|
|
32
|
+
version: '3'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '
|
|
39
|
+
version: '3'
|
|
40
40
|
description: JsonMend is a robust Ruby gem designed to repair broken or malformed
|
|
41
41
|
JSON strings. It is specifically optimized to handle common errors found in JSON
|
|
42
42
|
generated by Large Language Models (LLMs), such as missing quotes, trailing commas,
|
|
@@ -54,6 +54,7 @@ files:
|
|
|
54
54
|
- LICENSE
|
|
55
55
|
- README.md
|
|
56
56
|
- Rakefile
|
|
57
|
+
- benchmark_comparison.rb
|
|
57
58
|
- lib/json_mend.rb
|
|
58
59
|
- lib/json_mend/parser.rb
|
|
59
60
|
- lib/json_mend/version.rb
|