json_mend 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 85624e37a002e82e9edcb6d7fffe16799429e75389662d8059e9e89f8b6f14d8
4
- data.tar.gz: 19309bc419b6f4481193e140eea31c2d24153ee87265cf2f4e2ecca1f0421bf4
3
+ metadata.gz: 7aae532930afd3eabb3485f83995b7e1ee22763c4f17f1c7940af3b270f33b60
4
+ data.tar.gz: 52e364f66106063b8fec6335eff7171031cb4ac2e0f92a55cedae9d4febd205f
5
5
  SHA512:
6
- metadata.gz: 85aa783092d768f3ff9543e2de2b59b0bbfe7a285e7d5db91bfb3d5c12dab233d2735f6f605ede6cab701b5085bc740332e91d93fddb90f9361b1033e0edf57e
7
- data.tar.gz: 75d3dc3b22f72748fe5b21f916ba930a04bd03aaea0d9aca8126ce38b2fdf9cfae9ad38c33f35a7c4e0c39ee7784b79df06d60772bcdd062e529372cc1bbd733
6
+ metadata.gz: 62803c4814d02e3850e23bb64fba0f31fbbfc5d6fa73b84dd2e6dee7bf670709f7c778506e64a65d0c170e26c54c239fb6f27063183199dc760e18906832630c
7
+ data.tar.gz: 7657395e2d7fed895bdc2f405ac9cb019497f7445c64e9bc3900b188124910c353d4ee3673bee90009ccc6e133d7d2aad2552971e8e1fdf12e7c3671d31d1ca7
data/.rubocop.yml CHANGED
@@ -7,19 +7,19 @@ AllCops:
7
7
  SuggestExtensions: false
8
8
 
9
9
  Metrics/AbcSize:
10
- Max: 60
10
+ Max: 65
11
11
 
12
12
  Metrics/ClassLength:
13
- Max: 800
13
+ Max: 820
14
14
 
15
15
  Metrics/CyclomaticComplexity:
16
- Max: 30
16
+ Max: 35
17
17
 
18
18
  Metrics/MethodLength:
19
- Max: 70
19
+ Max: 80
20
20
 
21
21
  Metrics/PerceivedComplexity:
22
- Max: 32
22
+ Max: 35
23
23
 
24
24
  Metrics/BlockNesting:
25
25
  Max: 5
@@ -0,0 +1,144 @@
1
+ # frozen_string_literal: true
2
+
3
+ # gem install benchmark-ips json-repair json_mend
4
+ # ruby -Ilib benchmark_comparison.rb
5
+ require 'benchmark/ips'
6
+ require 'json'
7
+
8
+ # --- Load Libraries (json_mend is mandatory: abort if missing; json-repair is optional: warn and continue) ---
9
+ begin
10
+ require 'json_mend'
11
+ rescue LoadError
12
+ abort "❌ Could not load 'json_mend'. Make sure you are in the gem root or have it installed."
13
+ end
14
+
15
+ begin
16
+ require 'json/repair'
17
+ rescue LoadError
18
+ puts "❌ Could not load 'json-repair'. Benchmarks for it will be skipped."
19
+ end
20
+ # Banner. json-repair is optional (its LoadError is rescued above), so read its VERSION only when it loaded.
21
+ puts '========================================================='
22
+ puts ' 🚀 JSON Repair Benchmark (IPS) '
23
+ puts " JsonMend (v#{JsonMend::VERSION}) vs json-repair-rb (#{defined?(JSON::Repair) ? "v#{JSON::Repair::VERSION}" : 'not loaded'})"
24
+ puts '========================================================='
25
+ puts
26
+
27
+ # --- Test Data: each scenario has a :label (printed as the heading) and an :input (string passed to each repair call) ---
28
+
29
+ json_object = '{"id": 1, "name": "Test", "active": true, "tags": ["a", "b"]}'
30
+
31
+ TEST_CASES = {
32
+ valid_single: {
33
+ label: 'Valid Single JSON',
34
+ input: json_object
35
+ },
36
+ concatenated: {
37
+ label: 'Concatenated JSON (x10)',
38
+ input: json_object * 10
39
+ },
40
+ simple_fix: {
41
+ label: 'Simple Fix (Missing Quotes)',
42
+ input: '{name: "Alice", age: 30, city: "Wonderland"}'
43
+ },
44
+ trailing_comma: {
45
+ label: 'Trailing Commas',
46
+ input: '{"items": [1, 2, 3,], "active": true,}'
47
+ },
48
+ comments: {
49
+ label: 'Comments (// and #)',
50
+ input: <<~JSON
51
+ {
52
+ "key": "value", // This is a comment
53
+ "config": {
54
+ "timeout": 100 # Another comment
55
+ }
56
+ }
57
+ JSON
58
+ },
59
+ complex: {
60
+ label: 'Complex & Mixed Errors',
61
+ input: <<~JSON
62
+ {
63
+ name: "Broken",
64
+ "nested": [
65
+ {id: 1,},
66
+ {id: 2}
67
+ ],
68
+ "dangling": [1, 2, 3
69
+ JSON
70
+ },
71
+ garbage: {
72
+ label: 'Heavy Garbage / Hallucinations',
73
+ input: 'Here is the JSON: ```json {"a": 1} ``` and some other text.'
74
+ },
75
+ python_style: {
76
+ label: 'Python Literals (True/None)',
77
+ input: '{"is_valid": True, "missing": None, "wrong_bool": False}'
78
+ },
79
+ single_quotes: {
80
+ label: 'Single Quotes (JS Style)',
81
+ input: "{'id': 123, 'status': 'pending', 'meta': {'active': true}}"
82
+ },
83
+ deep_nesting: {
84
+ label: 'Deeply Nested (Stack Test)',
85
+ input: "#{'{"a":' * 50}1#{'}' * 50}"
86
+ },
87
+ unbalanced: {
88
+ label: 'Truncated / Unbalanced',
89
+ input: '{"users": [{"id": 1, "name": "Alice"}, {"id": 2'
90
+ },
91
+ unescaped_control: {
92
+ label: 'Unescaped Newlines/Tabs',
93
+ input: "{\"bio\": \"This is a \n multi-line string \t with tabs.\"}"
94
+ },
95
+ large_array: {
96
+ label: 'Large Single Array (Throughput)',
97
+ input: "[#{(1..1000).map { |i| %({"id": #{i}, "val": "item_#{i}"}) }.join(',')}]"
98
+ },
99
+ concatenated_complex: {
100
+ label: 'Concatenated + Broken (LLM Stream)',
101
+ input: '{"part": 1} {part: 2, "broken": true} {"part": 3}'
102
+ }
103
+ }.freeze
104
+
105
+ # Trial-run helper: returns true if library_proc.call(input) completes without raising StandardError, false otherwise
106
+ def supported?(library_proc, input)
107
+ library_proc.call(input)
108
+ true
109
+ rescue StandardError
110
+ false
111
+ end
112
+
113
+ # --- Run Benchmarks (one Benchmark.ips run per scenario in TEST_CASES) ---
114
+
115
+ TEST_CASES.each_value do |data|
116
+ puts "\n\n🔸 Scenario: #{data[:label]}"
117
+ puts '-' * 40
118
+
119
+ Benchmark.ips do |x|
120
+ x.config(time: 2, warmup: 1) # Short duration for quick checks
121
+
122
+ # 1. JsonMend (reported only if a trial call on this input does not raise)
123
+ if supported?(->(i) { JsonMend.repair(i) }, data[:input])
124
+ x.report('JsonMend') do
125
+ JsonMend.repair(data[:input])
126
+ end
127
+ else
128
+ puts ' JsonMend: ❌ Not Supported'
129
+ end
130
+
131
+ # 2. json-repair (considered only when the optional gem loaded; reported only if a trial call does not raise)
132
+ if defined?(JSON::Repair)
133
+ if supported?(->(i) { JSON.repair(i) }, data[:input])
134
+ x.report('json-repair') do
135
+ JSON.repair(data[:input])
136
+ end
137
+ else
138
+ puts ' json-repair: ❌ Not Supported'
139
+ end
140
+ end
141
+
142
+ x.compare!
143
+ end
144
+ end
@@ -222,8 +222,10 @@ module JsonMend
222
222
  # Parses the key of an object, including the special logic for merging dangling arrays.
223
223
  # Returns [key, was_array_merged_flag]
224
224
  def parse_object_key(object)
225
+ char = peek_char
226
+
225
227
  # First, check for and handle the dangling array merge logic.
226
- if try_to_merge_dangling_array(object)
228
+ if char == '[' && try_to_merge_dangling_array(object)
227
229
  return [nil, true, false] # Signal that an array was merged.
228
230
  end
229
231
 
@@ -231,7 +233,7 @@ module JsonMend
231
233
  @context.push(:object_key)
232
234
  is_bracketed = false
233
235
 
234
- if peek_char == '['
236
+ if char == '['
235
237
  @scanner.getch # Consume '['
236
238
  arr = parse_array
237
239
  key = arr.first.to_s
@@ -355,7 +357,7 @@ module JsonMend
355
357
  char = prepare_string_parsing
356
358
 
357
359
  # A valid string can only start with a valid quote or, in our case, with a literal
358
- while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9]/)
360
+ while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9$_-]/)
359
361
  return '' if TERMINATORS_STRING_GUESSED.include?(char)
360
362
 
361
363
  @scanner.getch
@@ -434,7 +436,7 @@ module JsonMend
434
436
  when '“'
435
437
  lstring_delimiter = '“'
436
438
  rstring_delimiter = '”'
437
- when /[\p{L}0-9]/
439
+ when /[\p{L}0-9$_-]/
438
440
  # Could be a boolean/null, but not if it's an object key.
439
441
  if BOOLEAN_OR_NULL_CHARS.include?(char.downcase) && !current_context?(:object_key)
440
442
  # parse_literal is non-destructive if it fails to match.
@@ -509,6 +511,17 @@ module JsonMend
509
511
  unmatched_delimiter = false
510
512
  # --- Main Parsing Loop ---
511
513
  while !@scanner.eos? && char != rstring_delimiter
514
+ # Fast-path for unquoted keys (e.g. { key: val })
515
+ # consumes a chunk of valid identifier characters at once
516
+ if missing_quotes && current_context?(:object_key)
517
+ chunk = @scanner.scan(/[a-zA-Z0-9_$-]+/)
518
+ if chunk
519
+ string_parts << chunk
520
+ char = peek_char
521
+ next
522
+ end
523
+ end
524
+
512
525
  break if context_termination_reached?(
513
526
  char:,
514
527
  missing_quotes:
@@ -1170,16 +1183,29 @@ module JsonMend
1170
1183
 
1171
1184
  # Peeks the next character without advancing the scanner
1172
1185
  def peek_char(offset = 0)
1173
- return @scanner.check(/./m) if offset.zero?
1186
+ # Handle the common 0-offset case
1187
+ if offset.zero?
1188
+ # peek(1) returns the next BYTE, not character
1189
+ byte_str = @scanner.peek(1)
1190
+ return nil if byte_str.empty?
1191
+
1192
+ # Fast path: If it's a standard ASCII char (0-127), return it directly.
1193
+ # This avoids the regex overhead for standard JSON characters ({, [, ", etc).
1194
+ return byte_str if byte_str.getbyte(0) < 128
1195
+
1196
+ # Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
1197
+ return @scanner.check(/./m)
1198
+ end
1174
1199
 
1200
+ # For offsets > 0, we must scan to skip correctly (as characters can be variable width)
1175
1201
  saved_pos = @scanner.pos
1176
- c = nil
1202
+ res = nil
1177
1203
  (offset + 1).times do
1178
- c = @scanner.getch
1179
- break if c.nil?
1204
+ res = @scanner.getch
1205
+ break if res.nil?
1180
1206
  end
1181
1207
  @scanner.pos = saved_pos
1182
- c
1208
+ res
1183
1209
  end
1184
1210
 
1185
1211
  def current_context?(value)
@@ -1192,7 +1218,7 @@ module JsonMend
1192
1218
 
1193
1219
  # Checks if the character signifies the start of a string or literal
1194
1220
  def string_start?(char)
1195
- STRING_DELIMITERS.include?(char) || char&.match?(/\p{L}/)
1221
+ STRING_DELIMITERS.include?(char) || char&.match?(/[\p{L}$_]/)
1196
1222
  end
1197
1223
 
1198
1224
  # Checks if the character signifies the start of a number
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonMend
4
- VERSION = '0.1.2'
4
+ VERSION = '0.1.4'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_mend
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Oleksii Vasyliev
@@ -15,28 +15,28 @@ dependencies:
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: '0'
18
+ version: '2.3'
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: '0'
25
+ version: '2.3'
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: strscan
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: '0'
32
+ version: '3'
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: '0'
39
+ version: '3'
40
40
  description: JsonMend is a robust Ruby gem designed to repair broken or malformed
41
41
  JSON strings. It is specifically optimized to handle common errors found in JSON
42
42
  generated by Large Language Models (LLMs), such as missing quotes, trailing commas,
@@ -54,6 +54,7 @@ files:
54
54
  - LICENSE
55
55
  - README.md
56
56
  - Rakefile
57
+ - benchmark_comparison.rb
57
58
  - lib/json_mend.rb
58
59
  - lib/json_mend/parser.rb
59
60
  - lib/json_mend/version.rb