json-repair 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 17c6b285b495a3c053ae838e701205ec682f7648856cadb52d00da5d348f393c
4
- data.tar.gz: 644acd7c8840e0a1edf4399297c1bbcc17739cdd13ab6ee9102880981a00bc84
3
+ metadata.gz: db2b6fb7849a2e75329405c1f85fa7de836b0fa2f079623032571f42d359514d
4
+ data.tar.gz: 1c845714c4c443bad3c9277a2ceae6cef8ff346125f52f89473aaa50b9ff2132
5
5
  SHA512:
6
- metadata.gz: 6bbe9f8d1e5558ab344987a867dc1aae6859c7e33b537cf20b78d97b5db4734c33388d7514b5db85decf8ca156d91efd4fe98d7cd8b42bc2c2fa81822a0ce9bd
7
- data.tar.gz: '0175953daedfe95efb9dc1777d38c2e6034e1822b0463fe50cf52dc946a17cd5a363bab862bc776399bd2e34c249b682298c2989f5ce7fa959cd667c4845d013'
6
+ metadata.gz: 53929154af31033e2f380ed89979430f4339c97c94c088b6f85da27ac251d658b98840e44085c4ba9b4972bab75c1bb0f8ad750beddd4bb79e439efb135e0386
7
+ data.tar.gz: b4b5150aee81c518eaee8847bb2f5d8d8131a15719bb93badce465a2d447ddc361888155b2d33125fdd69d2568424c08440772c61a7f7f5b35922a4d1270adf8
data/.rubocop.yml CHANGED
@@ -10,32 +10,32 @@ Style/Documentation:
10
10
 
11
11
  Metrics/ClassLength:
12
12
  Exclude:
13
- - lib/json/repair/repairer.rb
13
+ - lib/json/repairer.rb
14
14
 
15
15
  Metrics/AbcSize:
16
16
  Exclude:
17
- - lib/json/repair/repairer.rb
17
+ - lib/json/repairer.rb
18
18
 
19
19
  Metrics/MethodLength:
20
20
  Exclude:
21
- - lib/json/repair/repairer.rb
21
+ - lib/json/repairer.rb
22
22
 
23
23
  Metrics/CyclomaticComplexity:
24
24
  Exclude:
25
- - lib/json/repair/repairer.rb
25
+ - lib/json/repairer.rb
26
26
 
27
27
  Metrics/PerceivedComplexity:
28
28
  Exclude:
29
- - lib/json/repair/repairer.rb
29
+ - lib/json/repairer.rb
30
30
 
31
31
  Metrics/BlockLength:
32
32
  Exclude:
33
- - lib/json/repair/repairer.rb
33
+ - lib/json/repairer.rb
34
34
  - spec/**/*
35
35
 
36
36
  Metrics/BlockNesting:
37
37
  Exclude:
38
- - lib/json/repair/repairer.rb
38
+ - lib/json/repairer.rb
39
39
 
40
40
  Metrics/ModuleLength:
41
41
  Exclude:
data/CHANGELOG.md CHANGED
Binary file
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
- # JSON::Repair
1
+ # JSON::Repair [![Gem Version](https://badge.fury.io/rb/json-repair.svg)](https://badge.fury.io/rb/json-repair) [![Build Status](https://github.com/sashazykov/json-repair-rb/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/sashazykov/json-repair-rb/actions) [![Stand With Ukraine](https://raw.githubusercontent.com/vshymanskyy/StandWithUkraine/main/badges/StandWithUkraine.svg)](https://stand-with-ukraine.pp.ua)
2
2
 
3
- This is a Ruby gem designed to repair broken JSON strings. Inspired by the [jsonrepair js library](https://github.com/josdejong/jsonrepair/). It efficiently handles and corrects malformed JSON data, making it especially useful in scenarios where JSON output from LLMs might not strictly adhere to JSON standards. Whether it's missing quotes, misplaced commas, or unexpected characters, it ensures that the JSON data is valid and can be parsed correctly.
3
+ This is a Ruby gem designed to repair broken JSON strings. Inspired by and based on the [jsonrepair js library](https://github.com/josdejong/jsonrepair/). It efficiently handles and corrects malformed JSON data, making it especially useful in scenarios where JSON output from LLMs might not strictly adhere to JSON standards. Whether it's missing quotes, misplaced commas, or unexpected characters, it ensures that the JSON data is valid and can be parsed correctly.
4
4
 
5
5
  ## Installation
6
6
 
@@ -25,7 +25,7 @@ require 'json/repair'
25
25
 
26
26
  # Example of repairing a JSON string
27
27
  broken_json = '{name: Alice, "age": 25,}'
28
- repaired_json = JSON::Repair.repair(broken_json)
28
+ repaired_json = JSON.repair(broken_json)
29
29
  puts repaired_json # Outputs: {"name": "Alice", "age": 25}
30
30
  ```
31
31
 
@@ -48,3 +48,10 @@ The gem is available as open source under the terms of the [ISC License](https:/
48
48
  ## Code of Conduct
49
49
 
50
50
  Everyone interacting in the JSON::Repair project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [code of conduct](https://github.com/sashazykov/json-repair-rb/blob/main/CODE_OF_CONDUCT.md).
51
+
52
+ ## Similar libraries in other languages
53
+
54
+ - Typescript: https://github.com/josdejong/jsonrepair
55
+ - Go: https://github.com/RealAlexandreAI/json-repair
56
+ - JavaScript: https://github.com/RyanMarcus/dirty-json
57
+ - Python: https://github.com/mangiucugna/json_repair
data/Steepfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ target :lib do
4
+ signature 'sig'
5
+ check 'lib'
6
+ end
@@ -35,21 +35,28 @@ module JSON
35
35
  LOWERCASE_E = 'e' # 0x65
36
36
  UPPERCASE_F = 'F' # 0x46
37
37
  LOWERCASE_F = 'f' # 0x66
38
- NON_BREAKING_SPACE = "\u00a0" # 0xa0
39
- EN_QUAD = "\u2000" # 0x2000
40
- HAIR_SPACE = "\u200a" # 0x200a
41
- NARROW_NO_BREAK_SPACE = "\u202f" # 0x202f
42
- MEDIUM_MATHEMATICAL_SPACE = "\u205f" # 0x205f
43
- IDEOGRAPHIC_SPACE = "\u3000" # 0x3000
44
- DOUBLE_QUOTE_LEFT = "\u201c" # 0x201c
45
- DOUBLE_QUOTE_RIGHT = "\u201d" # 0x201d
46
- QUOTE_LEFT = "\u2018" # 0x2018
47
- QUOTE_RIGHT = "\u2019" # 0x2019
38
+ NON_BREAKING_SPACE = ' ' # 0xa0
39
+ MONGOLIAN_VOWEL_SEPARATOR = '᠎' # 0x180e
40
+ EN_QUAD = ' ' # 0x2000
41
+ ZERO_WIDTH_SPACE = '​' # 0x200b
42
+ NARROW_NO_BREAK_SPACE = ' ' # 0x202f
43
+ MEDIUM_MATHEMATICAL_SPACE = ' ' # 0x205f
44
+ IDEOGRAPHIC_SPACE = ' ' # 0x3000
45
+ ZERO_WIDTH_NO_BREAK_SPACE = '' # 0xfeff
46
+ DOUBLE_QUOTE_LEFT = '“' # 0x201c
47
+ DOUBLE_QUOTE_RIGHT = '”' # 0x201d
48
+ QUOTE_LEFT = '‘' # 0x2018
49
+ QUOTE_RIGHT = '’' # 0x2019
48
50
  GRAVE_ACCENT = '`' # 0x0060
49
- ACUTE_ACCENT = "\u00b4" # 0x00b4
51
+ ACUTE_ACCENT = '´' # 0x00b4
50
52
 
51
53
  REGEX_DELIMITER = %r{^[,:\[\]/{}()\n+]+$}
54
+ REGEX_UNQUOTED_STRING_DELIMITER = %r{^[,\[\]/{}\n+]+$}
52
55
  REGEX_START_OF_VALUE = /^[\[{\w-]$/
56
+ # matches "https://" and other schemas
57
+ REGEX_URL_START = %r{^(http|https|ftp|mailto|file|data|irc)://$}
58
+ # matches all valid URL characters EXCEPT "[", "]", and "," (important JSON delimiters)
59
+ REGEX_URL_CHAR = %r{^[A-Za-z0-9\-._~:/?#@!$&'()*+;=]$}
53
60
 
54
61
  # Functions to check character chars
55
62
  def hex?(char)
@@ -70,8 +77,19 @@ module JSON
70
77
  REGEX_DELIMITER.match?(char)
71
78
  end
72
79
 
73
- def delimiter_except_slash?(char)
74
- delimiter?(char) && char != SLASH
80
+ def unquoted_string_delimiter?(char)
81
+ REGEX_UNQUOTED_STRING_DELIMITER.match?(char)
82
+ end
83
+
84
+ REGEX_FUNCTION_NAME_CHAR_START = /\A[a-zA-Z_$]\z/
85
+ REGEX_FUNCTION_NAME_CHAR = /\A[a-zA-Z0-9_$]\z/
86
+
87
+ def function_name_char_start?(char)
88
+ !char.nil? && REGEX_FUNCTION_NAME_CHAR_START.match?(char)
89
+ end
90
+
91
+ def function_name_char?(char)
92
+ !char.nil? && REGEX_FUNCTION_NAME_CHAR.match?(char)
75
93
  end
76
94
 
77
95
  def start_of_value?(char)
@@ -86,11 +104,22 @@ module JSON
86
104
  [SPACE, NEWLINE, TAB, RETURN].include?(char)
87
105
  end
88
106
 
107
+ def whitespace_except_newline?(char)
108
+ [SPACE, TAB, RETURN].include?(char)
109
+ end
110
+
89
111
  def special_whitespace?(char)
112
+ return false unless char
113
+
90
114
  [
91
- NON_BREAKING_SPACE, NARROW_NO_BREAK_SPACE, MEDIUM_MATHEMATICAL_SPACE, IDEOGRAPHIC_SPACE
115
+ NON_BREAKING_SPACE,
116
+ MONGOLIAN_VOWEL_SEPARATOR,
117
+ NARROW_NO_BREAK_SPACE,
118
+ MEDIUM_MATHEMATICAL_SPACE,
119
+ IDEOGRAPHIC_SPACE,
120
+ ZERO_WIDTH_NO_BREAK_SPACE
92
121
  ].include?(char) ||
93
- (char >= EN_QUAD && char <= HAIR_SPACE)
122
+ (char >= EN_QUAD && char <= ZERO_WIDTH_SPACE)
94
123
  end
95
124
 
96
125
  def quote?(char)
@@ -149,7 +178,7 @@ module JSON
149
178
 
150
179
  def parse_keyword(name, value)
151
180
  if @json[@index, name.length] == name
152
- @output += value
181
+ @output << value
153
182
  @index += name.length
154
183
  true
155
184
  else
@@ -161,10 +190,6 @@ module JSON
161
190
  text[0...start] + text[start + count..]
162
191
  end
163
192
 
164
- def function_name?(text)
165
- /^\w+$/.match?(text)
166
- end
167
-
168
193
  def ends_with_comma_or_newline?(text)
169
194
  /[,\n][ \t\r]*$/.match?(text)
170
195
  end
data/lib/json/repair/version.rb CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  module JSON
4
4
  module Repair
5
- VERSION = '0.1.0'
5
+ VERSION = '0.3.0'
6
6
  end
7
7
  end
data/lib/json/repair.rb CHANGED
@@ -1,14 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'repair/version'
4
- require_relative 'repair/repairer'
4
+ require_relative 'repairer'
5
5
 
6
6
  module JSON
7
- module Repair
8
- class JSONRepairError < StandardError; end
7
+ class JSONRepairError < StandardError; end
9
8
 
10
- def self.repair(json)
11
- Repairer.new(json).repair
12
- end
9
+ def self.repair(json)
10
+ Repairer.new(json).repair
13
11
  end
14
12
  end