json_completer 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: acf870fc5a65bf4f4f1b586cfa61e012137fc96c0c48e3345d26a38da7d765bf
4
- data.tar.gz: fe454f3e2485ae789840bb9c55c7fe72f3e8c9afffc9eccd0d13cc53ede8c05f
3
+ metadata.gz: 5d460af0d48e2cecf87411ba30d2a6aeac00fe38208d0222bf8b7218e373a2cc
4
+ data.tar.gz: 448401c51bc04e0a38fae036d3a64d94e5090846c39f69d081a8032b0b58e80a
5
5
  SHA512:
6
- metadata.gz: 5cb2ad4d01e5f204cafc9b7fb4df996e319ba2c0dccdce6a16f23eacd2493cbdf1168c068c15586b7e4643863935ca70fe8e2628ed8e0994439fefa2151c60b7
7
- data.tar.gz: 7dcd0cee8613e5a45f9cedbee90386234f31fe36ce832bb5abe160d4993b25a06d891340b9f17c0559026dd8e4b5bb4754a9a350ca480baed8791496f2837437
6
+ metadata.gz: 256f6ba460ef729a9babe9f9355f0d888c3c7f3dc64e3b4c85c2ae69ad6cb6c3a4b107aced36fe55b04e06302034b672f63c94c6db03b074a0462223eee8d5d1
7
+ data.tar.gz: 101f08a619d56129398b751815077897e3f557d581b76d075e41f57098e93ec92aa14d9f7edb1c88dbb8179f26d3aaf0fde3d81cfa986d2c912aa70b77294074
data/README.md CHANGED
@@ -64,6 +64,8 @@ result3 = completer.parse('{"users": [{"name": "Alice"}, {"name": "Bob"}]}')
64
64
  # => {"users" => [{"name" => "Alice"}, {"name" => "Bob"}]}
65
65
  ```
66
66
 
67
+ Stateful `JsonCompleter` instances assume append-only input. If earlier bytes change, create a new instance; truncation to a shorter prefix still resets state automatically.
68
+
67
69
  ### String Output with `.complete`
68
70
 
69
71
  Use `.complete` when you specifically need completed JSON text instead of parsed Ruby objects:
@@ -83,6 +85,7 @@ This is the second-tier option when another layer expects JSON text and you want
83
85
  - **Zero reprocessing**: Maintains parsing state to avoid reparsing previously processed data
84
86
  - **Linear complexity**: Each chunk processed in O(n) time where n = new data size, not total size
85
87
  - **Memory efficient**: Uses token-based accumulation with minimal state overhead
88
+ - **Byte-oriented string scanning**: Walks JSON input as bytes and copies contiguous non-escape string content in slices to reduce per-character overhead on long streamed strings
86
89
  - **Context preservation**: Tracks nested structures without full document analysis
87
90
 
88
91
  ### Common Use Cases
@@ -4,29 +4,31 @@ class JsonCompleter
4
4
  module CompletionEngine
5
5
  def complete(partial_json)
6
6
  input = partial_json
7
+ # Same byte-oriented trick as parse: compare ASCII JSON syntax as integers and avoid
8
+ # allocating transient 1-character strings in the streaming loop.
9
+ input_length = input.bytesize
7
10
 
8
- if @state.nil? || @state.input_length > input.length
11
+ if @state.nil? || @state.input_length > input_length
9
12
  @state = ParsingState.new
10
13
  end
11
14
 
12
15
  return input if input.empty?
13
16
  return input if valid_json_primitive_or_document?(input)
14
17
 
15
- if @state.input_length == input.length && !@state.output_tokens.empty?
18
+ if @state.input_length == input_length && !@state.output_tokens.empty?
16
19
  return finalize_completion(@state.output_tokens.dup, @state.context_stack.dup, @state.incomplete_string_token)
17
20
  end
18
21
 
19
22
  output_tokens = @state.output_tokens.dup
20
23
  context_stack = @state.context_stack.dup
21
24
  index = @state.last_index
22
- length = input.length
23
25
  incomplete_string_token = @state.incomplete_string_token
24
26
 
25
27
  if incomplete_string_token && output_tokens.last&.start_with?('"') && output_tokens.last.end_with?('"')
26
28
  output_tokens.pop
27
29
  end
28
30
 
29
- while index < length
31
+ while index < input_length
30
32
  if incomplete_string_token && index == @state.last_index
31
33
  index, status = Scanners.scan_string(input, index, incomplete_string_token)
32
34
 
@@ -38,32 +40,16 @@ class JsonCompleter
38
40
  next
39
41
  end
40
42
 
41
- char = input[index]
43
+ byte = input.getbyte(index)
42
44
  last_significant_char_in_output = get_last_significant_char(output_tokens)
43
45
 
44
- case char
45
- when '{'
46
- ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
47
- ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
48
- output_tokens << char
49
- context_stack << '{'
50
- index += 1
51
- when '['
52
- ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
53
- ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
54
- output_tokens << char
55
- context_stack << '['
56
- index += 1
57
- when '}'
58
- remove_trailing_comma(output_tokens)
59
- output_tokens << char
60
- context_stack.pop if !context_stack.empty? && context_stack.last == '{'
46
+ # ASCII byte values: 9/10/13/32 = whitespace, 34 = ", 44 = ,, 45 = -, 58 = :,
47
+ # 91/93 = [] , 102/110/116 = f/n/t, 123/125 = {}.
48
+ case byte
49
+ when 9, 10, 13, 32
50
+ output_tokens << input.byteslice(index, 1)
61
51
  index += 1
62
- when ']'
63
- output_tokens << char
64
- context_stack.pop if !context_stack.empty? && context_stack.last == '['
65
- index += 1
66
- when '"'
52
+ when 34
67
53
  ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
68
54
  ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
69
55
 
@@ -75,30 +61,62 @@ class JsonCompleter
75
61
  else
76
62
  incomplete_string_token = string_token
77
63
  end
78
- when ':'
64
+ when 44
65
+ remove_trailing_comma(output_tokens)
66
+ output_tokens << ','
67
+ index += 1
68
+ when 45, 48..57
69
+ ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
70
+ ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
71
+
72
+ num_str, consumed = Scanners.scan_number_literal(input, index)
73
+ output_tokens << num_str
74
+ index += consumed
75
+ when 58
79
76
  remove_trailing_comma(output_tokens) if last_significant_char_in_output == ','
80
- output_tokens << char
77
+ output_tokens << ':'
81
78
  index += 1
82
- when ','
83
- remove_trailing_comma(output_tokens)
84
- output_tokens << char
79
+ when 91
80
+ ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
81
+ ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
82
+ output_tokens << '['
83
+ context_stack << '['
84
+ index += 1
85
+ when 93
86
+ output_tokens << ']'
87
+ context_stack.pop if !context_stack.empty? && context_stack.last == '['
85
88
  index += 1
86
- when 't', 'f', 'n'
89
+ when 102
87
90
  ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
88
91
  ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
89
92
 
90
- keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP[char.downcase])
93
+ keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP['f'])
91
94
  output_tokens << keyword_val
92
95
  index += consumed
93
- when '-', '0'..'9'
96
+ when 110
94
97
  ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
95
98
  ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
96
99
 
97
- num_str, consumed = Scanners.scan_number_literal(input, index)
98
- output_tokens << num_str
100
+ keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP['n'])
101
+ output_tokens << keyword_val
102
+ index += consumed
103
+ when 116
104
+ ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
105
+ ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
106
+
107
+ keyword_val, consumed = Scanners.scan_keyword_literal(input, index, KEYWORD_MAP['t'])
108
+ output_tokens << keyword_val
99
109
  index += consumed
100
- when /\s/
101
- output_tokens << char
110
+ when 123
111
+ ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
112
+ ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
113
+ output_tokens << '{'
114
+ context_stack << '{'
115
+ index += 1
116
+ when 125
117
+ remove_trailing_comma(output_tokens)
118
+ output_tokens << '}'
119
+ context_stack.pop if !context_stack.empty? && context_stack.last == '{'
102
120
  index += 1
103
121
  else
104
122
  index += 1
@@ -109,7 +127,7 @@ class JsonCompleter
109
127
  output_tokens: output_tokens,
110
128
  context_stack: context_stack,
111
129
  last_index: index,
112
- input_length: length,
130
+ input_length: input_length,
113
131
  incomplete_string_token: incomplete_string_token
114
132
  )
115
133
 
@@ -4,72 +4,79 @@ class JsonCompleter
4
4
  module ParserEngine
5
5
  def parse(partial_json)
6
6
  input = partial_json
7
+ # The hot path works on raw bytes, not 1-character Ruby strings. JSON punctuation is ASCII,
8
+ # so getbyte/bytesize let us compare cheap integers while multibyte UTF-8 payload stays intact.
9
+ input_length = input.bytesize
7
10
 
8
11
  if @parse_state.nil? ||
9
- @parse_state.input_length > input.length ||
10
- (@parse_state.input_snapshot && !input.start_with?(@parse_state.input_snapshot))
12
+ @parse_state.input_length > input_length ||
13
+ (@parse_state.input_length < input_length && reset_parse_state_for_input_growth?(input))
14
+ @parse_state = self.class.new_parse_state
15
+ elsif @parse_state.input_length == input_length
16
+ if @parse_state.input_snapshot == input
17
+ finalize_parse_result
18
+ return @parse_state.root
19
+ end
20
+
11
21
  @parse_state = self.class.new_parse_state
12
22
  end
13
23
 
14
24
  return nil if input.empty?
15
25
 
16
26
  begin
17
- if @parse_state.input_length == input.length
18
- finalize_parse_result
19
- return @parse_state.root
20
- end
21
-
22
27
  prepare_parse_state_for_incremental_input
23
28
 
24
29
  index = @parse_state.last_index
25
- while index < input.length
30
+ while index < input_length
26
31
  if @parse_state.token_state
27
32
  index = continue_parse_token(input, index)
28
33
  next
29
34
  end
30
35
 
31
- char = input[index]
32
- if top_level_value_complete? && char !~ /\s/
36
+ byte = input.getbyte(index)
37
+ if top_level_value_complete? && !whitespace_byte?(byte)
33
38
  raise ParseError, 'unexpected token after top-level value'
34
39
  end
35
40
 
36
- case char
37
- when /\s/
41
+ # ASCII byte values: 9/10/13/32 = whitespace, 34 = ", 44 = ,, 45 = -, 58 = :,
42
+ # 91/93 = [] , 102/110/116 = f/n/t, 123/125 = {}.
43
+ case byte
44
+ when 9, 10, 13, 32
38
45
  index += 1
39
- when '{'
40
- start_parse_container({})
46
+ when 34
47
+ start_parse_string_token
41
48
  index += 1
42
- when '['
43
- start_parse_container([])
49
+ when 44
50
+ parse_comma!
44
51
  index += 1
45
- when '}'
46
- close_parse_object!
52
+ when 45, 48..57
53
+ start_parse_number_token(byte)
47
54
  index += 1
48
- when ']'
49
- close_parse_array!
55
+ when 58
56
+ parse_colon!
50
57
  index += 1
51
- when '"'
52
- start_parse_string_token
58
+ when 91
59
+ start_parse_container([])
53
60
  index += 1
54
- when ':'
55
- parse_colon!
61
+ when 93
62
+ close_parse_array!
56
63
  index += 1
57
- when ','
58
- parse_comma!
64
+ when 102, 110, 116
65
+ start_parse_keyword_token(byte)
59
66
  index += 1
60
- when 't', 'f', 'n'
61
- start_parse_keyword_token(char)
67
+ when 123
68
+ start_parse_container({})
62
69
  index += 1
63
- when '-', '0'..'9'
64
- start_parse_number_token(char)
70
+ when 125
71
+ close_parse_object!
65
72
  index += 1
66
73
  else
67
- raise ParseError, "unexpected token #{char.inspect}"
74
+ raise ParseError, "unexpected token #{input.byteslice(index, 1).inspect}"
68
75
  end
69
76
  end
70
77
 
71
78
  @parse_state.last_index = index
72
- @parse_state.input_length = input.length
79
+ @parse_state.input_length = input_length
73
80
  @parse_state.input_snapshot = input
74
81
  finalize_parse_result
75
82
  @parse_state.root
@@ -172,10 +179,10 @@ class JsonCompleter
172
179
  @parse_state.token_state = nil
173
180
  end
174
181
 
175
- def start_parse_number_token(first_char)
182
+ def start_parse_number_token(first_byte)
176
183
  slot = parse_value_slot!
177
184
  token = Scanners::NumberToken.new(slot: slot)
178
- token.append(first_char)
185
+ token.append_byte(first_byte)
179
186
  assign_parse_slot(slot, token.parsed_value)
180
187
  transition_after_parse_value(slot)
181
188
  @parse_state.token_state = token
@@ -183,21 +190,22 @@ class JsonCompleter
183
190
 
184
191
  def continue_parse_number_token(input, index)
185
192
  token = @parse_state.token_state
193
+ length = input.bytesize
186
194
 
187
- while index < input.length && token.append(input[index])
195
+ while index < length && token.append_byte(input.getbyte(index))
188
196
  assign_parse_slot(token.slot, token.parsed_value)
189
197
  index += 1
190
198
  end
191
199
 
192
200
  raise ParseError, 'invalid number literal' if token.invalid?
193
201
 
194
- @parse_state.token_state = nil if index < input.length
202
+ @parse_state.token_state = nil if index < length
195
203
  index
196
204
  end
197
205
 
198
- def start_parse_keyword_token(first_char)
206
+ def start_parse_keyword_token(first_byte)
199
207
  slot = parse_value_slot!
200
- token = Scanners::KeywordToken.new(slot: slot, target: KEYWORD_MAP[first_char], matched: 1)
208
+ token = Scanners::KeywordToken.new(slot: slot, target: keyword_target_for_byte(first_byte), matched: 1)
201
209
  assign_parse_slot(slot, token.parsed_value)
202
210
  transition_after_parse_value(slot)
203
211
  @parse_state.token_state = token
@@ -205,14 +213,15 @@ class JsonCompleter
205
213
 
206
214
  def continue_parse_keyword_token(input, index)
207
215
  token = @parse_state.token_state
216
+ length = input.bytesize
208
217
 
209
- while index < input.length && token.matched < token.target.length && token.append(input[index])
218
+ while index < length && token.matched < token.target.length && token.append_byte(input.getbyte(index))
210
219
  index += 1
211
220
  end
212
221
 
213
- raise ParseError, 'invalid keyword literal' if token.matched < token.target.length && index < input.length
222
+ raise ParseError, 'invalid keyword literal' if token.matched < token.target.length && index < length
214
223
 
215
- @parse_state.token_state = nil if index < input.length || token.matched == token.target.length
224
+ @parse_state.token_state = nil if index < length || token.matched == token.target.length
216
225
  index
217
226
  end
218
227
 
@@ -238,7 +247,6 @@ class JsonCompleter
238
247
 
239
248
  context.mode = :key_or_end
240
249
  context.current_key = nil
241
-
242
250
  end
243
251
  end
244
252
 
@@ -339,6 +347,39 @@ class JsonCompleter
339
347
  token.visible_key_replaced_value = token.context.container[current_key]
340
348
  token.context.container[current_key] = nil
341
349
  end
350
+
351
+ def reset_parse_state_for_input_growth?(input)
352
+ return false unless @parse_state.input_snapshot
353
+ return false unless prefix_validation_required?
354
+
355
+ !input.start_with?(@parse_state.input_snapshot)
356
+ end
357
+
358
+ def prefix_validation_required?
359
+ @parse_state.context_stack.empty?
360
+ end
361
+
362
+ def keyword_target_for_byte(byte)
363
+ case byte
364
+ when 102
365
+ 'false'
366
+ when 110
367
+ 'null'
368
+ when 116
369
+ 'true'
370
+ else
371
+ raise ParseError, "unexpected keyword token byte: #{byte}"
372
+ end
373
+ end
374
+
375
+ def whitespace_byte?(byte)
376
+ case byte
377
+ when 9, 10, 13, 32
378
+ true
379
+ else
380
+ false
381
+ end
382
+ end
342
383
  end
343
384
 
344
385
  include ParserEngine
@@ -14,15 +14,16 @@ class JsonCompleter
14
14
  self.escape_state = :backslash
15
15
  end
16
16
 
17
- def append_char(char)
18
- buffer << char
17
+ def append_slice(input, start_index, length)
18
+ buffer << input.byteslice(start_index, length)
19
19
  end
20
20
 
21
- def append_simple_escape(char)
22
- buffer << char
21
+ # completion keeps escape bytes verbatim, so convert the ASCII byte back into a 1-byte string.
22
+ def append_simple_escape(byte)
23
+ buffer << byte.chr(Encoding::UTF_8)
23
24
  end
24
25
 
25
- def valid_simple_escape?(_char)
26
+ def valid_simple_escape?(_byte)
26
27
  true
27
28
  end
28
29
 
@@ -31,9 +32,9 @@ class JsonCompleter
31
32
  buffer << 'u'
32
33
  end
33
34
 
34
- def append_unicode_digit(char)
35
- unicode_digits << char
36
- buffer << char
35
+ def append_unicode_digit(byte)
36
+ unicode_digits << byte
37
+ buffer << byte.chr(Encoding::UTF_8)
37
38
  end
38
39
 
39
40
  def finish_unicode_escape!; end
@@ -84,37 +85,43 @@ class JsonCompleter
84
85
  self.escape_state = :backslash
85
86
  end
86
87
 
87
- def append_char(char)
88
- buffer << char
88
+ def append_slice(input, start_index, length)
89
+ buffer << input.byteslice(start_index, length)
89
90
  end
90
91
 
91
- def append_simple_escape(char)
92
- buffer << case char
93
- when 'b'
92
+ # ASCII escape bytes: 98/102/110/114/116 = b/f/n/r/t.
93
+ def append_simple_escape(byte)
94
+ buffer << case byte
95
+ when 98
94
96
  "\b"
95
- when 'f'
97
+ when 102
96
98
  "\f"
97
- when 'n'
99
+ when 110
98
100
  "\n"
99
- when 'r'
101
+ when 114
100
102
  "\r"
101
- when 't'
103
+ when 116
102
104
  "\t"
103
105
  else
104
- char
106
+ byte
105
107
  end
106
108
  end
107
109
 
108
- def valid_simple_escape?(char)
109
- ['"', '\\', '/', 'b', 'f', 'n', 'r', 't'].include?(char)
110
+ def valid_simple_escape?(byte)
111
+ case byte
112
+ when 34, 92, 47, 98, 102, 110, 114, 116
113
+ true
114
+ else
115
+ false
116
+ end
110
117
  end
111
118
 
112
119
  def start_unicode_escape!
113
120
  self.unicode_digits = String.new
114
121
  end
115
122
 
116
- def append_unicode_digit(char)
117
- unicode_digits << char
123
+ def append_unicode_digit(byte)
124
+ unicode_digits << byte
118
125
  end
119
126
 
120
127
  def finish_unicode_escape!
@@ -160,94 +167,97 @@ class JsonCompleter
160
167
  self.raw ||= String.new
161
168
  end
162
169
 
163
- def append(char)
170
+ # append_byte consumes ASCII bytes, not 1-character strings:
171
+ # 45 = -, 46 = ., 48..57 = 0..9, 69/101 = E/e.
172
+ def append_byte(byte)
164
173
  case phase
165
174
  when nil
166
- case char
167
- when '-'
168
- raw << char
175
+ case byte
176
+ when 45
177
+ raw << byte
169
178
  self.phase = :sign
170
- when '0'
171
- raw << char
179
+ when 48
180
+ raw << byte
172
181
  self.phase = :zero
173
- when /[0-9]/
174
- raw << char
182
+ when 49..57
183
+ raw << byte
175
184
  self.phase = :int
176
185
  else
177
186
  return false
178
187
  end
179
188
  when :sign
180
- case char
181
- when '0'
182
- raw << char
189
+ case byte
190
+ when 48
191
+ raw << byte
183
192
  self.phase = :zero
184
- when /[0-9]/
185
- raw << char
193
+ when 49..57
194
+ raw << byte
186
195
  self.phase = :int
187
- when '.'
188
- raw << char
196
+ when 46
197
+ raw << byte
189
198
  self.phase = :frac_start
190
199
  else
191
200
  return false
192
201
  end
193
202
  when :zero
194
- if char.match?(/[0-9]/)
203
+ if Scanners.digit_byte?(byte)
195
204
  self.invalid = true
196
205
  return false
197
- elsif char == '.'
198
- raw << char
206
+ elsif byte == 46
207
+ raw << byte
199
208
  self.phase = :frac_start
200
- elsif %w[e E].include?(char)
201
- raw << char
209
+ elsif Scanners.exponent_byte?(byte)
210
+ raw << byte
202
211
  self.phase = :exp_start
203
212
  else
204
213
  return false
205
214
  end
206
215
  when :int
207
- if char.match?(/[0-9]/)
208
- raw << char
209
- elsif char == '.'
210
- raw << char
216
+ if Scanners.digit_byte?(byte)
217
+ raw << byte
218
+ elsif byte == 46
219
+ raw << byte
211
220
  self.phase = :frac_start
212
- elsif %w[e E].include?(char)
213
- raw << char
221
+ elsif Scanners.exponent_byte?(byte)
222
+ raw << byte
214
223
  self.phase = :exp_start
215
224
  else
216
225
  return false
217
226
  end
218
227
  when :frac_start
219
- return false unless char.match?(/[0-9]/)
228
+ return false unless Scanners.digit_byte?(byte)
220
229
 
221
- raw << char
230
+ raw << byte
222
231
  self.phase = :frac
223
232
  when :frac
224
- if char.match?(/[0-9]/)
225
- raw << char
226
- elsif %w[e E].include?(char)
227
- raw << char
233
+ if Scanners.digit_byte?(byte)
234
+ raw << byte
235
+ elsif Scanners.exponent_byte?(byte)
236
+ raw << byte
228
237
  self.phase = :exp_start
229
238
  else
230
239
  return false
231
240
  end
232
241
  when :exp_start
233
- if ['+', '-'].include?(char)
234
- raw << char
242
+ case byte
243
+ when 43, 45
244
+ raw << byte
235
245
  self.phase = :exp_sign
236
- elsif char.match?(/[0-9]/)
237
- raw << char
246
+ when 48..57
247
+ raw << byte
238
248
  self.phase = :exp
239
249
  else
240
250
  return false
241
251
  end
242
252
  when :exp_sign
243
- return false unless char.match?(/[0-9]/)
253
+ return false unless Scanners.digit_byte?(byte)
244
254
 
245
- raw << char
255
+ raw << byte
246
256
  self.phase = :exp
247
257
  when :exp
248
- return false unless char.match?(/[0-9]/)
258
+ return false unless Scanners.digit_byte?(byte)
249
259
 
250
- raw << char
260
+ raw << byte
251
261
  end
252
262
 
253
263
  true
@@ -285,9 +295,9 @@ class JsonCompleter
285
295
  super
286
296
  end
287
297
 
288
- def append(char)
298
+ def append_byte(byte)
289
299
  return false if matched >= target.length
290
- return false unless char.downcase == target[matched]
300
+ return false unless (byte | 0x20) == target.getbyte(matched)
291
301
 
292
302
  self.matched += 1
293
303
  true
@@ -307,13 +317,17 @@ class JsonCompleter
307
317
 
308
318
  def scan_string(input, index, token)
309
319
  strict = token.is_a?(ParsedStringToken)
320
+ # JSON string syntax is ASCII, so scanning bytes is safe here: multibyte UTF-8 content is
321
+ # treated as opaque payload and copied via byteslice until we hit an ASCII delimiter/escape.
322
+ length = input.bytesize
323
+ segment_start = index
310
324
 
311
- while index < input.length
312
- char = input[index]
325
+ while index < length
326
+ byte = input.getbyte(index)
313
327
 
314
328
  if token.unicode_digits
315
- if char.match?(/[0-9a-fA-F]/)
316
- token.append_unicode_digit(char)
329
+ if hex_digit_byte?(byte)
330
+ token.append_unicode_digit(byte)
317
331
  index += 1
318
332
 
319
333
  if token.unicode_digits.length == 4
@@ -323,6 +337,7 @@ class JsonCompleter
323
337
  return [index, :invalid_unicode] if status == :invalid_unicode
324
338
  end
325
339
 
340
+ segment_start = index
326
341
  next
327
342
  end
328
343
 
@@ -332,54 +347,67 @@ class JsonCompleter
332
347
  end
333
348
 
334
349
  if token.escape_state == :backslash
335
- if strict && token.pending_high_surrogate && char != 'u'
350
+ if strict && token.pending_high_surrogate && byte != 117
336
351
  return [index, :invalid_unicode]
337
352
  end
338
353
 
339
- if char == 'u'
354
+ if byte == 117
340
355
  token.start_unicode_escape!
341
356
  index += 1
357
+ segment_start = index
342
358
  next
343
359
  end
344
360
 
345
- return [index, :invalid_escape] unless token.valid_simple_escape?(char)
361
+ return [index, :invalid_escape] unless token.valid_simple_escape?(byte)
346
362
 
347
- token.append_simple_escape(char)
363
+ token.append_simple_escape(byte)
348
364
  token.escape_state = nil
349
365
  index += 1
366
+ segment_start = index
350
367
  next
351
368
  end
352
369
 
353
- case char
354
- when '\\'
355
- token.start_escape!
356
- index += 1
357
- when '"'
370
+ if strict && token.pending_high_surrogate && byte != 92
371
+ return [index, :invalid_unicode]
372
+ end
373
+
374
+ if byte == 34
375
+ token.append_slice(input, segment_start, index - segment_start) if index > segment_start
376
+
358
377
  if strict && token.pending_high_surrogate
359
378
  return [index, :invalid_unicode]
360
379
  end
361
380
 
362
381
  token.terminate!
363
382
  return [index + 1, :terminated]
364
- else
365
- if strict
366
- return [index, :invalid_control_character] if char.ord < 0x20
367
- return [index, :invalid_unicode] if token.pending_high_surrogate
368
- end
383
+ end
369
384
 
370
- token.append_char(char)
385
+ if byte == 92
386
+ token.append_slice(input, segment_start, index - segment_start) if index > segment_start
387
+ token.start_escape!
371
388
  index += 1
389
+ segment_start = index
390
+ next
391
+ end
392
+
393
+ if strict && byte < 0x20
394
+ token.append_slice(input, segment_start, index - segment_start) if index > segment_start
395
+ return [index, :invalid_control_character]
372
396
  end
397
+
398
+ index += 1
373
399
  end
374
400
 
401
+ token.append_slice(input, segment_start, index - segment_start) if index > segment_start
375
402
  [index, :incomplete]
376
403
  end
377
404
 
378
405
  def scan_number_literal(input, index)
379
406
  start_index = index
380
407
  token = NumberToken.new
408
+ length = input.bytesize
381
409
 
382
- while index < input.length && token.append(input[index])
410
+ while index < length && token.append_byte(input.getbyte(index))
383
411
  index += 1
384
412
  end
385
413
 
@@ -389,14 +417,32 @@ class JsonCompleter
389
417
  def scan_keyword_literal(input, index, target_keyword)
390
418
  start_index = index
391
419
  token = KeywordToken.new(target: target_keyword)
420
+ length = input.bytesize
392
421
 
393
- while index < input.length && token.append(input[index])
422
+ while index < length && token.append_byte(input.getbyte(index))
394
423
  index += 1
395
424
  end
396
425
 
397
- return [input[start_index], 1] if token.matched.zero?
426
+ return [input.byteslice(start_index, 1), 1] if token.matched.zero?
398
427
 
399
428
  [target_keyword, index - start_index]
400
429
  end
430
+
431
+ def digit_byte?(byte)
432
+ byte.between?(48, 57)
433
+ end
434
+
435
+ def exponent_byte?(byte)
436
+ case byte
437
+ when 69, 101
438
+ true
439
+ else
440
+ false
441
+ end
442
+ end
443
+
444
+ def hex_digit_byte?(byte)
445
+ digit_byte?(byte) || byte.between?(65, 70) || byte.between?(97, 102)
446
+ end
401
447
  end
402
448
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_completer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aha! (www.aha.io)
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-11 00:00:00.000000000 Z
11
+ date: 2026-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec