rhales 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad690a26efe57e8ff9f10abdcb05423516fbfc26be67ba579d07ea8d9bc2ede7
4
- data.tar.gz: 7ce9893a2160814d2783bf6ae0a90809a4a240a3d44d69e4dd3b886bd1b89e65
3
+ metadata.gz: ba52e82c80de6bba26167aaf61b424d72a03fc195ec69be7e38dbd17422cf064
4
+ data.tar.gz: d87dc62997bf10cfde421bb93e7d1025b2374e9cd53a2280acd37c157d553227
5
5
  SHA512:
6
- metadata.gz: 9083d11f2d75d1d301191f29f783488894adb322db93e51071c784f141bcf124b34c9fcc28deb77e8382d159ab82b75cb12f5584523feb8ccb55e883c02ee99d
7
- data.tar.gz: 57a9d7c3f95b02a960488cf2ade3cf4203e98bf70174d6fcc6c29fd3be6d75447bb5b41869ab1991f33b31202599a0c4635aa514d5aa0b410b24dd4ea26abf11
6
+ metadata.gz: 8368580c220c662432ede3e323b9460b6edba4ff5012c57fbbe0b4760572d063aed07225a4e9b29d542d00f169c7ccdab9ddf7c1aa742b67d0a80c438ad91b18
7
+ data.tar.gz: 2ff74a6825fda46507e849dfc19139c7084be205024a03d2b0fd5f8d930d71366ce5c3407fbeeb77acf133bdd5486b17dbc5b068780ec169a1ee29c15669f1e2
@@ -26,6 +26,8 @@ module Rhales
26
26
  def detect(template_html)
27
27
  scanner = StringScanner.new(template_html)
28
28
  validator = SafeInjectionValidator.new(template_html)
29
+ # Build byte-to-char map once for the entire template
30
+ @byte_to_char_map = build_byte_to_char_map(template_html)
29
31
 
30
32
  # Try head section injection points first
31
33
  head_injection_point = detect_head_injection_point(scanner, validator, template_html)
@@ -68,7 +70,9 @@ module Rhales
68
70
 
69
71
  # Find opening <body> tag
70
72
  if scanner.scan_until(/<body\b[^>]*>/i)
71
- body_start = scanner.pos - scanner.matched.length
73
+ # Convert byte position to character position using pre-built map
74
+ byte_body_start = scanner.pos - scanner.matched.length
75
+ body_start = @byte_to_char_map[byte_body_start]
72
76
  safe_position = find_safe_injection_position(validator, body_start)
73
77
  return safe_position if safe_position
74
78
  end
@@ -81,11 +85,15 @@ module Rhales
81
85
 
82
86
  # Find opening <head> tag
83
87
  return nil unless scanner.scan_until(/<head\b[^>]*>/i)
84
- head_start = scanner.pos
88
+ # Convert byte position to character position using pre-built map
89
+ byte_head_start = scanner.pos
90
+ head_start = @byte_to_char_map[byte_head_start]
85
91
 
86
92
  # Find closing </head> tag
87
93
  return nil unless scanner.scan_until(/<\/head>/i)
88
- head_end = scanner.pos - scanner.matched.length
94
+ # Convert byte position to character position using pre-built map
95
+ byte_head_end = scanner.pos - scanner.matched.length
96
+ head_end = @byte_to_char_map[byte_head_end]
89
97
 
90
98
  [head_start, head_end]
91
99
  end
@@ -94,9 +102,13 @@ module Rhales
94
102
  head_content = template_html[head_start...head_end]
95
103
  scanner = StringScanner.new(head_content)
96
104
  last_link_end = nil
105
+ byte_to_char_map = build_byte_to_char_map(head_content)
97
106
 
98
107
  while scanner.scan_until(/<link\b[^>]*\/?>/i)
99
- last_link_end = scanner.pos
108
+ # scanner.pos is byte position within head_content
109
+ byte_pos = scanner.pos
110
+ # Convert to character position using pre-built map
111
+ last_link_end = byte_to_char_map[byte_pos]
100
112
  end
101
113
 
102
114
  last_link_end ? head_start + last_link_end : nil
@@ -106,9 +118,13 @@ module Rhales
106
118
  head_content = template_html[head_start...head_end]
107
119
  scanner = StringScanner.new(head_content)
108
120
  last_meta_end = nil
121
+ byte_to_char_map = build_byte_to_char_map(head_content)
109
122
 
110
123
  while scanner.scan_until(/<meta\b[^>]*\/?>/i)
111
- last_meta_end = scanner.pos
124
+ # scanner.pos is byte position within head_content
125
+ byte_pos = scanner.pos
126
+ # Convert to character position using pre-built map
127
+ last_meta_end = byte_to_char_map[byte_pos]
112
128
  end
113
129
 
114
130
  last_meta_end ? head_start + last_meta_end : nil
@@ -117,14 +133,18 @@ module Rhales
117
133
  def find_after_first_script(template_html, head_start, head_end)
118
134
  head_content = template_html[head_start...head_end]
119
135
  scanner = StringScanner.new(head_content)
136
+ byte_to_char_map = build_byte_to_char_map(head_content)
120
137
 
121
138
  # Find first script opening tag
122
139
  if scanner.scan_until(/<script\b[^>]*>/i)
123
- script_start = scanner.pos - scanner.matched.length
140
+ # Only the script end position is needed for this method, not the start position
124
141
 
125
142
  # Find corresponding closing tag
126
143
  if scanner.scan_until(/<\/script>/i)
127
- first_script_end = scanner.pos
144
+ # scanner.pos is byte position within head_content
145
+ byte_script_end = scanner.pos
146
+ # Convert to character position using pre-built map
147
+ first_script_end = byte_to_char_map[byte_script_end]
128
148
  return head_start + first_script_end
129
149
  end
130
150
  end
@@ -149,5 +169,43 @@ module Rhales
149
169
  # No safe position found
150
170
  nil
151
171
  end
172
+
173
+ # Builds a mapping from byte positions to character positions for efficient
174
+ # conversion when processing UTF-8 strings with StringScanner.
175
+ #
176
+ # This method creates a hash where keys are byte positions and values are
177
+ # the corresponding character positions. For multibyte UTF-8 characters,
178
+ # only the starting byte position has an entry in the map.
179
+ #
180
+ # @param str [String] The UTF-8 encoded string to map
181
+ # @return [Hash<Integer, Integer>] A hash mapping byte positions to character positions
182
+ #
183
+ # @example ASCII string
184
+ # build_byte_to_char_map("Hello")
185
+ # # => {0=>0, 1=>1, 2=>2, 3=>3, 4=>4, 5=>5}
186
+ #
187
+ # @example UTF-8 with multibyte characters
188
+ # build_byte_to_char_map("café") # é is 2 bytes
189
+ # # => {0=>0, 1=>1, 2=>2, 3=>3, 5=>4} # Note: byte 4 is continuation byte
190
+ #
191
+ def build_byte_to_char_map(str)
192
+ map = {}
193
+ char_pos = 0
194
+ byte_pos = 0
195
+
196
+ # Iterate through each character (not byte) in the string
197
+ str.each_char do |char|
198
+ # Map the starting byte position of this character
199
+ map[byte_pos] = char_pos
200
+
201
+ # Advance byte position by the byte size of this character
202
+ byte_pos += char.bytesize
203
+ char_pos += 1
204
+ end
205
+
206
+ # Add final mapping for the end of the string
207
+ map[byte_pos] = char_pos
208
+ map
209
+ end
152
210
  end
153
211
  end
@@ -55,16 +55,21 @@ module Rhales
55
55
  def calculate_unsafe_ranges
56
56
  ranges = []
57
57
  scanner = StringScanner.new(@html)
58
+ byte_to_char_map = build_byte_to_char_map(@html)
58
59
 
59
60
  UNSAFE_CONTEXTS.each do |context|
60
61
  scanner.pos = 0
61
62
 
62
63
  while scanner.scan_until(context[:start])
63
- start_pos = scanner.pos - scanner.matched.length
64
+ # Convert byte position to character position using pre-built map
65
+ byte_start_pos = scanner.pos - scanner.matched.length
66
+ start_pos = byte_to_char_map[byte_start_pos]
64
67
 
65
68
  # Find the corresponding end tag
66
69
  if scanner.scan_until(context[:end])
67
- end_pos = scanner.pos
70
+ # Convert byte position to character position using pre-built map
71
+ byte_end_pos = scanner.pos
72
+ end_pos = byte_to_char_map[byte_end_pos]
68
73
  ranges << (start_pos...end_pos)
69
74
  else
70
75
  # If no closing tag found, consider rest of document unsafe
@@ -99,5 +104,43 @@ module Rhales
99
104
 
100
105
  pos < @html.length && @html[pos] == '<'
101
106
  end
107
+
108
+ # Builds a mapping from byte positions to character positions for efficient
109
+ # conversion when processing UTF-8 strings with StringScanner.
110
+ #
111
+ # This method creates a hash where keys are byte positions and values are
112
+ # the corresponding character positions. For multibyte UTF-8 characters,
113
+ # only the starting byte position has an entry in the map.
114
+ #
115
+ # @param str [String] The UTF-8 encoded string to map
116
+ # @return [Hash<Integer, Integer>] A hash mapping byte positions to character positions
117
+ #
118
+ # @example ASCII string
119
+ # build_byte_to_char_map("Hello")
120
+ # # => {0=>0, 1=>1, 2=>2, 3=>3, 4=>4, 5=>5}
121
+ #
122
+ # @example UTF-8 with multibyte characters
123
+ # build_byte_to_char_map("café") # é is 2 bytes
124
+ # # => {0=>0, 1=>1, 2=>2, 3=>3, 5=>4} # Note: byte 4 is continuation byte
125
+ #
126
+ def build_byte_to_char_map(str)
127
+ map = {}
128
+ char_pos = 0
129
+ byte_pos = 0
130
+
131
+ # Iterate through each character (not byte) in the string
132
+ str.each_char do |char|
133
+ # Map the starting byte position of this character
134
+ map[byte_pos] = char_pos
135
+
136
+ # Advance byte position by the byte size of this character
137
+ byte_pos += char.bytesize
138
+ char_pos += 1
139
+ end
140
+
141
+ # Add final mapping for the end of the string
142
+ map[byte_pos] = char_pos
143
+ map
144
+ end
102
145
  end
103
146
  end
@@ -5,6 +5,6 @@
5
5
  module Rhales
6
6
  # Version information for the RSFC gem
7
7
  unless defined?(Rhales::VERSION)
8
- VERSION = '0.5.3'
8
+ VERSION = '0.5.4'
9
9
  end
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rhales
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - delano