rhales 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ba52e82c80de6bba26167aaf61b424d72a03fc195ec69be7e38dbd17422cf064
|
|
4
|
+
data.tar.gz: d87dc62997bf10cfde421bb93e7d1025b2374e9cd53a2280acd37c157d553227
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8368580c220c662432ede3e323b9460b6edba4ff5012c57fbbe0b4760572d063aed07225a4e9b29d542d00f169c7ccdab9ddf7c1aa742b67d0a80c438ad91b18
|
|
7
|
+
data.tar.gz: 2ff74a6825fda46507e849dfc19139c7084be205024a03d2b0fd5f8d930d71366ce5c3407fbeeb77acf133bdd5486b17dbc5b068780ec169a1ee29c15669f1e2
|
|
@@ -26,6 +26,8 @@ module Rhales
|
|
|
26
26
|
def detect(template_html)
|
|
27
27
|
scanner = StringScanner.new(template_html)
|
|
28
28
|
validator = SafeInjectionValidator.new(template_html)
|
|
29
|
+
# Build byte-to-char map once for the entire template
|
|
30
|
+
@byte_to_char_map = build_byte_to_char_map(template_html)
|
|
29
31
|
|
|
30
32
|
# Try head section injection points first
|
|
31
33
|
head_injection_point = detect_head_injection_point(scanner, validator, template_html)
|
|
@@ -68,7 +70,9 @@ module Rhales
|
|
|
68
70
|
|
|
69
71
|
# Find opening <body> tag
|
|
70
72
|
if scanner.scan_until(/<body\b[^>]*>/i)
|
|
71
|
-
|
|
73
|
+
# Convert byte position to character position using pre-built map
|
|
74
|
+
byte_body_start = scanner.pos - scanner.matched.length
|
|
75
|
+
body_start = @byte_to_char_map[byte_body_start]
|
|
72
76
|
safe_position = find_safe_injection_position(validator, body_start)
|
|
73
77
|
return safe_position if safe_position
|
|
74
78
|
end
|
|
@@ -81,11 +85,15 @@ module Rhales
|
|
|
81
85
|
|
|
82
86
|
# Find opening <head> tag
|
|
83
87
|
return nil unless scanner.scan_until(/<head\b[^>]*>/i)
|
|
84
|
-
|
|
88
|
+
# Convert byte position to character position using pre-built map
|
|
89
|
+
byte_head_start = scanner.pos
|
|
90
|
+
head_start = @byte_to_char_map[byte_head_start]
|
|
85
91
|
|
|
86
92
|
# Find closing </head> tag
|
|
87
93
|
return nil unless scanner.scan_until(/<\/head>/i)
|
|
88
|
-
|
|
94
|
+
# Convert byte position to character position using pre-built map
|
|
95
|
+
byte_head_end = scanner.pos - scanner.matched.length
|
|
96
|
+
head_end = @byte_to_char_map[byte_head_end]
|
|
89
97
|
|
|
90
98
|
[head_start, head_end]
|
|
91
99
|
end
|
|
@@ -94,9 +102,13 @@ module Rhales
|
|
|
94
102
|
head_content = template_html[head_start...head_end]
|
|
95
103
|
scanner = StringScanner.new(head_content)
|
|
96
104
|
last_link_end = nil
|
|
105
|
+
byte_to_char_map = build_byte_to_char_map(head_content)
|
|
97
106
|
|
|
98
107
|
while scanner.scan_until(/<link\b[^>]*\/?>/i)
|
|
99
|
-
|
|
108
|
+
# scanner.pos is byte position within head_content
|
|
109
|
+
byte_pos = scanner.pos
|
|
110
|
+
# Convert to character position using pre-built map
|
|
111
|
+
last_link_end = byte_to_char_map[byte_pos]
|
|
100
112
|
end
|
|
101
113
|
|
|
102
114
|
last_link_end ? head_start + last_link_end : nil
|
|
@@ -106,9 +118,13 @@ module Rhales
|
|
|
106
118
|
head_content = template_html[head_start...head_end]
|
|
107
119
|
scanner = StringScanner.new(head_content)
|
|
108
120
|
last_meta_end = nil
|
|
121
|
+
byte_to_char_map = build_byte_to_char_map(head_content)
|
|
109
122
|
|
|
110
123
|
while scanner.scan_until(/<meta\b[^>]*\/?>/i)
|
|
111
|
-
|
|
124
|
+
# scanner.pos is byte position within head_content
|
|
125
|
+
byte_pos = scanner.pos
|
|
126
|
+
# Convert to character position using pre-built map
|
|
127
|
+
last_meta_end = byte_to_char_map[byte_pos]
|
|
112
128
|
end
|
|
113
129
|
|
|
114
130
|
last_meta_end ? head_start + last_meta_end : nil
|
|
@@ -117,14 +133,18 @@ module Rhales
|
|
|
117
133
|
def find_after_first_script(template_html, head_start, head_end)
|
|
118
134
|
head_content = template_html[head_start...head_end]
|
|
119
135
|
scanner = StringScanner.new(head_content)
|
|
136
|
+
byte_to_char_map = build_byte_to_char_map(head_content)
|
|
120
137
|
|
|
121
138
|
# Find first script opening tag
|
|
122
139
|
if scanner.scan_until(/<script\b[^>]*>/i)
|
|
123
|
-
|
|
140
|
+
# Only the script end position is needed for this method, not the start position
|
|
124
141
|
|
|
125
142
|
# Find corresponding closing tag
|
|
126
143
|
if scanner.scan_until(/<\/script>/i)
|
|
127
|
-
|
|
144
|
+
# scanner.pos is byte position within head_content
|
|
145
|
+
byte_script_end = scanner.pos
|
|
146
|
+
# Convert to character position using pre-built map
|
|
147
|
+
first_script_end = byte_to_char_map[byte_script_end]
|
|
128
148
|
return head_start + first_script_end
|
|
129
149
|
end
|
|
130
150
|
end
|
|
@@ -149,5 +169,43 @@ module Rhales
|
|
|
149
169
|
# No safe position found
|
|
150
170
|
nil
|
|
151
171
|
end
|
|
172
|
+
|
|
173
|
+
# Builds a mapping from byte positions to character positions for efficient
|
|
174
|
+
# conversion when processing UTF-8 strings with StringScanner.
|
|
175
|
+
#
|
|
176
|
+
# This method creates a hash where keys are byte positions and values are
|
|
177
|
+
# the corresponding character positions. For multibyte UTF-8 characters,
|
|
178
|
+
# only the starting byte position has an entry in the map.
|
|
179
|
+
#
|
|
180
|
+
# @param str [String] The UTF-8 encoded string to map
|
|
181
|
+
# @return [Hash<Integer, Integer>] A hash mapping byte positions to character positions
|
|
182
|
+
#
|
|
183
|
+
# @example ASCII string
|
|
184
|
+
# build_byte_to_char_map("Hello")
|
|
185
|
+
# # => {0=>0, 1=>1, 2=>2, 3=>3, 4=>4, 5=>5}
|
|
186
|
+
#
|
|
187
|
+
# @example UTF-8 with multibyte characters
|
|
188
|
+
# build_byte_to_char_map("café") # é is 2 bytes
|
|
189
|
+
# # => {0=>0, 1=>1, 2=>2, 3=>3, 5=>4} # Note: byte 4 is continuation byte
|
|
190
|
+
#
|
|
191
|
+
def build_byte_to_char_map(str)
|
|
192
|
+
map = {}
|
|
193
|
+
char_pos = 0
|
|
194
|
+
byte_pos = 0
|
|
195
|
+
|
|
196
|
+
# Iterate through each character (not byte) in the string
|
|
197
|
+
str.each_char do |char|
|
|
198
|
+
# Map the starting byte position of this character
|
|
199
|
+
map[byte_pos] = char_pos
|
|
200
|
+
|
|
201
|
+
# Advance byte position by the byte size of this character
|
|
202
|
+
byte_pos += char.bytesize
|
|
203
|
+
char_pos += 1
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
# Add final mapping for the end of the string
|
|
207
|
+
map[byte_pos] = char_pos
|
|
208
|
+
map
|
|
209
|
+
end
|
|
152
210
|
end
|
|
153
211
|
end
|
|
@@ -55,16 +55,21 @@ module Rhales
|
|
|
55
55
|
def calculate_unsafe_ranges
|
|
56
56
|
ranges = []
|
|
57
57
|
scanner = StringScanner.new(@html)
|
|
58
|
+
byte_to_char_map = build_byte_to_char_map(@html)
|
|
58
59
|
|
|
59
60
|
UNSAFE_CONTEXTS.each do |context|
|
|
60
61
|
scanner.pos = 0
|
|
61
62
|
|
|
62
63
|
while scanner.scan_until(context[:start])
|
|
63
|
-
|
|
64
|
+
# Convert byte position to character position using pre-built map
|
|
65
|
+
byte_start_pos = scanner.pos - scanner.matched.length
|
|
66
|
+
start_pos = byte_to_char_map[byte_start_pos]
|
|
64
67
|
|
|
65
68
|
# Find the corresponding end tag
|
|
66
69
|
if scanner.scan_until(context[:end])
|
|
67
|
-
|
|
70
|
+
# Convert byte position to character position using pre-built map
|
|
71
|
+
byte_end_pos = scanner.pos
|
|
72
|
+
end_pos = byte_to_char_map[byte_end_pos]
|
|
68
73
|
ranges << (start_pos...end_pos)
|
|
69
74
|
else
|
|
70
75
|
# If no closing tag found, consider rest of document unsafe
|
|
@@ -99,5 +104,43 @@ module Rhales
|
|
|
99
104
|
|
|
100
105
|
pos < @html.length && @html[pos] == '<'
|
|
101
106
|
end
|
|
107
|
+
|
|
108
|
+
# Builds a mapping from byte positions to character positions for efficient
|
|
109
|
+
# conversion when processing UTF-8 strings with StringScanner.
|
|
110
|
+
#
|
|
111
|
+
# This method creates a hash where keys are byte positions and values are
|
|
112
|
+
# the corresponding character positions. For multibyte UTF-8 characters,
|
|
113
|
+
# only the starting byte position has an entry in the map.
|
|
114
|
+
#
|
|
115
|
+
# @param str [String] The UTF-8 encoded string to map
|
|
116
|
+
# @return [Hash<Integer, Integer>] A hash mapping byte positions to character positions
|
|
117
|
+
#
|
|
118
|
+
# @example ASCII string
|
|
119
|
+
# build_byte_to_char_map("Hello")
|
|
120
|
+
# # => {0=>0, 1=>1, 2=>2, 3=>3, 4=>4, 5=>5}
|
|
121
|
+
#
|
|
122
|
+
# @example UTF-8 with multibyte characters
|
|
123
|
+
# build_byte_to_char_map("café") # é is 2 bytes
|
|
124
|
+
# # => {0=>0, 1=>1, 2=>2, 3=>3, 5=>4} # Note: byte 4 is continuation byte
|
|
125
|
+
#
|
|
126
|
+
def build_byte_to_char_map(str)
|
|
127
|
+
map = {}
|
|
128
|
+
char_pos = 0
|
|
129
|
+
byte_pos = 0
|
|
130
|
+
|
|
131
|
+
# Iterate through each character (not byte) in the string
|
|
132
|
+
str.each_char do |char|
|
|
133
|
+
# Map the starting byte position of this character
|
|
134
|
+
map[byte_pos] = char_pos
|
|
135
|
+
|
|
136
|
+
# Advance byte position by the byte size of this character
|
|
137
|
+
byte_pos += char.bytesize
|
|
138
|
+
char_pos += 1
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Add final mapping for the end of the string
|
|
142
|
+
map[byte_pos] = char_pos
|
|
143
|
+
map
|
|
144
|
+
end
|
|
102
145
|
end
|
|
103
146
|
end
|
data/lib/rhales/version.rb
CHANGED