gammo 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +32 -0
- data/Gemfile.lock +6 -6
- data/README.md +334 -10
- data/Rakefile +5 -1
- data/lib/gammo/attributes.rb +5 -0
- data/lib/gammo/css_selector/ast/combinator.rb +92 -0
- data/lib/gammo/css_selector/ast/selector/attrib_selector.rb +86 -0
- data/lib/gammo/css_selector/ast/selector/class_selector.rb +19 -0
- data/lib/gammo/css_selector/ast/selector/id_selector.rb +18 -0
- data/lib/gammo/css_selector/ast/selector/negation.rb +21 -0
- data/lib/gammo/css_selector/ast/selector/pseudo_class.rb +92 -0
- data/lib/gammo/css_selector/ast/selector.rb +100 -0
- data/lib/gammo/css_selector/context.rb +17 -0
- data/lib/gammo/css_selector/errors.rb +6 -0
- data/lib/gammo/css_selector/node_set.rb +44 -0
- data/lib/gammo/css_selector/parser.rb +790 -0
- data/lib/gammo/css_selector/parser.y +321 -0
- data/lib/gammo/css_selector.rb +33 -0
- data/lib/gammo/modules/subclassify.rb +31 -0
- data/lib/gammo/node.rb +2 -0
- data/lib/gammo/parser/foreign.rb +3 -3
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_html.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_head.rb +3 -2
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_select.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_table.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_template.rb +1 -1
- data/lib/gammo/parser/insertion_mode/initial.rb +1 -1
- data/lib/gammo/parser/insertion_mode/text.rb +1 -1
- data/lib/gammo/parser/insertion_mode.rb +1 -1
- data/lib/gammo/tokenizer/tokens.rb +10 -1
- data/lib/gammo/tokenizer.rb +10 -10
- data/lib/gammo/version.rb +1 -1
- data/lib/gammo/xpath/ast/axis.rb +1 -1
- data/lib/gammo/xpath/ast/expression.rb +2 -0
- data/lib/gammo/xpath/ast/function.rb +1 -1
- data/lib/gammo/xpath/ast/node_test.rb +1 -1
- data/lib/gammo/xpath/ast/path.rb +1 -0
- data/lib/gammo/xpath.rb +4 -5
- metadata +17 -4
- data/.travis.yml +0 -6
- data/lib/gammo/xpath/ast/subclassify.rb +0 -35
@@ -0,0 +1,321 @@
|
|
1
|
+
class Gammo::CSSSelector::Parser
|
2
|
+
|
3
|
+
# Terminal symbols accepted by the grammar. Each symbol is listed once
# (T_ASTERISK used to appear twice in this list).
token T_COMMA
      T_PLUS
      T_MINUS
      T_HYPHEN
      T_DIMENSION
      T_NUMBER
      T_STRING
      T_IDENT
      T_NOT
      T_RBRACK
      T_HASH
      T_S
      T_GREATER
      T_TILDE
      T_DOT
      T_PIPE
      T_ASTERISK
      T_LBRACK
      T_PREFIXMATCH
      T_SUFFIXMATCH
      T_SUBSTRINGMATCH
      T_EQUAL
      T_INCLUDES
      T_DASHMATCH
      T_COLON
      T_FUNCTION
      T_RPAREN
|
31
|
+
|
32
|
+
start selectors_group
|
33
|
+
|
34
|
+
rule
|
35
|
+
optional_whitespaces:
|
36
|
+
| repeatable_whitespaces
|
37
|
+
|
38
|
+
repeatable_whitespaces:
|
39
|
+
T_S
|
40
|
+
| repeatable_whitespaces T_S
|
41
|
+
|
42
|
+
# selectors_group: selector [ COMMA S* selector ]*
|
43
|
+
selectors_group:
|
44
|
+
repeatable_selectors { result = val[0] }
|
45
|
+
|
46
|
+
repeatable_selectors:
|
47
|
+
selector {
|
48
|
+
result = AST::SelectorsGroup.new
|
49
|
+
result << val[0]
|
50
|
+
}
|
51
|
+
| repeatable_selectors optional_whitespaces T_COMMA optional_whitespaces selector {
|
52
|
+
result = val[0]
|
53
|
+
result << val[4]
|
54
|
+
}
|
55
|
+
|
56
|
+
# selector: simple_selector_sequence [ combinator simple_selector_sequence ]*
|
57
|
+
selector:
|
58
|
+
repeatable_simple_selector_sequence
|
59
|
+
|
60
|
+
# combinators can be surrounded by whitespace
|
61
|
+
# combinator: PLUS S* | GREATER S* | TILDE S* | S+
|
62
|
+
combinator:
|
63
|
+
optional_whitespaces T_PLUS optional_whitespaces { result = :next_sibling }
|
64
|
+
| optional_whitespaces T_GREATER optional_whitespaces { result = :child }
|
65
|
+
| optional_whitespaces T_TILDE optional_whitespaces { result = :subsequent_sibling }
|
66
|
+
| repeatable_whitespaces { result = :descendant }
|
67
|
+
|
68
|
+
repeatable_simple_selector_sequence:
|
69
|
+
simple_selector_sequence {
|
70
|
+
result = val[0]
|
71
|
+
}
|
72
|
+
| repeatable_simple_selector_sequence combinator simple_selector_sequence {
|
73
|
+
result = val[0]
|
74
|
+
result.combine(AST::Combinator.fetch(val[1]).new(val[2]))
|
75
|
+
}
|
76
|
+
|
77
|
+
# simple_selector_sequence:
|
78
|
+
# [ type_selector | universal ]
|
79
|
+
# [ HASH | class | attrib | pseudo | negation ]*
|
80
|
+
# | [ HASH | class | attrib | pseudo | negation ]+
|
81
|
+
simple_selector_sequence:
|
82
|
+
type_selector
|
83
|
+
| universal
|
84
|
+
| type_selector repeatable_selector_operators {
|
85
|
+
val[0].selectors.concat(val[1])
|
86
|
+
result = val[0]
|
87
|
+
}
|
88
|
+
| universal repeatable_selector_operators {
|
89
|
+
val[0].selectors.concat(val[1])
|
90
|
+
result = val[0]
|
91
|
+
}
|
92
|
+
| repeatable_selector_operators {
|
93
|
+
any = AST::Selector::Universal.new
|
94
|
+
any.selectors.concat(val[0])
|
95
|
+
result = any
|
96
|
+
}
|
97
|
+
|
98
|
+
repeatable_selector_operators:
|
99
|
+
selector_operators { result = [val[0]] }
|
100
|
+
| repeatable_selector_operators selector_operators { result = val[0] << val[1] }
|
101
|
+
|
102
|
+
selector_operators:
|
103
|
+
hash
|
104
|
+
| class
|
105
|
+
| attrib
|
106
|
+
| pseudo
|
107
|
+
| negation
|
108
|
+
|
109
|
+
# type_selector: [ namespace_prefix ]? element_name
|
110
|
+
type_selector:
|
111
|
+
element_name { result = AST::Selector::Type.new(element_name: val[0]) }
|
112
|
+
| namespace_prefix element_name { result = AST::Selector::Type.new(element_name: val[1], namespace_prefix: val[0]) }
|
113
|
+
|
114
|
+
# namespace_prefix: [ IDENT | '*' ]? '|'
|
115
|
+
# Result is the text in front of the pipe for the 'IDENT|' and '*|' forms.
# For a bare '|' the pipe token itself is returned (unchanged behaviour).
# The '*|' form now yields the '*' wildcard (val[0]) instead of the '|'
# separator (val[1]), matching the 'IDENT|' alternative.
namespace_prefix:
    T_PIPE { result = val[0] }
  | T_IDENT T_PIPE { result = val[0] }
  | T_ASTERISK T_PIPE { result = val[0] }
|
119
|
+
|
120
|
+
# element_name: IDENT
|
121
|
+
element_name: T_IDENT { result = val[0] }
|
122
|
+
|
123
|
+
# universal: [ namespace_prefix ]? '*'
|
124
|
+
universal:
|
125
|
+
namespace_prefix T_ASTERISK { result = AST::Selector::Universal.new(namespace_prefix: val[0]) }
|
126
|
+
| T_ASTERISK { result = AST::Selector::Universal.new }
|
127
|
+
|
128
|
+
# class: '.' IDENT
|
129
|
+
class: T_DOT T_IDENT { result = AST::Selector::Class.new(val[1]) }
|
130
|
+
|
131
|
+
# attrib: '[' S* [ namespace_prefix ]? IDENT S*
|
132
|
+
# [ [ PREFIXMATCH |
|
133
|
+
# SUFFIXMATCH |
|
134
|
+
# SUBSTRINGMATCH |
|
135
|
+
# '=' |
|
136
|
+
# INCLUDES |
|
137
|
+
# DASHMATCH ] S* [ IDENT | STRING ] S*
|
138
|
+
# ]? ']'
|
139
|
+
# val layout of the namespaced form:
#   0 '['  1 S*  2 namespace_prefix  3 IDENT  4 S*  5 optional_attrib_clause  6 ']'
# val layout of the plain form:
#   0 '['  1 S*  2 IDENT  3 S*  4 optional_attrib_clause  5 ']'
# optional_attrib_clause is either empty or an [operator, value] pair.
attrib:
    T_LBRACK optional_whitespaces namespace_prefix T_IDENT optional_whitespaces optional_attrib_clause T_RBRACK {
      op, value = val[5]
      result = AST::Selector::Attrib.fetch(op).new(key: val[3], value: value, namespace_prefix: val[2])
    }
  | T_LBRACK optional_whitespaces T_IDENT optional_whitespaces optional_attrib_clause T_RBRACK {
      op, value = val[4]
      result = AST::Selector::Attrib.fetch(op).new(key: val[2], value: value)
    }
|
148
|
+
|
149
|
+
optional_attrib_clause:
|
150
|
+
| attrib_operators optional_whitespaces T_IDENT optional_whitespaces { result = [val[0], val[2]] }
|
151
|
+
| attrib_operators optional_whitespaces T_STRING optional_whitespaces { result = [val[0], val[2]] }
|
152
|
+
|
153
|
+
attrib_operators:
|
154
|
+
T_PREFIXMATCH { result = :prefix_match }
|
155
|
+
| T_SUFFIXMATCH { result = :suffix_match }
|
156
|
+
| T_SUBSTRINGMATCH { result = :substring_match }
|
157
|
+
| T_EQUAL { result = :equal }
|
158
|
+
| T_INCLUDES { result = :includes }
|
159
|
+
| T_DASHMATCH { result = :dash_match }
|
160
|
+
|
161
|
+
# pseudo: ':' ':'? [ IDENT | functional_pseudo ]
|
162
|
+
pseudo:
|
163
|
+
T_COLON optional_colon T_IDENT { result = AST::Selector::Pseudo.fetch(val[2]).new }
|
164
|
+
| T_COLON optional_colon functional_pseudo { result = val[2] }
|
165
|
+
|
166
|
+
optional_colon: | T_COLON
|
167
|
+
|
168
|
+
# functional_pseudo: FUNCTION S* expression ')'
|
169
|
+
functional_pseudo:
|
170
|
+
T_FUNCTION optional_whitespaces repeatable_expressions T_RPAREN { result = AST::Selector::Pseudo.fetch(val[0].slice(0..-2)).new(val[2]) }
|
171
|
+
|
172
|
+
# expression: [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
|
173
|
+
expression:
|
174
|
+
T_PLUS | T_MINUS | T_HYPHEN | T_DIMENSION | T_NUMBER | T_STRING | T_IDENT { result = val[0] }
|
175
|
+
|
176
|
+
repeatable_expressions:
|
177
|
+
expression optional_whitespaces { result = [val[0]] }
|
178
|
+
| repeatable_expressions expression optional_whitespaces {
|
179
|
+
val[0] << val[1]
|
180
|
+
result = val[0]
|
181
|
+
}
|
182
|
+
|
183
|
+
# negation: NOT S* negation_arg S* ')'
|
184
|
+
negation:
|
185
|
+
T_NOT optional_whitespaces negation_arg optional_whitespaces T_RPAREN { result = AST::Selector::Negation.new(val[2]) }
|
186
|
+
|
187
|
+
# negation_arg: type_selector | universal | HASH | class | attrib | pseudo
|
188
|
+
negation_arg:
|
189
|
+
type_selector | universal | hash | class | attrib | pseudo
|
190
|
+
|
191
|
+
hash:
|
192
|
+
T_HASH { result = AST::Selector::ID.new(val[0]) }
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
---- inner
|
197
|
+
|
198
|
+
# Regexp building blocks for the lexer rules registered further down via
# `map`. The names and shapes appear to follow the lexical scanner of the
# W3C Selectors grammar -- TODO confirm against the spec.
#
# NOTE(review): the character classes are lowercase-only and no /i flag is
# set, so text containing A-Z does not match NMSTART/NMCHAR (and therefore
# IDENT/NAME). Confirm that this is intended.
NONASCII = /[^\0-\177]/
# Backslash, 1-6 hex digits, optionally followed by one whitespace sequence.
UNICODE = /\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?/
ESCAPE = /#{UNICODE}|\\[^\n\r\f0-9a-f]/
NMCHAR = /[_a-z0-9-]|#{NONASCII}|#{ESCAPE}/
NMSTART = /[_a-z]|#{NONASCII}|#{ESCAPE}/
NUM = /[0-9]+|[0-9]*\.[0-9]+/
NAME = /#{NMCHAR}+/
IDENT = /[-]?#{NMSTART}#{NMCHAR}*/
NL = /\n|\r\n|\r|\f/
# Complete double- and single-quoted strings.
STRING1 = /\"([^\n\r\f\\"]|\\#{NL}|#{NONASCII}|#{ESCAPE})*\"/
STRING2 = /\'([^\n\r\f\\']|\\#{NL}|#{NONASCII}|#{ESCAPE})*\'/
STRING = /#{STRING1}|#{STRING2}/
# Same as STRING1/STRING2 but without the closing quote.
INVALID1 = /\"([^\n\r\f\\"]|\\#{NL}|#{NONASCII}|#{ESCAPE})*/
INVALID2 = /\'([^\n\r\f\\']|\\#{NL}|#{NONASCII}|#{ESCAPE})*/
INVALID = /#{INVALID1}|#{INVALID2}/
# Zero or more whitespace characters (S below requires at least one).
W = /[ \t\r\n\f]*/
# Single-letter matchers: the plain letter or a backslash-hex escape of it.
# N, O and T are combined into the `:not(` pattern below; D, E and V are not
# referenced anywhere in the visible code.
D = /d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?/
E = /e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?/
N = /n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n/
O = /o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o/
T = /t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t/
# NOTE(review): 58/78 are the code points of 'X'/'x'; 'V'/'v' are 56/76.
V = /v|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\v/
S = /[ \t\r\n\f]+/
|
221
|
+
|
222
|
+
require 'strscan'
|
223
|
+
require 'forwardable'
|
224
|
+
require 'gammo/css_selector/errors'
|
225
|
+
require 'gammo/css_selector/ast/selector'
|
226
|
+
require 'gammo/css_selector/ast/combinator'
|
227
|
+
|
228
|
+
extend Forwardable
|
229
|
+
def_delegators :@scanner, :scan, :eos?
|
230
|
+
|
231
|
+
# Builds a parser for one selector expression.
#
# @param [String] input selector source text to tokenize and parse
# @param [Boolean] debug value stored in @yydebug; defaults to false so the
#   library no longer switches the parser debug flag on unconditionally
def initialize(input, debug: false)
  super()
  @yydebug = debug
  @input = input
  @scanner = StringScanner.new(input)
end
|
237
|
+
|
238
|
+
# Tokenizes the whole input into @query first, then hands control to
# do_parse, which consumes the buffered tokens through next_token.
#
# @return [Object] whatever do_parse returns
def parse
  @query = []
  advance do |type, value|
    @query.push([type, value])
  end
  do_parse
end
|
243
|
+
|
244
|
+
# Records the kind of token being emitted in @prev_token, then passes the
# (symbol, val) pair on to the given block.
#
# @param [Symbol] symbol token kind
# @param [Object] val token text
# @return [Object] the block's result
def token(symbol, val, &block)
  @prev_token = symbol
  block.(symbol, val)
end
|
248
|
+
|
249
|
+
# Removes and returns the oldest [symbol, value] pair buffered in @query by
# #parse; returns nil once the buffer is empty.
def next_token
  @query.shift
end
|
252
|
+
|
253
|
+
# Single-character punctuation and the token symbol each one produces.
EXPR_TOKENS = {
  '=' => :T_EQUAL,
  '[' => :T_LBRACK,
  ']' => :T_RBRACK,
  ')' => :T_RPAREN,
  '.' => :T_DOT,
  ',' => :T_COMMA,
  ':' => :T_COLON
}.freeze

# Matches exactly the EXPR_TOKENS keys. An '@' alternative used to be listed
# here although EXPR_TOKENS has no entry for it, so a bare '@' matched and
# then failed inside #fetch; it now falls through to the generic
# "unexpected token" error in #advance instead.
EXPRS = /=|\[|\]|,|\.|\)|\:/
|
265
|
+
|
266
|
+
# Looks up the token symbol registered for +key+ and yields it.
#
# @param [String] key scanned text to look up
# @param [Hash] constraints accepted texts mapped to their token symbols
# @yieldparam [Symbol] symbol the symbol registered for +key+
# @return [Object] the block's result
# @raise [ParseError] when +key+ has no entry in +constraints+
def fetch(key, constraints)
  symbol = constraints[key]
  # Report the offending text: +symbol+ is always nil on this path, so
  # interpolating it (as before) produced an empty token in the message.
  fail ParseError, "unexpected token: #{key}, want = #{constraints.keys}" unless symbol
  yield symbol
end
|
272
|
+
|
273
|
+
# Ordered table of lexer rules; entries are appended by `map`.
LEXER_TOKENS = []

# One lexer rule: the regexp to scan for, the token symbol to emit, and an
# optional range used to slice the matched text.
Pattern = Struct.new(:pattern, :token, :range)

# Appends a lexer rule to LEXER_TOKENS.
#
# @param [Regexp] pattern
# @param [Symbol] token
# @param [Range, nil] range slice applied to the matched text, if any
# @return [Array<Pattern>] LEXER_TOKENS itself
def self.map(pattern, token, range: nil)
  rule = Pattern.new(pattern, token, range)
  LEXER_TOKENS.push(rule)
end
|
278
|
+
|
279
|
+
map(S, :T_S)
|
280
|
+
map(/\~=/, :T_INCLUDES)
|
281
|
+
map(/\|=/, :T_DASHMATCH)
|
282
|
+
map(/\^=/, :T_PREFIXMATCH)
|
283
|
+
map(/\$=/, :T_SUFFIXMATCH)
|
284
|
+
map(/\*=/, :T_SUBSTRINGMATCH)
|
285
|
+
map(/<!--/, :T_CDO)
|
286
|
+
map(/-->/, :T_CDC)
|
287
|
+
map(/#{IDENT}\(/, :T_FUNCTION)
|
288
|
+
map(/#{NUM}%/, :T_PERCENTAGE)
|
289
|
+
map(/#{NUM}#{IDENT}/, :T_DIMENSION)
|
290
|
+
map(IDENT, :T_IDENT)
|
291
|
+
map(STRING, :T_STRING, range: 1..-2) # Remove quotes
|
292
|
+
map(NUM, :T_NUMBER)
|
293
|
+
map(/##{NAME}/, :T_HASH, range: 1..-1) # Remove hash ('#')
|
294
|
+
map(/#{W}\+/, :T_PLUS)
|
295
|
+
map(/#{W}\-/, :T_MINUS)
|
296
|
+
map(/#{W}>/, :T_GREATER)
|
297
|
+
map(/#{W},/, :T_COMMA)
|
298
|
+
map(/#{W}~/, :T_TILDE)
|
299
|
+
map(/:#{N}#{O}#{T}\(/, :T_NOT)
|
300
|
+
map(/@#{IDENT}/, :T_ATKEYWORD)
|
301
|
+
map(/#{INVALID}/, :T_INVALID)
|
302
|
+
map(/\|/, :T_PIPE)
|
303
|
+
map(/\*/, :T_ASTERISK)
|
304
|
+
|
305
|
+
# TODO: ignore comment token
|
306
|
+
# Tokenizes the scanner's remaining input, emitting every token through
# #token with the given block.
#
# At each position the LEXER_TOKENS rules are tried in order and the first
# one that scans successfully wins; if none does, single-character
# punctuation from EXPRS is tried; anything else is a parse error.
#
# @raise [ParseError] when the input at the current position matches nothing
# @return [nil]
def advance(&block)
  @prev_token = nil
  until eos?
    lexed = LEXER_TOKENS.any? do |rule|
      text = scan(rule.pattern)
      next false unless text
      # Optional slice, e.g. to drop surrounding quotes or a leading '#'.
      text = text[rule.range] if rule.range
      token(rule.token, text, &block)
      true
    end
    next if lexed

    punctuation = scan(EXPRS)
    unless punctuation
      fail ParseError, "unexpected token: '#{@scanner.string[@scanner.pos..-1]}'"
    end
    fetch(punctuation, EXPR_TOKENS) { |symbol| token(symbol, punctuation, &block) }
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'gammo/css_selector/context'
|
2
|
+
require 'gammo/css_selector/parser'
|
3
|
+
|
4
|
+
module Gammo
  module CSSSelector
    # Evaluates a CSS selector expression against a DOM tree built by
    # Gammo::Parser.
    # @!visibility private
    class Traverser
      # Stores the selector expression for later evaluation.
      # @param [String] expr
      # @!visibility private
      def initialize(expr)
        @expr = expr
      end

      # Parses the stored expression and evaluates the parse result against
      # the given context.
      # @param [Gammo::CSSSelector::Context] context
      # @return [Gammo::CSSSelector::NodeSet]
      # @!visibility private
      def evaluate(context)
        parsed = Parser.new(@expr).parse
        parsed.evaluate(context)
      end
    end

    # Runs a selector expression with the receiver as the context node and
    # returns the resulting node set. Also available as #css.
    # @param [String] expr
    # @return [Gammo::CSSSelector::NodeSet]
    def query_selector_all(expr)
      traverser = Traverser.new(expr)
      traverser.evaluate(Context.new(node: self))
    end
    alias css query_selector_all
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Gammo
  # Mixin (meant to be `extend`ed by a base class) that lets subclasses
  # register themselves under a key and lets the base class look them up.
  # @!visibility private
  module Subclassify
    NotFoundError = Class.new(ArgumentError)

    # Registry of declared classes, created lazily on the receiver.
    # @!visibility private
    def map
      @map ||= {}
    end

    # Registers the receiver under +key+ in its superclass's registry.
    # @!visibility private
    def declare(key)
      registry = look_for_superclass.map
      registry[key] = self
    end

    # Returns the class registered under +key+ (converted with #to_sym).
    # @raise [NotFoundError] when nothing is registered under that key
    # @!visibility private
    def fetch(key)
      found = map[key.to_sym]
      return found if found

      fail NotFoundError, "%s not found" % key
    end

    private

    # Locates the receiver's direct superclass within its ancestor chain.
    # @!visibility private
    def look_for_superclass
      parent = superclass
      ancestors.find { |candidate| candidate == parent }
    end
  end
end
|
data/lib/gammo/node.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'gammo/css_selector'
|
1
2
|
require 'gammo/xpath'
|
2
3
|
require 'gammo/attributes'
|
3
4
|
|
@@ -58,6 +59,7 @@ module Gammo
|
|
58
59
|
# Represents the root document token.
|
59
60
|
class Document < Node
|
60
61
|
include XPath
|
62
|
+
include CSSSelector
|
61
63
|
end
|
62
64
|
|
63
65
|
# Represents the element token including start, end and self-closing token.
|
data/lib/gammo/parser/foreign.rb
CHANGED
@@ -177,7 +177,7 @@ module Gammo
|
|
177
177
|
|
178
178
|
def parse_foreign_content
|
179
179
|
case token
|
180
|
-
when Tokenizer::
|
180
|
+
when Tokenizer::CharacterToken
|
181
181
|
self.frameset_ok = token.data.lstrip.sub(/\A\x00*/, '').lstrip.empty? if frameset_ok
|
182
182
|
token.data = token.data.gsub(/\x00/, "\ufffd")
|
183
183
|
add_text token.data
|
@@ -249,11 +249,11 @@ module Gammo
|
|
249
249
|
if math_ml_text_integration_point?(node)
|
250
250
|
return false if token.instance_of?(Tokenizer::StartTagToken) && token.tag != Tags::Mglyph &&
|
251
251
|
token.tag != Tags::Malignmark
|
252
|
-
return false if token.instance_of?(Tokenizer::
|
252
|
+
return false if token.instance_of?(Tokenizer::CharacterToken)
|
253
253
|
end
|
254
254
|
return false if node.namespace == 'math' && node.tag == Tags::AnnotationXml && \
|
255
255
|
token.instance_of?(Tokenizer::StartTagToken) && token.tag == Tags::Svg
|
256
|
-
return false if html_integration_point?(node) && (token.instance_of?(Tokenizer::StartTagToken) || token.instance_of?(Tokenizer::
|
256
|
+
return false if html_integration_point?(node) && (token.instance_of?(Tokenizer::StartTagToken) || token.instance_of?(Tokenizer::CharacterToken))
|
257
257
|
return false if token.instance_of? Tokenizer::ErrorToken
|
258
258
|
true
|
259
259
|
end
|
@@ -2,7 +2,7 @@ module Gammo
|
|
2
2
|
class Parser
|
3
3
|
# Section 12.2.6.4.4.
|
4
4
|
class InHead < InsertionMode
|
5
|
-
def
|
5
|
+
def character_token(token)
|
6
6
|
s = token.data.lstrip
|
7
7
|
if s.length < token.data.length
|
8
8
|
# add the initial whitespace to the current node.
|
@@ -90,7 +90,8 @@ module Gammo
|
|
90
90
|
end
|
91
91
|
|
92
92
|
def default(_)
|
93
|
-
parser.
|
93
|
+
parser.open_elements.pop
|
94
|
+
parser.insertion_mode = AfterHead
|
94
95
|
halt false
|
95
96
|
end
|
96
97
|
end
|
@@ -2,7 +2,7 @@ module Gammo
|
|
2
2
|
class Parser
|
3
3
|
# Section 12.2.6.4.9.
|
4
4
|
class InTable < InsertionMode
|
5
|
-
def
|
5
|
+
def character_token(token)
|
6
6
|
token.data = token.data.gsub("\x00", "")
|
7
7
|
case parser.open_elements.last.tag
|
8
8
|
when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr
|
@@ -12,7 +12,7 @@ module Gammo
|
|
12
12
|
def process
|
13
13
|
case token = parser.token
|
14
14
|
when Tokenizer::ErrorToken then consume(:error_token)
|
15
|
-
when Tokenizer::
|
15
|
+
when Tokenizer::CharacterToken then consume(:character_token)
|
16
16
|
when Tokenizer::StartTagToken then consume(:start_tag_token)
|
17
17
|
when Tokenizer::EndTagToken then consume(:end_tag_token)
|
18
18
|
when Tokenizer::SelfClosingTagToken then consume(:self_closing_tag_token)
|
@@ -14,6 +14,15 @@ module Gammo
|
|
14
14
|
def concat(s)
|
15
15
|
data << s
|
16
16
|
end
|
17
|
+
|
18
|
+
# Renders the token as "<ClassName member="value" ...>".
# tag and data are listed only when set; attributes only when set and
# non-empty.
#
# @return [String]
def to_s
  # The unused local `s` that held a partial prefix string has been dropped.
  members = []
  members << "tag=\"#{tag}\"" if tag
  members << "data=\"#{data}\"" if data
  members << "attributes=\"#{attributes}\"" if attributes && !attributes.empty?
  "<#{self.class} #{members.join(' ')}>"
end
|
17
26
|
end
|
18
27
|
|
19
28
|
class EscapedToken < BaseToken
|
@@ -56,7 +65,7 @@ module Gammo
|
|
56
65
|
end
|
57
66
|
|
58
67
|
ErrorToken = Class.new(BaseToken)
|
59
|
-
|
68
|
+
CharacterToken = Class.new(EscapedToken)
|
60
69
|
StartTagToken = Class.new(BaseToken)
|
61
70
|
EndTagToken = Class.new(BaseToken)
|
62
71
|
SelfClosingTagToken = Class.new(BaseToken)
|