gammo 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +32 -0
  3. data/Gemfile.lock +6 -6
  4. data/README.md +334 -10
  5. data/Rakefile +5 -1
  6. data/lib/gammo/attributes.rb +5 -0
  7. data/lib/gammo/css_selector/ast/combinator.rb +92 -0
  8. data/lib/gammo/css_selector/ast/selector/attrib_selector.rb +86 -0
  9. data/lib/gammo/css_selector/ast/selector/class_selector.rb +19 -0
  10. data/lib/gammo/css_selector/ast/selector/id_selector.rb +18 -0
  11. data/lib/gammo/css_selector/ast/selector/negation.rb +21 -0
  12. data/lib/gammo/css_selector/ast/selector/pseudo_class.rb +92 -0
  13. data/lib/gammo/css_selector/ast/selector.rb +100 -0
  14. data/lib/gammo/css_selector/context.rb +17 -0
  15. data/lib/gammo/css_selector/errors.rb +6 -0
  16. data/lib/gammo/css_selector/node_set.rb +44 -0
  17. data/lib/gammo/css_selector/parser.rb +790 -0
  18. data/lib/gammo/css_selector/parser.y +321 -0
  19. data/lib/gammo/css_selector.rb +33 -0
  20. data/lib/gammo/modules/subclassify.rb +31 -0
  21. data/lib/gammo/node.rb +2 -0
  22. data/lib/gammo/parser/foreign.rb +3 -3
  23. data/lib/gammo/parser/insertion_mode/after_after_body.rb +1 -1
  24. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +1 -1
  25. data/lib/gammo/parser/insertion_mode/after_body.rb +1 -1
  26. data/lib/gammo/parser/insertion_mode/after_frameset.rb +1 -1
  27. data/lib/gammo/parser/insertion_mode/after_head.rb +1 -1
  28. data/lib/gammo/parser/insertion_mode/before_head.rb +1 -1
  29. data/lib/gammo/parser/insertion_mode/before_html.rb +1 -1
  30. data/lib/gammo/parser/insertion_mode/in_body.rb +1 -1
  31. data/lib/gammo/parser/insertion_mode/in_column_group.rb +1 -1
  32. data/lib/gammo/parser/insertion_mode/in_frameset.rb +1 -1
  33. data/lib/gammo/parser/insertion_mode/in_head.rb +3 -2
  34. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +1 -1
  35. data/lib/gammo/parser/insertion_mode/in_select.rb +1 -1
  36. data/lib/gammo/parser/insertion_mode/in_table.rb +1 -1
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +1 -1
  38. data/lib/gammo/parser/insertion_mode/initial.rb +1 -1
  39. data/lib/gammo/parser/insertion_mode/text.rb +1 -1
  40. data/lib/gammo/parser/insertion_mode.rb +1 -1
  41. data/lib/gammo/tokenizer/tokens.rb +10 -1
  42. data/lib/gammo/tokenizer.rb +10 -10
  43. data/lib/gammo/version.rb +1 -1
  44. data/lib/gammo/xpath/ast/axis.rb +1 -1
  45. data/lib/gammo/xpath/ast/expression.rb +2 -0
  46. data/lib/gammo/xpath/ast/function.rb +1 -1
  47. data/lib/gammo/xpath/ast/node_test.rb +1 -1
  48. data/lib/gammo/xpath/ast/path.rb +1 -0
  49. data/lib/gammo/xpath.rb +4 -5
  50. metadata +17 -4
  51. data/.travis.yml +0 -6
  52. data/lib/gammo/xpath/ast/subclassify.rb +0 -35
@@ -0,0 +1,321 @@
1
+ class Gammo::CSSSelector::Parser
2
+
3
# Terminal symbols accepted by the grammar below.
# T_ASTERISK used to be listed twice; the duplicate declaration is dropped.
token T_COMMA
      T_PLUS
      T_MINUS
      T_HYPHEN
      T_DIMENSION
      T_NUMBER
      T_STRING
      T_IDENT
      T_NOT
      T_RBRACK
      T_HASH
      T_S
      T_GREATER
      T_TILDE
      T_DOT
      T_PIPE
      T_ASTERISK
      T_LBRACK
      T_PREFIXMATCH
      T_SUFFIXMATCH
      T_SUBSTRINGMATCH
      T_EQUAL
      T_INCLUDES
      T_DASHMATCH
      T_COLON
      T_FUNCTION
      T_RPAREN
31
+
32
+ start selectors_group
33
+
34
+ rule
35
# Zero or more T_S tokens (the first alternative is intentionally empty).
optional_whitespaces:
  | repeatable_whitespaces

# One or more T_S tokens.
repeatable_whitespaces:
    T_S
  | repeatable_whitespaces T_S
41
+
42
# selectors_group: selector [ COMMA S* selector ]*
# Start symbol; its value is the group built by repeatable_selectors.
selectors_group:
    repeatable_selectors { result = val[0] }

# Accumulates comma-separated selectors into one AST::SelectorsGroup.
# In the recursive alternative val[4] is the selector following the comma.
repeatable_selectors:
    selector {
      result = AST::SelectorsGroup.new
      result << val[0]
    }
  | repeatable_selectors optional_whitespaces T_COMMA optional_whitespaces selector {
      result = val[0]
      result << val[4]
    }

# selector: simple_selector_sequence [ combinator simple_selector_sequence ]*
selector:
    repeatable_simple_selector_sequence
59
+
60
# combinators can be surrounded by whitespace
# combinator: PLUS S* | GREATER S* | TILDE S* | S+
# Each alternative yields the symbol that is passed to AST::Combinator.fetch
# in repeatable_simple_selector_sequence.
combinator:
    optional_whitespaces T_PLUS optional_whitespaces    { result = :next_sibling }
  | optional_whitespaces T_GREATER optional_whitespaces { result = :child }
  | optional_whitespaces T_TILDE optional_whitespaces   { result = :subsequent_sibling }
  | repeatable_whitespaces                              { result = :descendant }

# Left-recursive chain of simple selector sequences: each further sequence
# (val[2]) is wrapped in the combinator class named by val[1] and combined
# into the sequence collected so far (val[0]).
repeatable_simple_selector_sequence:
    simple_selector_sequence {
      result = val[0]
    }
  | repeatable_simple_selector_sequence combinator simple_selector_sequence {
      result = val[0]
      result.combine(AST::Combinator.fetch(val[1]).new(val[2]))
    }
76
+
77
# simple_selector_sequence:
#   [ type_selector | universal ]
#   [ HASH | class | attrib | pseudo | negation ]*
#   | [ HASH | class | attrib | pseudo | negation ]+
#
# The trailing operators are appended to the `selectors` list of the leading
# type/universal selector. When there is no leading selector, a fresh
# AST::Selector::Universal is created to carry them.
simple_selector_sequence:
    type_selector
  | universal
  | type_selector repeatable_selector_operators {
      result = val[0]
      result.selectors.concat(val[1])
    }
  | universal repeatable_selector_operators {
      result = val[0]
      result.selectors.concat(val[1])
    }
  | repeatable_selector_operators {
      result = AST::Selector::Universal.new
      result.selectors.concat(val[0])
    }

# One or more selector operators, collected into an Array.
repeatable_selector_operators:
    selector_operators { result = [val[0]] }
  | repeatable_selector_operators selector_operators {
      result = val[0]
      result << val[1]
    }

# A single qualifier that may follow (or stand in for) a type selector.
selector_operators:
    hash
  | class
  | attrib
  | pseudo
  | negation
108
+
109
# type_selector: [ namespace_prefix ]? element_name
# Builds an AST::Selector::Type; the namespace prefix is passed only when one
# was written.
type_selector:
    element_name {
      result = AST::Selector::Type.new(element_name: val[0])
    }
  | namespace_prefix element_name {
      result = AST::Selector::Type.new(element_name: val[1], namespace_prefix: val[0])
    }
113
+
114
# namespace_prefix: [ IDENT | '*' ]? '|'
# Value of the rule:
#   '|'        -> the pipe itself (no prefix was written)
#   IDENT '|'  -> the identifier
#   '*' '|'    -> the asterisk (previously this returned the pipe, which made
#                 it indistinguishable from the bare '|' form)
namespace_prefix:
    T_PIPE            { result = val[0] }
  | T_IDENT T_PIPE    { result = val[0] }
  | T_ASTERISK T_PIPE { result = val[0] }
119
+
120
# element_name: IDENT
element_name:
    T_IDENT { result = val[0] }

# universal: [ namespace_prefix ]? '*'
universal:
    namespace_prefix T_ASTERISK {
      result = AST::Selector::Universal.new(namespace_prefix: val[0])
    }
  | T_ASTERISK {
      result = AST::Selector::Universal.new
    }

# class: '.' IDENT
# val[1] is the identifier after the dot.
class:
    T_DOT T_IDENT { result = AST::Selector::Class.new(val[1]) }
130
+
131
# attrib: '[' S* [ namespace_prefix ]? IDENT S*
#   [ [ PREFIXMATCH |
#       SUFFIXMATCH |
#       SUBSTRINGMATCH |
#       '=' |
#       INCLUDES |
#       DASHMATCH ] S* [ IDENT | STRING ] S*
#   ]? ']'
#
# Positions in the namespaced alternative:
#   val[0] '['  val[1] S*  val[2] namespace_prefix  val[3] IDENT
#   val[4] S*   val[5] optional_attrib_clause        val[6] ']'
# Positions in the plain alternative:
#   val[0] '['  val[1] S*  val[2] IDENT  val[3] S*
#   val[4] optional_attrib_clause        val[5] ']'
#
# The namespaced alternative previously read the clause from val[4] (the
# whitespace) and used the prefix (val[2]) as the attribute key.
attrib:
    T_LBRACK optional_whitespaces namespace_prefix T_IDENT optional_whitespaces optional_attrib_clause T_RBRACK {
      op, value = val[5]
      result = AST::Selector::Attrib.fetch(op).new(key: val[3], value: value, namespace_prefix: val[2])
    }
  | T_LBRACK optional_whitespaces T_IDENT optional_whitespaces optional_attrib_clause T_RBRACK {
      op, value = val[4]
      result = AST::Selector::Attrib.fetch(op).new(key: val[2], value: value)
    }
148
+
149
# Optional "operator value" part of an attribute selector. When present the
# value is an [operator_symbol, operand] pair; the first alternative is empty.
optional_attrib_clause:
  | attrib_operators optional_whitespaces T_IDENT optional_whitespaces {
      result = [val[0], val[2]]
    }
  | attrib_operators optional_whitespaces T_STRING optional_whitespaces {
      result = [val[0], val[2]]
    }

# Maps each attribute-match token to the symbol handed to
# AST::Selector::Attrib.fetch by the attrib rule.
attrib_operators:
    T_PREFIXMATCH    { result = :prefix_match }
  | T_SUFFIXMATCH    { result = :suffix_match }
  | T_SUBSTRINGMATCH { result = :substring_match }
  | T_EQUAL          { result = :equal }
  | T_INCLUDES       { result = :includes }
  | T_DASHMATCH      { result = :dash_match }
160
+
161
# pseudo: ':' ':'? [ IDENT | functional_pseudo ]
# val[2] is either the pseudo-class name or an already-built functional pseudo.
pseudo:
    T_COLON optional_colon T_IDENT {
      result = AST::Selector::Pseudo.fetch(val[2]).new
    }
  | T_COLON optional_colon functional_pseudo {
      result = val[2]
    }

# Second colon of the '::' form; may be absent.
optional_colon:
  | T_COLON

# functional_pseudo: FUNCTION S* expression ')'
# The T_FUNCTION lexeme includes the opening parenthesis, so the last
# character is sliced off to obtain the name passed to Pseudo.fetch.
functional_pseudo:
    T_FUNCTION optional_whitespaces repeatable_expressions T_RPAREN {
      result = AST::Selector::Pseudo.fetch(val[0].slice(0..-2)).new(val[2])
    }

# expression: [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
# A single expression token; the S* and repetition live in
# repeatable_expressions.
expression:
    T_PLUS | T_MINUS | T_HYPHEN | T_DIMENSION | T_NUMBER | T_STRING | T_IDENT { result = val[0] }

# One or more expression tokens, each optionally followed by whitespace,
# collected into an Array.
repeatable_expressions:
    expression optional_whitespaces { result = [val[0]] }
  | repeatable_expressions expression optional_whitespaces {
      result = val[0]
      result << val[1]
    }
182
+
183
# negation: NOT S* negation_arg S* ')'
# val[2] is the negated selector.
negation:
    T_NOT optional_whitespaces negation_arg optional_whitespaces T_RPAREN {
      result = AST::Selector::Negation.new(val[2])
    }

# negation_arg: type_selector | universal | HASH | class | attrib | pseudo
negation_arg:
    type_selector | universal | hash | class | attrib | pseudo

# ID selector; the T_HASH value is handed straight to AST::Selector::ID.
hash:
    T_HASH { result = AST::Selector::ID.new(val[0]) }
193
+
194
+ end
195
+
196
+ ---- inner
197
+
198
# Lexical building blocks for the tokenizer, composed by interpolation.
# NOTE(review): every pattern here is case-sensitive (e.g. NMSTART only accepts
# a-z), so upper-case identifiers are not matched by IDENT — confirm whether
# that is intended.
NONASCII = /[^\0-\177]/
UNICODE  = /\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?/
ESCAPE   = /#{UNICODE}|\\[^\n\r\f0-9a-f]/
NMCHAR   = /[_a-z0-9-]|#{NONASCII}|#{ESCAPE}/
NMSTART  = /[_a-z]|#{NONASCII}|#{ESCAPE}/
NUM      = /[0-9]+|[0-9]*\.[0-9]+/
NAME     = /#{NMCHAR}+/
IDENT    = /[-]?#{NMSTART}#{NMCHAR}*/
NL       = /\n|\r\n|\r|\f/

# Quoted strings (double / single quoted) and their unterminated counterparts.
STRING1  = /\"([^\n\r\f\\"]|\\#{NL}|#{NONASCII}|#{ESCAPE})*\"/
STRING2  = /\'([^\n\r\f\\']|\\#{NL}|#{NONASCII}|#{ESCAPE})*\'/
STRING   = /#{STRING1}|#{STRING2}/
INVALID1 = /\"([^\n\r\f\\"]|\\#{NL}|#{NONASCII}|#{ESCAPE})*/
INVALID2 = /\'([^\n\r\f\\']|\\#{NL}|#{NONASCII}|#{ESCAPE})*/
INVALID  = /#{INVALID1}|#{INVALID2}/

# Optional whitespace.
W = /[ \t\r\n\f]*/

# Single letters in plain, hex-escaped (upper/lower code point) and, where
# present, backslash-escaped form.
D = /d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?/
E = /e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?/
N = /n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n/
O = /o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o/
T = /t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t/
# 0x56 / 0x76 are "V" / "v"; the previous 58|78 were the code points of X / x.
V = /v|\\0{0,4}(56|76)(\r\n|[ \t\r\n\f])?|\\v/

# Mandatory whitespace.
S = /[ \t\r\n\f]+/
221
+
222
+ require 'strscan'
223
+ require 'forwardable'
224
+ require 'gammo/css_selector/errors'
225
+ require 'gammo/css_selector/ast/selector'
226
+ require 'gammo/css_selector/ast/combinator'
227
+
228
extend Forwardable

# `scan` and `eos?` are forwarded to the StringScanner held in @scanner.
def_delegator :@scanner, :scan
def_delegator :@scanner, :eos?

# Sets up the parser for a selector string.
# @param [String] input
def initialize(input)
  super()
  @input = input
  @scanner = StringScanner.new(input)
  # NOTE(review): the racc debug flag is switched on unconditionally;
  # presumably it only has an effect when the parser is generated with
  # racc's debug option — confirm.
  @yydebug = true
end
237
+
238
# Tokenizes the whole input into @query as [symbol, value] pairs, then runs
# the generated parser over that queue.
# @return whatever `do_parse` returns
def parse
  @query = []
  advance do |symbol, val|
    @query.push([symbol, val])
  end
  do_parse
end
243
+
244
# Records `symbol` as the most recently emitted token and hands the
# symbol/value pair to the given block.
# @return the block's return value
def token(symbol, val, &block)
  @prev_token = symbol
  block.(symbol, val)
end
248
+
249
# Hands the next queued [symbol, value] pair to the parser.
# @return [Array, nil] nil once the queue filled by `parse` is exhausted
def next_token
  @query.shift
end
252
+
253
# Single-character lexemes and the token each one produces.
EXPR_TOKENS = {
  '=' => :T_EQUAL,
  '[' => :T_LBRACK,
  ']' => :T_RBRACK,
  ')' => :T_RPAREN,
  '.' => :T_DOT,
  ',' => :T_COMMA,
  ':' => :T_COLON
}.freeze

# Regexp matching exactly the EXPR_TOKENS keys. It is derived from the hash so
# the two cannot drift apart; the hand-written version also matched "@", which
# has no entry in EXPR_TOKENS.
EXPRS = Regexp.union(EXPR_TOKENS.keys)
265
+
266
# Looks `key` up in `constraints` and yields the token symbol found there.
# @param [String] key the scanned lexeme
# @param [Hash] constraints lexeme => token symbol
# @raise [ParseError] when `key` has no (truthy) entry in `constraints`
# @return the block's return value
def fetch(key, constraints)
  symbol = constraints[key]
  # Report the offending lexeme; the looked-up symbol is always nil here.
  fail ParseError, "unexpected token: #{key}, want = #{constraints.keys}" unless symbol
  yield symbol
end
272
+
273
# Ordered list of lexer rules; `advance` tries them first to last.
LEXER_TOKENS = []

# One lexer rule: the regexp to scan, the token to emit and an optional range
# used to slice the matched text.
Pattern = Struct.new(:pattern, :token, :range)

# Appends a lexer rule to LEXER_TOKENS.
# @param [Regexp] pattern
# @param [Symbol] token
# @param [Range, nil] range
def self.map(pattern, token, range: nil)
  rule = Pattern.new(pattern, token, range)
  LEXER_TOKENS.push(rule)
end
278
+
279
# Lexer rules as [pattern, token, range] rows. They are registered in this
# order, which is the order `advance` tries them in, so rows must not be
# reordered. A missing range means the match is emitted unsliced.
[
  [S, :T_S],
  [/\~=/, :T_INCLUDES],
  [/\|=/, :T_DASHMATCH],
  [/\^=/, :T_PREFIXMATCH],
  [/\$=/, :T_SUFFIXMATCH],
  [/\*=/, :T_SUBSTRINGMATCH],
  [/<!--/, :T_CDO],
  [/-->/, :T_CDC],
  [/#{IDENT}\(/, :T_FUNCTION],
  [/#{NUM}%/, :T_PERCENTAGE],
  [/#{NUM}#{IDENT}/, :T_DIMENSION],
  [IDENT, :T_IDENT],
  [STRING, :T_STRING, 1..-2], # Remove quotes
  [NUM, :T_NUMBER],
  [/##{NAME}/, :T_HASH, 1..-1], # Remove hash ('#')
  [/#{W}\+/, :T_PLUS],
  [/#{W}\-/, :T_MINUS],
  [/#{W}>/, :T_GREATER],
  [/#{W},/, :T_COMMA],
  [/#{W}~/, :T_TILDE],
  [/:#{N}#{O}#{T}\(/, :T_NOT],
  [/@#{IDENT}/, :T_ATKEYWORD],
  [/#{INVALID}/, :T_INVALID],
  [/\|/, :T_PIPE],
  [/\*/, :T_ASTERISK]
].each do |pattern, symbol, range|
  map(pattern, symbol, range: range)
end
304
+
305
# TODO: ignore comment token
#
# Tokenizes the scanner's input from its current position to the end, passing
# every token to the block (via `token`) as a symbol/value pair.
#
# At each position the LEXER_TOKENS rules are tried in order and the first
# match wins; its text is sliced by the rule's range when one is set. If no
# rule matches, the single-character EXPRS lexemes are tried.
#
# @raise [ParseError] when nothing matches at the current position
def advance(&block)
  @prev_token = nil
  until eos?
    lexed = LEXER_TOKENS.any? do |pattern|
      matched = scan(pattern.pattern)
      next false unless matched
      matched = matched[pattern.range] if pattern.range
      token pattern.token, matched, &block
      true
    end
    next if lexed

    expr = scan(EXPRS)
    # `rest` is used instead of slicing by `pos`: `pos` is a byte offset and
    # would cut at the wrong place after multibyte characters.
    fail ParseError, "unexpected token: '#{@scanner.rest}'" unless expr
    fetch(expr, EXPR_TOKENS) { |symbol| token symbol, expr, &block }
  end
end
@@ -0,0 +1,33 @@
1
+ require 'gammo/css_selector/context'
2
+ require 'gammo/css_selector/parser'
3
+
4
module Gammo
  module CSSSelector
    # Traverses a DOM tree built by Gammo::Parser according to a given
    # expression.
    # @!visibility private
    class Traverser
      # Builds a traverser for the given selector expression.
      # @param [String] expr
      # @!visibility private
      def initialize(expr)
        @expr = expr
      end

      # Parses the stored expression and evaluates the result against
      # `context`.
      # @param [Gammo::CSSSelector::Context] context
      # @return [Gammo::CSSSelector::NodeSet]
      # @!visibility private
      def evaluate(context)
        parsed = Parser.new(@expr).parse
        parsed.evaluate(context)
      end
    end

    # Evaluates the given expression with the receiver as the context node and
    # returns the resulting node set.
    # @param [String] expr
    # @return [Gammo::CSSSelector::NodeSet]
    def query_selector_all(expr)
      traverser = Traverser.new(expr)
      traverser.evaluate(Context.new(node: self))
    end
    alias_method :css, :query_selector_all
  end
end
@@ -0,0 +1,31 @@
1
module Gammo
  # Module that lets subclasses register themselves on their direct superclass
  # under a key, and lets that superclass look them up again.
  # It relies on `superclass`, so presumably it is meant to be `extend`ed onto
  # classes.
  # @!visibility private
  module Subclassify
    NotFoundError = Class.new(ArgumentError)

    # Registry of the subclasses declared on the receiver.
    # @return [Hash]
    # @!visibility private
    def map
      @map ||= {}
    end

    # Registers the receiver under `key` in its direct superclass's registry.
    # @!visibility private
    def declare(key)
      look_for_superclass.map[key] = self
    end

    # Returns the class registered on the receiver under `key` (converted with
    # `to_sym`).
    # @raise [NotFoundError] when nothing is registered under that key
    # @!visibility private
    def fetch(key)
      klass = map[key.to_sym]
      fail NotFoundError, "%s not found" % key unless klass
      klass
    end

    private

    # The receiver's direct superclass. This used to scan `ancestors` for the
    # very object `superclass` had already returned.
    # @!visibility private
    def look_for_superclass
      superclass
    end
  end
end
data/lib/gammo/node.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'gammo/css_selector'
1
2
  require 'gammo/xpath'
2
3
  require 'gammo/attributes'
3
4
 
@@ -58,6 +59,7 @@ module Gammo
58
59
  # Represents the root document token.
59
60
  class Document < Node
60
61
  include XPath
62
+ include CSSSelector
61
63
  end
62
64
 
63
65
  # Represents the element token including start, end and self-closing token.
@@ -177,7 +177,7 @@ module Gammo
177
177
 
178
178
  def parse_foreign_content
179
179
  case token
180
- when Tokenizer::TextToken
180
+ when Tokenizer::CharacterToken
181
181
  self.frameset_ok = token.data.lstrip.sub(/\A\x00*/, '').lstrip.empty? if frameset_ok
182
182
  token.data = token.data.gsub(/\x00/, "\ufffd")
183
183
  add_text token.data
@@ -249,11 +249,11 @@ module Gammo
249
249
  if math_ml_text_integration_point?(node)
250
250
  return false if token.instance_of?(Tokenizer::StartTagToken) && token.tag != Tags::Mglyph &&
251
251
  token.tag != Tags::Malignmark
252
- return false if token.instance_of?(Tokenizer::TextToken)
252
+ return false if token.instance_of?(Tokenizer::CharacterToken)
253
253
  end
254
254
  return false if node.namespace == 'math' && node.tag == Tags::AnnotationXml && \
255
255
  token.instance_of?(Tokenizer::StartTagToken) && token.tag == Tags::Svg
256
- return false if html_integration_point?(node) && (token.instance_of?(Tokenizer::StartTagToken) || token.instance_of?(Tokenizer::TextToken))
256
+ return false if html_integration_point?(node) && (token.instance_of?(Tokenizer::StartTagToken) || token.instance_of?(Tokenizer::CharacterToken))
257
257
  return false if token.instance_of? Tokenizer::ErrorToken
258
258
  true
259
259
  end
@@ -7,7 +7,7 @@ module Gammo
7
7
  halt true
8
8
  end
9
9
 
10
- def text_token(token)
10
+ def character_token(token)
11
11
  halt InBody.new(parser).process if token.data.lstrip.length.zero?
12
12
  end
13
13
 
@@ -6,7 +6,7 @@ module Gammo
6
6
  parser.document.append_child Node::Comment.new(data: token.data)
7
7
  end
8
8
 
9
- def text_token(token)
9
+ def character_token(token)
10
10
  halt InBody.new(parser).process unless token.data.gsub(/[^\s]/, '').empty?
11
11
  end
12
12
 
@@ -8,7 +8,7 @@ module Gammo
8
8
  true
9
9
  end
10
10
 
11
- def text_token(token)
11
+ def character_token(token)
12
12
  s = token.data.lstrip
13
13
  halt InBody.new(parser).process if s.length.zero?
14
14
  end
@@ -8,7 +8,7 @@ module Gammo
8
8
  parser.add_child Node::Comment.new(data: token.data)
9
9
  end
10
10
 
11
- def text_token(token)
11
+ def character_token(token)
12
12
  s = token.data.gsub(/[^\s]/, '')
13
13
  parser.add_text(s) unless s.empty?
14
14
  end
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.5.
4
4
  class AfterHead < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  s = token.data.lstrip
7
7
  if s.length < token.data.length
8
8
  # add the initial whitespace to the current node.
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.3
4
4
  class BeforeHead < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  token.data = token.data.lstrip
7
7
  halt true if token.data.length.zero?
8
8
  end
@@ -7,7 +7,7 @@ module Gammo
7
7
  halt true
8
8
  end
9
9
 
10
- def text_token(token)
10
+ def character_token(token)
11
11
  token.data = token.data.lstrip
12
12
  # it's all whitespace so ignore it.
13
13
  halt true if token.data.length.zero?
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.6.
4
4
  class InBody < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  data = token.data
7
7
  node = parser.open_elements.last
8
8
  case node.tag
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.12.
4
4
  class InColumnGroup < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  s = token.data.lstrip
7
7
  if s.length < token.data.length
8
8
  # add the initial whitespace to the current node.
@@ -6,7 +6,7 @@ module Gammo
6
6
  parser.add_child Node::Comment.new(data: token.data)
7
7
  end
8
8
 
9
- def text_token(token)
9
+ def character_token(token)
10
10
  text = token.data.each_char.with_object(String.new) { |c, s| s << c if c == ?\s }
11
11
  parser.add_text(text) if text != ''
12
12
  end
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.4.
4
4
  class InHead < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  s = token.data.lstrip
7
7
  if s.length < token.data.length
8
8
  # add the initial whitespace to the current node.
@@ -90,7 +90,8 @@ module Gammo
90
90
  end
91
91
 
92
92
  def default(_)
93
- parser.parse_implied_token(Tokenizer::EndTagToken, Tags::Head, Tags::Head.to_s)
93
+ parser.open_elements.pop
94
+ parser.insertion_mode = AfterHead
94
95
  halt false
95
96
  end
96
97
  end
@@ -36,7 +36,7 @@ module Gammo
36
36
  end
37
37
  end
38
38
 
39
- def text_token(token)
39
+ def character_token(token)
40
40
  halt InHead.new(parser).process if token.data.lstrip == ''
41
41
  end
42
42
 
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.16.
4
4
  class InSelect < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  parser.add_text token.data.gsub("\x00",'')
7
7
  end
8
8
 
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.9.
4
4
  class InTable < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  token.data = token.data.gsub("\x00", "")
7
7
  case parser.open_elements.last.tag
8
8
  when Tags::Table, Tags::Tbody, Tags::Tfoot, Tags::Thead, Tags::Tr
@@ -2,7 +2,7 @@ module Gammo
2
2
  class Parser
3
3
  # Section 12.2.6.4.18.
4
4
  class InTemplate < InsertionMode
5
- def text_token(token)
5
+ def character_token(token)
6
6
  halt InBody.new(parser).process
7
7
  end
8
8
 
@@ -4,7 +4,7 @@ module Gammo
4
4
  class Parser
5
5
  # Section 12.2.6.4.1
6
6
  class Initial < InsertionMode
7
- def text_token(token)
7
+ def character_token(token)
8
8
  token.data = token.data.lstrip
9
9
  # it's all whitespace so ignore it.
10
10
  halt true if token.data.length.zero?
@@ -6,7 +6,7 @@ module Gammo
6
6
  parser.open_elements.pop
7
7
  end
8
8
 
9
- def text_token(token)
9
+ def character_token(token)
10
10
  d = token.data
11
11
  n = parser.open_elements.last
12
12
  if n.tag == Tags::Textarea && n.first_child.nil?
@@ -12,7 +12,7 @@ module Gammo
12
12
  def process
13
13
  case token = parser.token
14
14
  when Tokenizer::ErrorToken then consume(:error_token)
15
- when Tokenizer::TextToken then consume(:text_token)
15
+ when Tokenizer::CharacterToken then consume(:character_token)
16
16
  when Tokenizer::StartTagToken then consume(:start_tag_token)
17
17
  when Tokenizer::EndTagToken then consume(:end_tag_token)
18
18
  when Tokenizer::SelfClosingTagToken then consume(:self_closing_tag_token)
@@ -14,6 +14,15 @@ module Gammo
14
14
  def concat(s)
15
15
  data << s
16
16
  end
17
+
18
# Renders the token as `<ClassName key="value" ...>`.
# `tag` and `data` are included when truthy, `attributes` only when present
# and non-empty.
# @return [String]
def to_s
  members = []
  members << "tag=\"#{tag}\"" if tag
  members << "data=\"#{data}\"" if data
  members << "attributes=\"#{attributes}\"" if attributes && !attributes.empty?
  "<#{self.class} #{members.join(' ')}>"
end
17
26
  end
18
27
 
19
28
  class EscapedToken < BaseToken
@@ -56,7 +65,7 @@ module Gammo
56
65
  end
57
66
 
58
67
  ErrorToken = Class.new(BaseToken)
59
- TextToken = Class.new(EscapedToken)
68
+ CharacterToken = Class.new(EscapedToken)
60
69
  StartTagToken = Class.new(BaseToken)
61
70
  EndTagToken = Class.new(BaseToken)
62
71
  SelfClosingTagToken = Class.new(BaseToken)