rubyjedi-oga 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +362 -0
  4. data/README.md +317 -0
  5. data/doc/css/common.css +77 -0
  6. data/doc/css_selectors.md +935 -0
  7. data/doc/manually_creating_documents.md +67 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/doc/xml_namespaces.md +63 -0
  10. data/ext/c/extconf.rb +11 -0
  11. data/ext/c/lexer.c +2595 -0
  12. data/ext/c/lexer.h +16 -0
  13. data/ext/c/lexer.rl +198 -0
  14. data/ext/c/liboga.c +6 -0
  15. data/ext/c/liboga.h +11 -0
  16. data/ext/java/Liboga.java +14 -0
  17. data/ext/java/org/liboga/xml/Lexer.java +1363 -0
  18. data/ext/java/org/liboga/xml/Lexer.rl +223 -0
  19. data/ext/ragel/base_lexer.rl +633 -0
  20. data/lib/oga.rb +57 -0
  21. data/lib/oga/blacklist.rb +40 -0
  22. data/lib/oga/css/lexer.rb +743 -0
  23. data/lib/oga/css/parser.rb +976 -0
  24. data/lib/oga/entity_decoder.rb +21 -0
  25. data/lib/oga/html/entities.rb +2150 -0
  26. data/lib/oga/html/parser.rb +25 -0
  27. data/lib/oga/html/sax_parser.rb +18 -0
  28. data/lib/oga/lru.rb +160 -0
  29. data/lib/oga/oga.rb +57 -0
  30. data/lib/oga/version.rb +3 -0
  31. data/lib/oga/whitelist.rb +20 -0
  32. data/lib/oga/xml/attribute.rb +136 -0
  33. data/lib/oga/xml/cdata.rb +17 -0
  34. data/lib/oga/xml/character_node.rb +37 -0
  35. data/lib/oga/xml/comment.rb +17 -0
  36. data/lib/oga/xml/default_namespace.rb +13 -0
  37. data/lib/oga/xml/doctype.rb +82 -0
  38. data/lib/oga/xml/document.rb +108 -0
  39. data/lib/oga/xml/element.rb +428 -0
  40. data/lib/oga/xml/entities.rb +122 -0
  41. data/lib/oga/xml/html_void_elements.rb +15 -0
  42. data/lib/oga/xml/lexer.rb +550 -0
  43. data/lib/oga/xml/namespace.rb +48 -0
  44. data/lib/oga/xml/node.rb +219 -0
  45. data/lib/oga/xml/node_set.rb +333 -0
  46. data/lib/oga/xml/parser.rb +631 -0
  47. data/lib/oga/xml/processing_instruction.rb +37 -0
  48. data/lib/oga/xml/pull_parser.rb +175 -0
  49. data/lib/oga/xml/querying.rb +56 -0
  50. data/lib/oga/xml/sax_parser.rb +192 -0
  51. data/lib/oga/xml/text.rb +66 -0
  52. data/lib/oga/xml/traversal.rb +50 -0
  53. data/lib/oga/xml/xml_declaration.rb +65 -0
  54. data/lib/oga/xpath/evaluator.rb +1798 -0
  55. data/lib/oga/xpath/lexer.rb +1958 -0
  56. data/lib/oga/xpath/parser.rb +622 -0
  57. data/oga.gemspec +45 -0
  58. metadata +227 -0
@@ -0,0 +1,122 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # Module for encoding/decoding XML and HTML entities. The mapping of HTML
5
+ # entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
6
+ #
7
module Entities
  ##
  # Hash containing XML entities and the corresponding characters.
  #
  # The `&amp;` entry is deliberately kept last (see
  # {Oga::XML::Text#to_xml}).
  #
  # @return [Hash]
  #
  DECODE_MAPPING = {
    '&lt;'   => '<',
    '&gt;'   => '>',
    '&apos;' => "'",
    '&quot;' => '"',
    '&amp;'  => '&',
  }

  ##
  # Hash containing characters and the corresponding XML entities.
  #
  # @return [Hash]
  #
  ENCODE_MAPPING = {
    '&' => '&amp;',
    '>' => '&gt;',
    '<' => '&lt;',
  }

  ##
  # Hash containing characters and the corresponding XML entities to use
  # when encoding XML/HTML attribute values.
  #
  # @return [Hash]
  #
  ENCODE_ATTRIBUTE_MAPPING = {
    '&' => '&amp;',
    '>' => '&gt;',
    '<' => '&lt;',
    "'" => '&apos;',
    '"' => '&quot;'
  }

  ##
  # @return [String]
  #
  AMPERSAND = '&'.freeze

  ##
  # Regexp for matching XML/HTML entities such as "&nbsp;".
  #
  # @return [Regexp]
  #
  REGULAR_ENTITY = /&[a-zA-Z0-9]+;/

  ##
  # Regexp for matching XML/HTML entities such as "&#38;".
  #
  # Capture 1 is the optional "x" (hexadecimal marker), capture 2 the raw
  # digits. The digit class is intentionally loose; {.decode} validates the
  # digits before converting them.
  #
  # @return [Regexp]
  #
  CODEPOINT_ENTITY = /&#(x)?([a-zA-Z0-9]+);/

  ##
  # Regexp matching either kind of entity, so decoding can be done in a
  # single pass. The capture groups of {CODEPOINT_ENTITY} keep their numbers.
  #
  # @return [Regexp]
  #
  ANY_ENTITY = Regexp.union(CODEPOINT_ENTITY, REGULAR_ENTITY)

  ##
  # @return [Regexp]
  #
  DECIMAL_DIGITS = /\A[0-9]+\z/

  ##
  # @return [Regexp]
  #
  HEX_DIGITS = /\A[0-9a-fA-F]+\z/

  ##
  # Highest valid Unicode code point.
  #
  # @return [Integer]
  #
  MAX_CODEPOINT = 0x10FFFF

  ##
  # UTF-16 surrogate code points; these can not be encoded as UTF-8.
  #
  # @return [Range]
  #
  SURROGATE_CODEPOINTS = (0xD800..0xDFFF)

  ##
  # @return [Regexp]
  #
  ENCODE_REGEXP = Regexp.union(ENCODE_MAPPING.keys)

  ##
  # @return [Regexp]
  #
  ENCODE_ATTRIBUTE_REGEXP = Regexp.union(ENCODE_ATTRIBUTE_MAPPING.keys)

  ##
  # Decodes XML entities.
  #
  # Named entities are looked up in `mapping`; numeric entities ("&#38;",
  # "&#x26;") are converted to the matching UTF-8 character. Anything that
  # can not be decoded (an unknown name, malformed digits, a code point
  # outside the Unicode range) is left in the output exactly as written.
  #
  # Decoding happens in one pass, so text produced by a replacement is never
  # decoded a second time ("&amp;#38;" becomes "&#38;", not "&").
  #
  # @param [String] input
  # @param [Hash] mapping
  # @return [String] `input` itself when it contains no ampersand, a new
  #  String otherwise.
  #
  def self.decode(input, mapping = DECODE_MAPPING)
    return input unless input.include?(AMPERSAND)

    input.gsub(ANY_ENTITY) do |entity|
      hex_marker = Regexp.last_match(1)
      digits     = Regexp.last_match(2)

      decoded =
        if digits
          decode_codepoint(digits, hex_marker)
        else
          mapping[entity]
        end

      # Fall back to the original text rather than dropping it.
      decoded || entity
    end
  end

  ##
  # Converts the digits of a numeric entity into a UTF-8 character.
  #
  # @param [String] digits The characters between "&#" (or "&#x") and ";".
  # @param [String|NilClass] hex_marker Truthy when the entity used "&#x".
  # @return [String|NilClass] nil when the digits are malformed or the code
  #  point is not a valid Unicode scalar value.
  #
  def self.decode_codepoint(digits, hex_marker)
    if hex_marker
      return unless digits =~ HEX_DIGITS

      codepoint = digits.to_i(16)
    else
      return unless digits =~ DECIMAL_DIGITS

      codepoint = digits.to_i(10)
    end

    return if codepoint > MAX_CODEPOINT
    return if SURROGATE_CODEPOINTS.include?(codepoint)

    [codepoint].pack('U')
  end

  private_class_method :decode_codepoint

  ##
  # Encodes special characters as XML entities.
  #
  # @param [String] input
  # @param [Hash] mapping
  # @return [String]
  #
  def self.encode(input, mapping = ENCODE_MAPPING)
    input.gsub(ENCODE_REGEXP, mapping)
  end

  ##
  # Encodes special characters in an XML attribute value.
  #
  # @param [String] input
  # @return [String]
  #
  def self.encode_attribute(input)
    input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
  end
end # Entities
121
+ end # XML
122
+ end # Oga
@@ -0,0 +1,15 @@
1
+ module Oga
2
+ module XML
3
##
# Whitelist holding the names of the HTML void elements. The lexer consults
# it when HTML lexing is enabled.
#
# @api private
# @return [Oga::Whitelist]
#
HTML_VOID_ELEMENTS = Whitelist.new(
  %w[
    area base br col command embed hr img input keygen link meta param
    source track wbr
  ]
)
14
+ end # XML
15
+ end # Oga
@@ -0,0 +1,550 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # Low level lexer that supports both XML and HTML (using an extra option).
5
+ # To lex HTML input set the `:html` option to `true` when creating an
6
+ # instance of the lexer:
7
+ #
8
+ # lexer = Oga::XML::Lexer.new('...', :html => true)
9
+ #
10
+ # This lexer can process both String and IO instances. IO instances are
11
+ # processed on a line by line basis. This can greatly reduce memory usage
12
+ # in exchange for a slightly slower runtime.
13
+ #
14
+ # ## Thread Safety
15
+ #
16
+ # Since this class keeps track of an internal state you can not use the
17
+ # same instance between multiple threads at the same time. For example, the
18
+ # following will not work reliably:
19
+ #
20
+ # # Don't do this!
21
+ # lexer = Oga::XML::Lexer.new('....')
22
+ # threads = []
23
+ #
24
+ # 2.times do
25
+ # threads << Thread.new do
26
+ # lexer.advance do |*args|
27
+ # p args
28
+ # end
29
+ # end
30
+ # end
31
+ #
32
+ # threads.each(&:join)
33
+ #
34
+ # However, it is perfectly safe to use different instances per thread.
35
+ # There is no _global_ state used by this lexer.
36
+ #
37
+ # ## Strict Mode
38
+ #
39
+ # By default the lexer is rather permissive regarding the input. For
40
+ # example, missing closing tags are inserted by default. To disable this
41
+ # behaviour the lexer can be run in "strict mode" by setting `:strict` to
42
+ # `true`:
43
+ #
44
+ # lexer = Oga::XML::Lexer.new('...', :strict => true)
45
+ #
46
+ # Strict mode only applies to XML documents.
47
+ #
48
+ # @private
49
+ #
50
class Lexer
  # Tag names kept as frozen constants so that referencing them in the lexer
  # never allocates a new String.
  HTML_SCRIPT = 'script'.freeze
  HTML_STYLE  = 'style'.freeze

  # Tags that may appear as direct children of a <table> element.
  HTML_TABLE_ALLOWED = Whitelist.new(
    %w[thead tbody tfoot tr caption colgroup col]
  )

  HTML_SCRIPT_ELEMENTS = Whitelist.new(%w[script template])

  HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w[tr]) + HTML_SCRIPT_ELEMENTS

  # Maps the name of the currently open element to the rule deciding which
  # new opening tags it tolerates. When the rule does not allow the new tag,
  # the open element is closed automatically first.
  HTML_CLOSE_SELF = {
    'head' => Blacklist.new(%w[head body]),
    'body' => Blacklist.new(%w[head body]),
    'li'   => Blacklist.new(%w[li]),
    'dt'   => Blacklist.new(%w[dt dd]),
    'dd'   => Blacklist.new(%w[dt dd]),
    'p'    => Blacklist.new(
      %w[
        address article aside blockquote details div dl fieldset figcaption
        figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav
        ol p pre section table ul
      ]
    ),
    'rb'       => Blacklist.new(%w[rb rt rtc rp]),
    'rt'       => Blacklist.new(%w[rb rt rtc rp]),
    'rtc'      => Blacklist.new(%w[rb rtc]),
    'rp'       => Blacklist.new(%w[rb rt rtc rp]),
    'optgroup' => Blacklist.new(%w[optgroup]),
    'option'   => Blacklist.new(%w[optgroup option]),
    'colgroup' => Whitelist.new(%w[col template]),
    'caption'  => HTML_TABLE_ALLOWED.to_blacklist,
    'table'    => HTML_TABLE_ALLOWED + HTML_SCRIPT_ELEMENTS,
    'thead'    => HTML_TABLE_ROW_ELEMENTS,
    'tbody'    => HTML_TABLE_ROW_ELEMENTS,
    'tfoot'    => HTML_TABLE_ROW_ELEMENTS,
    'tr'       => Whitelist.new(%w[td th]) + HTML_SCRIPT_ELEMENTS,
    'td'       => Blacklist.new(%w[td th]) + HTML_TABLE_ALLOWED,
    'th'       => Blacklist.new(%w[td th]) + HTML_TABLE_ALLOWED
  }

  # Register every rule under the upper-case tag name as well, so an
  # all-caps tag can be looked up without changing its case first.
  HTML_CLOSE_SELF.to_a.each do |tag, rule|
    HTML_CLOSE_SELF[tag.upcase] = rule
  end

  ##
  # Names of HTML tags of which the content should be lexed as-is.
  #
  LITERAL_HTML_ELEMENTS = Whitelist.new([HTML_SCRIPT, HTML_STYLE])

  ##
  # @param [String|IO] data The input to lex: a String, an object
  #  responding to `each_line` (IO, StringIO, ...) or one responding to
  #  `each` (Enumerator, Array, ...).
  #
  # @param [Hash] options
  #
  # @option options [TrueClass|FalseClass] :html When `true` the input is
  #  treated as HTML instead of XML, which makes it possible to lex HTML
  #  void elements such as `<link href="">`.
  #
  # @option options [TrueClass|FalseClass] :strict Turns strict parsing of
  #  XML documents on or off; off by default.
  #
  def initialize(data, options = {})
    @data   = data
    @html   = options[:html]
    @strict = options[:strict] || false

    reset
  end

  ##
  # Puts the lexer back in its initial state: line counter at 1, no open
  # elements, input rewound when it supports `rewind`. You normally don't
  # have to call this yourself since #lex does so once it is done.
  #
  def reset
    @line     = 1
    @elements = []

    @data.rewind if @data.respond_to?(:rewind)

    # Not defined in this file; presumably supplied by the native extension.
    reset_native
  end

  ##
  # Feeds the input to the supplied block: a String in one go, anything
  # responding to `each_line` one line at a time, anything else responding
  # to `each` one chunk at a time. Other inputs yield nothing.
  #
  # @yieldparam [String]
  #
  def read_data
    return yield(@data) if @data.is_a?(String)

    if @data.respond_to?(:each_line)
      # IO, StringIO, etc
      # THINK: read(N) would be nice, but currently this screws up the C code
      @data.each_line { |line| yield line }
    elsif @data.respond_to?(:each)
      # Enumerator, Array, etc
      @data.each { |chunk| yield chunk }
    end
  end

  ##
  # Lexes the whole input and returns every token as a
  # `[type, value, line]` Array.
  #
  # The internal state of the lexer is reset once the input is consumed.
  #
  # @see #advance
  # @return [Array]
  #
  def lex
    collected = []

    advance { |type, value, line| collected << [type, value, line] }

    reset

    collected
  end

  ##
  # Walks through the input and yields every token to the supplied block.
  #
  # The block receives three arguments: the token type (a Symbol), the
  # token value (a String or nil) and the line number.
  #
  # The block is kept in `@block` for the duration of the call and cleared
  # afterwards, even when lexing raises.
  #
  # This method does *not* reset the internal state of the lexer.
  #
  # @yieldparam [Symbol] type
  # @yieldparam [String] value
  # @yieldparam [Integer] line
  #
  def advance(&block)
    @block = block

    # advance_native is not defined in this file; presumably supplied by the
    # native extension.
    read_data { |chunk| advance_native(chunk) }

    # Outside strict mode, emit the closing tags that are still missing.
    unless strict? || @elements.empty?
      @elements.length.times { on_element_end }
    end
  ensure
    @block = nil
  end

  ##
  # @return [TrueClass|FalseClass]
  #
  def html?
    @html == true
  end

  ##
  # @return [TrueClass|FalseClass]
  #
  def strict?
    @strict
  end

  ##
  # @return [TrueClass|FalseClass]
  #
  def html_script?
    html? && current_element == HTML_SCRIPT
  end

  ##
  # @return [TrueClass|FalseClass]
  #
  def html_style?
    html? && current_element == HTML_STYLE
  end

  private

  ##
  # @param [Integer] amount Number of lines to move the line counter forward.
  #
  def advance_line(amount = 1)
    @line += amount
  end

  ##
  # Hands a token to the block given to #advance, together with the current
  # line number.
  #
  # @param [Symbol] type The token type.
  # @param [String] value The token value.
  #
  def add_token(type, value = nil)
    @block.call(type, value, @line)
  end

  ##
  # Name of the innermost element that is still open, nil when there is none.
  #
  # @return [String]
  #
  def current_element
    @elements.last
  end

  ##
  # Callback for a single quote.
  #
  def on_string_squote
    add_token(:T_STRING_SQUOTE)
  end

  ##
  # Callback for a double quote.
  #
  def on_string_dquote
    add_token(:T_STRING_DQUOTE)
  end

  ##
  # Callback for the body of a string.
  #
  # @param [String] value The data between the quotes.
  #
  def on_string_body(value)
    add_token(:T_STRING_BODY, value)
  end

  ##
  # Callback for the start of a doctype.
  #
  def on_doctype_start
    add_token(:T_DOCTYPE_START)
  end

  ##
  # Callback for the identifier specifying the type of the doctype.
  #
  # @param [String] value
  #
  def on_doctype_type(value)
    add_token(:T_DOCTYPE_TYPE, value)
  end

  ##
  # Callback for the identifier specifying the name of the doctype.
  #
  # @param [String] value
  #
  def on_doctype_name(value)
    add_token(:T_DOCTYPE_NAME, value)
  end

  ##
  # Callback for the end of a doctype.
  #
  def on_doctype_end
    add_token(:T_DOCTYPE_END)
  end

  ##
  # Callback for an inline doctype block.
  #
  # @param [String] value
  #
  def on_doctype_inline(value)
    add_token(:T_DOCTYPE_INLINE, value)
  end

  ##
  # Callback for the opening CDATA tag.
  #
  def on_cdata_start
    add_token(:T_CDATA_START)
  end

  ##
  # Callback for the closing CDATA tag.
  #
  def on_cdata_end
    add_token(:T_CDATA_END)
  end

  ##
  # Callback for the body of a CDATA tag.
  #
  # @param [String] value
  #
  def on_cdata_body(value)
    add_token(:T_CDATA_BODY, value)
  end

  ##
  # Callback for the opening comment tag.
  #
  def on_comment_start
    add_token(:T_COMMENT_START)
  end

  ##
  # Callback for the closing comment tag.
  #
  def on_comment_end
    add_token(:T_COMMENT_END)
  end

  ##
  # Callback for the text of a comment.
  #
  # @param [String] value
  #
  def on_comment_body(value)
    add_token(:T_COMMENT_BODY, value)
  end

  ##
  # Callback for the start of an XML declaration tag.
  #
  def on_xml_decl_start
    add_token(:T_XML_DECL_START)
  end

  ##
  # Callback for the end of an XML declaration tag.
  #
  def on_xml_decl_end
    add_token(:T_XML_DECL_END)
  end

  ##
  # Callback for the start of a processing instruction.
  #
  def on_proc_ins_start
    add_token(:T_PROC_INS_START)
  end

  ##
  # Callback for the name of a processing instruction.
  #
  # @param [String] value
  #
  def on_proc_ins_name(value)
    add_token(:T_PROC_INS_NAME, value)
  end

  ##
  # Callback for the body of a processing instruction.
  #
  # @param [String] value
  #
  def on_proc_ins_body(value)
    add_token(:T_PROC_INS_BODY, value)
  end

  ##
  # Callback for the end of a processing instruction.
  #
  def on_proc_ins_end
    add_token(:T_PROC_INS_END)
  end

  ##
  # Callback for the name of an element. In HTML mode any elements that have
  # to be closed implicitly are closed before the new one is registered.
  #
  # @param [String] name The name of the element, including namespace.
  #
  def on_element_name(name)
    before_html_element_name(name) if html?

    add_element(name)
  end

  ##
  # Inserts the closing tags that are implied by opening the HTML tag `name`.
  #
  # Open elements are closed one after another for as long as the innermost
  # one has a rule in HTML_CLOSE_SELF that does not allow `name`. This way a
  # "<tbody>" closes not just an unclosed "<th>" but the surrounding,
  # unclosed "<tr>" too.
  #
  # @param [String] name
  #
  def before_html_element_name(name)
    while (rule = HTML_CLOSE_SELF[current_element])
      break if rule.allow?(name)

      on_element_end
    end
  end

  ##
  # Pushes `name` on the stack of open elements and emits its token.
  #
  # @param [String] name
  #
  def add_element(name)
    @elements << name

    add_token(:T_ELEM_NAME, name)
  end

  ##
  # Callback for the namespace of an element.
  #
  # @param [String] namespace
  #
  def on_element_ns(namespace)
    add_token(:T_ELEM_NS, namespace)
  end

  ##
  # Callback for the closing `>` of the open tag of an element. In HTML mode
  # a void element is closed right here; in XML mode nothing happens.
  #
  def on_element_open_end
    return unless html?

    tag = current_element

    # Try the name as written first and only downcase it when that fails;
    # this can save us a *lot* of String allocations.
    if HTML_VOID_ELEMENTS.allow?(tag) || HTML_VOID_ELEMENTS.allow?(tag.downcase)
      add_token(:T_ELEM_END)
      @elements.pop
    end
  end

  ##
  # Callback for the closing tag of an element. Does nothing when no element
  # is open.
  #
  # In HTML mode, when `name` refers to an element that is still open, every
  # element nested inside it is closed first.
  #
  # @param [String] name The name of the element (minus namespace
  #  prefix). This is not set for self closing tags.
  #
  def on_element_end(name = nil)
    return if @elements.empty?

    if html? && name && @elements.include?(name)
      until current_element == name
        add_token(:T_ELEM_END)
        @elements.pop
      end
    end

    add_token(:T_ELEM_END)

    @elements.pop
  end

  ##
  # Callback for regular text. Empty values produce no token.
  #
  # @param [String] value
  #
  def on_text(value)
    add_token(:T_TEXT, value) unless value.empty?
  end

  ##
  # Callback for the namespace of an attribute.
  #
  # @param [String] value
  #
  def on_attribute_ns(value)
    add_token(:T_ATTR_NS, value)
  end

  ##
  # Callback for the name of a tag attribute.
  #
  # @param [String] value
  #
  def on_attribute(value)
    add_token(:T_ATTR, value)
  end
end # Lexer
549
+ end # XML
550
+ end # Oga