oga 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +13 -0
- data/LICENSE +19 -0
- data/README.md +171 -0
- data/doc/DCO.md +25 -0
- data/doc/changelog.md +7 -0
- data/doc/css/common.css +76 -0
- data/doc/migrating_from_nokogiri.md +169 -0
- data/ext/c/extconf.rb +13 -0
- data/ext/c/lexer.c +1518 -0
- data/ext/c/lexer.h +8 -0
- data/ext/c/lexer.rl +121 -0
- data/ext/c/liboga.c +6 -0
- data/ext/c/liboga.h +11 -0
- data/ext/java/Liboga.java +14 -0
- data/ext/java/org/liboga/xml/Lexer.java +829 -0
- data/ext/java/org/liboga/xml/Lexer.rl +151 -0
- data/ext/ragel/base_lexer.rl +323 -0
- data/lib/oga.rb +43 -0
- data/lib/oga/html/parser.rb +25 -0
- data/lib/oga/oga.rb +27 -0
- data/lib/oga/version.rb +3 -0
- data/lib/oga/xml/attribute.rb +111 -0
- data/lib/oga/xml/cdata.rb +24 -0
- data/lib/oga/xml/character_node.rb +39 -0
- data/lib/oga/xml/comment.rb +24 -0
- data/lib/oga/xml/doctype.rb +91 -0
- data/lib/oga/xml/document.rb +99 -0
- data/lib/oga/xml/element.rb +340 -0
- data/lib/oga/xml/lexer.rb +399 -0
- data/lib/oga/xml/namespace.rb +42 -0
- data/lib/oga/xml/node.rb +175 -0
- data/lib/oga/xml/node_set.rb +313 -0
- data/lib/oga/xml/parser.rb +556 -0
- data/lib/oga/xml/processing_instruction.rb +39 -0
- data/lib/oga/xml/pull_parser.rb +166 -0
- data/lib/oga/xml/querying.rb +32 -0
- data/lib/oga/xml/text.rb +16 -0
- data/lib/oga/xml/traversal.rb +48 -0
- data/lib/oga/xml/xml_declaration.rb +76 -0
- data/lib/oga/xpath/evaluator.rb +1748 -0
- data/lib/oga/xpath/lexer.rb +2043 -0
- data/lib/oga/xpath/node.rb +10 -0
- data/lib/oga/xpath/parser.rb +535 -0
- data/oga.gemspec +45 -0
- metadata +221 -0
@@ -0,0 +1,340 @@
|
|
1
|
+
module Oga
|
2
|
+
module XML
|
3
|
+
##
|
4
|
+
# Class that contains information about an XML element such as the name,
|
5
|
+
# attributes and child nodes.
|
6
|
+
#
|
7
|
+
# @!attribute [rw] name
|
8
|
+
# The name of the element.
|
9
|
+
# @return [String]
|
10
|
+
#
|
11
|
+
# @!attribute [rw] namespace_name
|
12
|
+
# The name of the namespace.
|
13
|
+
# @return [String]
|
14
|
+
#
|
15
|
+
# @!attribute [rw] attributes
|
16
|
+
# The attributes of the element.
|
17
|
+
# @return [Array<Oga::XML::Attribute>]
|
18
|
+
#
|
19
|
+
# @!attribute [rw] namespaces
|
20
|
+
# The registered namespaces.
|
21
|
+
# @return [Hash]
|
22
|
+
#
|
23
|
+
class Element < Node
|
24
|
+
include Querying
|
25
|
+
|
26
|
+
attr_accessor :name, :namespace_name, :attributes, :namespaces
|
27
|
+
|
28
|
+
##
|
29
|
+
# The attribute prefix/namespace used for registering element namespaces.
|
30
|
+
#
|
31
|
+
# @return [String]
|
32
|
+
#
|
33
|
+
XMLNS_PREFIX = 'xmlns'.freeze
|
34
|
+
|
35
|
+
##
|
36
|
+
# @param [Hash] options
|
37
|
+
#
|
38
|
+
# @option options [String] :name The name of the element.
|
39
|
+
#
|
40
|
+
# @option options [String] :namespace_name The name of the namespace.
|
41
|
+
#
|
42
|
+
# @option options [Array<Oga::XML::Attribute>] :attributes The attributes
|
43
|
+
# of the element as an Array.
|
44
|
+
#
|
45
|
+
def initialize(options = {})
  # Bare `super` forwards `options` unchanged to the parent initializer.
  super

  @name, @namespace_name = options.values_at(:name, :namespace_name)

  # Missing collections fall back to fresh, empty ones.
  @attributes = options[:attributes] || []
  @namespaces = options[:namespaces] || {}

  # Point every attribute back at this element, then turn any "xmlns:*"
  # attributes into registered namespaces.
  link_attributes
  register_namespaces_from_attributes
end
|
56
|
+
|
57
|
+
##
|
58
|
+
# Returns an attribute matching the given name (with or without the
|
59
|
+
# namespace).
|
60
|
+
#
|
61
|
+
# @example
|
62
|
+
# # find an attribute that only has the name "foo"
|
63
|
+
# attribute('foo')
|
64
|
+
#
|
65
|
+
# # find an attribute with namespace "foo" and name "bar"
|
66
|
+
# attribute('foo:bar')
|
67
|
+
#
|
68
|
+
# @param [String|Symbol] name The name (with or without the namespace)
|
69
|
+
# of the attribute.
|
70
|
+
#
|
71
|
+
# @return [Oga::XML::Attribute]
|
72
|
+
#
|
73
|
+
def attribute(name)
  # `split_name` hands back the local name first and the namespace prefix
  # (or nil) second.
  local_name, prefix = split_name(name)

  # First matching attribute, or nil when nothing matches.
  return attributes.find do |candidate|
    attribute_matches?(candidate, prefix, local_name)
  end
end
|
82
|
+
|
83
|
+
alias_method :attr, :attribute
|
84
|
+
|
85
|
+
##
|
86
|
+
# Returns the value of the given attribute.
|
87
|
+
#
|
88
|
+
# @example
|
89
|
+
# element.get('class') # => "container"
|
90
|
+
#
|
91
|
+
# @see [#attribute]
|
92
|
+
#
|
93
|
+
def get(name)
  match = attribute(name)

  # No such attribute: there is no value to report.
  return unless match

  return match.value
end
|
98
|
+
|
99
|
+
##
|
100
|
+
# Adds a new attribute to the element.
|
101
|
+
#
|
102
|
+
# @param [Oga::XML::Attribute] attribute
|
103
|
+
#
|
104
|
+
def add_attribute(attribute)
  # Link the attribute to this element before storing it.
  attribute.element = self

  return attributes.push(attribute)
end
|
109
|
+
|
110
|
+
##
|
111
|
+
# Sets the value of an attribute to the given value. If the attribute does
|
112
|
+
# not exist it is created automatically.
|
113
|
+
#
|
114
|
+
# @param [String] name The name of the attribute, optionally including the
|
115
|
+
# namespace.
|
116
|
+
#
|
117
|
+
# @param [String] value The new value of the attribute.
|
118
|
+
#
|
119
|
+
def set(name, value)
  found = attribute(name)

  if found
    found.value = value
  else
    # Split the name exactly the way #attribute does. This keeps lookup and
    # creation consistent and also accepts Symbol names, which do not
    # respond to #include?.
    attr_name, ns = split_name(name)

    add_attribute(
      Attribute.new(
        :name           => attr_name,
        :namespace_name => ns,
        :value          => value
      )
    )
  end
end
|
140
|
+
|
141
|
+
##
|
142
|
+
# Returns the namespace of the element.
|
143
|
+
#
|
144
|
+
# @return [Oga::XML::Namespace]
|
145
|
+
#
|
146
|
+
def namespace
  # Look the namespace up only while nothing has been cached yet; a nil
  # lookup result is not cached and is retried on the next call.
  unless @namespace
    @namespace = available_namespaces[namespace_name]
  end

  return @namespace
end
|
149
|
+
|
150
|
+
##
|
151
|
+
# Returns the text of all child nodes joined together.
|
152
|
+
#
|
153
|
+
# @return [String]
|
154
|
+
#
|
155
|
+
def text
  # The child collection does the joining via its own #text.
  child_nodes = children

  return child_nodes.text
end
|
158
|
+
|
159
|
+
##
|
160
|
+
# Returns the text of the current element only.
|
161
|
+
#
|
162
|
+
# @return [String]
|
163
|
+
#
|
164
|
+
def inner_text
  buffer = ''

  # Only direct Text children contribute; see #text_nodes.
  text_nodes.each { |node| buffer << node.text }

  return buffer
end
|
173
|
+
|
174
|
+
##
|
175
|
+
# Returns any {Oga::XML::Text} nodes that are a direct child of this
|
176
|
+
# element.
|
177
|
+
#
|
178
|
+
# @return [Oga::XML::NodeSet]
|
179
|
+
#
|
180
|
+
def text_nodes
  found = NodeSet.new

  children.each do |child|
    # Anything that is not a Text node is skipped.
    next unless child.is_a?(Text)

    found << child
  end

  return found
end
|
189
|
+
|
190
|
+
##
|
191
|
+
# Sets the inner text of the current element to the given String.
|
192
|
+
#
|
193
|
+
# @param [String] text
|
194
|
+
#
|
195
|
+
def inner_text=(text)
  # Collect the Text children first and remove them afterwards. Removing a
  # node presumably detaches it from `children`, and mutating a collection
  # while iterating over it can skip the element following each removal,
  # leaving stale Text nodes behind.
  stale = []

  children.each do |child|
    stale << child if child.is_a?(Text)
  end

  stale.each { |node| node.remove }

  # The replacement text is appended after any remaining (non-text) children.
  children << XML::Text.new(:text => text)
end
|
202
|
+
|
203
|
+
##
|
204
|
+
# Converts the element and its child elements to XML.
|
205
|
+
#
|
206
|
+
# @return [String]
|
207
|
+
#
|
208
|
+
def to_xml
  # Namespaced elements are written as "prefix:name"; the prefix text comes
  # from interpolating the namespace object.
  prefix = namespace ? "#{namespace}:" : ''
  body   = children.map(&:to_xml).join('')

  # NOTE(review): the serialized attributes are concatenated without any
  # separator between them; confirm Attribute#to_xml accounts for that.
  attrs = attributes.map { |attr| attr.to_xml }.join('')
  attrs = " #{attrs}" unless attrs.empty?

  tag = "#{prefix}#{name}"

  return "<#{tag}#{attrs}>#{body}</#{tag}>"
end
|
221
|
+
|
222
|
+
##
|
223
|
+
# @return [String]
|
224
|
+
#
|
225
|
+
def inspect
|
226
|
+
segments = []
|
227
|
+
|
228
|
+
[:name, :namespace, :attributes, :children].each do |attr|
|
229
|
+
value = send(attr)
|
230
|
+
|
231
|
+
if !value or (value.respond_to?(:empty?) and value.empty?)
|
232
|
+
next
|
233
|
+
end
|
234
|
+
|
235
|
+
segments << "#{attr}: #{value.inspect}"
|
236
|
+
end
|
237
|
+
|
238
|
+
return "Element(#{segments.join(' ')})"
|
239
|
+
end
|
240
|
+
|
241
|
+
##
|
242
|
+
# @return [Symbol]
|
243
|
+
#
|
244
|
+
def node_type
  # Elements always report the same fixed type.
  return :element
end
|
247
|
+
|
248
|
+
##
|
249
|
+
# Registers a new namespace for the current element and its child
|
250
|
+
# elements.
|
251
|
+
#
|
252
|
+
# @param [String] name
|
253
|
+
# @param [String] uri
|
254
|
+
# @see [Oga::XML::Namespace#initialize]
|
255
|
+
#
|
256
|
+
def register_namespace(name, uri)
  # A prefix may only be registered once per element.
  taken = namespaces[name]

  raise ArgumentError, "The namespace #{name.inspect} already exists" if taken

  namespaces[name] = Namespace.new(:name => name, :uri => uri)
end
|
263
|
+
|
264
|
+
##
|
265
|
+
# Returns a Hash containing all the namespaces available to the current
|
266
|
+
# element.
|
267
|
+
#
|
268
|
+
# @return [Hash]
|
269
|
+
#
|
270
|
+
def available_namespaces
  merged = namespaces
  node   = parent

  # Walk up the ancestors for as long as they can carry namespaces.
  while node && node.respond_to?(:namespaces)
    # Everything gathered so far was declared closer to this element than
    # the ancestor's namespaces, so on a prefix clash the existing entry
    # must win: the innermost declaration shadows the outer ones.
    merged = node.namespaces.merge(merged)
    node   = node.parent
  end

  return merged
end
|
281
|
+
|
282
|
+
private
|
283
|
+
|
284
|
+
##
|
285
|
+
# Registers namespaces based on any "xmlns" attributes. Once a namespace
|
286
|
+
# has been registered the corresponding attribute is removed.
|
287
|
+
#
|
288
|
+
def register_namespaces_from_attributes
  # The raw `namespace_name` is compared rather than `namespace.name`
  # because "xmlns" itself is not a registered namespace.
  declarations, regular = attributes.partition do |attr|
    attr.namespace_name == XMLNS_PREFIX
  end

  declarations.each do |attr|
    register_namespace(attr.name, attr.value)
  end

  # Declarations have been turned into namespaces; only the remaining
  # attributes stay on the element.
  self.attributes = regular
end
|
299
|
+
|
300
|
+
##
|
301
|
+
# Links all attributes to the current element.
|
302
|
+
#
|
303
|
+
def link_attributes
  # Every attribute gets a back-reference to this element.
  attributes.each { |attr| attr.element = self }
end
|
308
|
+
|
309
|
+
##
|
310
|
+
# @param [String] name
|
311
|
+
# @return [Array]
|
312
|
+
#
|
313
|
+
def split_name(name)
  parts = name.to_s.split(':')

  # The last segment is the local name, the one before it (if any) the
  # namespace prefix. Note the order of the returned pair: name first.
  local  = parts.pop
  prefix = parts.pop

  return local, prefix
end
|
318
|
+
|
319
|
+
##
|
320
|
+
# @param [Oga::XML::Attribute] attr
|
321
|
+
# @param [String] ns
|
322
|
+
# @param [String] name
|
323
|
+
# @return [TrueClass|FalseClass]
|
324
|
+
#
|
325
|
+
def attribute_matches?(attr, ns, name)
  # A different local name can never match, whatever the namespace.
  return false unless attr.name == name

  # With a prefix requested the attribute's namespace (as a String) has to
  # equal it; without one only attributes that have no namespace qualify.
  if ns
    return attr.namespace.to_s == ns
  end

  return !attr.namespace
end
|
338
|
+
end # Element
|
339
|
+
end # XML
|
340
|
+
end # Oga
|
@@ -0,0 +1,399 @@
|
|
1
|
+
module Oga
  module XML
    ##
    # Low level lexer that supports both XML and HTML (using an extra option).
    # To lex HTML input set the `:html` option to `true` when creating an
    # instance of the lexer:
    #
    #     lexer = Oga::XML::Lexer.new('<br>', :html => true)
    #
    # This lexer can process both Strings and IO-like objects (anything other
    # than a String that responds to `each_line`). IO-like input is processed
    # on a line by line basis. This can greatly reduce memory usage in
    # exchange for a slightly slower runtime.
    #
    # The actual scanning is done by `reset_native` and `advance_native`,
    # which are not defined in this file (presumably they are provided by the
    # native extension). The `on_*` methods below are the callbacks that turn
    # scanner events into tokens.
    #
    # ## Thread Safety
    #
    # Since this class keeps track of an internal state you can not use the
    # same instance between multiple threads at the same time. For example, the
    # following will not work reliably:
    #
    #     # Don't do this!
    #     lexer   = Oga::XML::Lexer.new('....')
    #     threads = []
    #
    #     2.times do
    #       threads << Thread.new do
    #         lexer.advance do |*args|
    #           p args
    #         end
    #       end
    #     end
    #
    #     threads.each(&:join)
    #
    # However, it is perfectly safe to use different instances per thread.
    # There is no _global_ state used by this lexer.
    #
    # @!attribute [r] html
    #  @return [TrueClass|FalseClass]
    #
    class Lexer
      attr_reader :html

      ##
      # Names of the HTML void elements that should be handled when HTML lexing
      # is enabled.
      #
      # @return [Set]
      #
      HTML_VOID_ELEMENTS = Set.new([
        'area',
        'base',
        'br',
        'col',
        'command',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr'
      ]).freeze

      ##
      # @param [String|IO] data The data to lex. This can either be a String or
      #  an IO-like object (see {#io_input?}).
      #
      # @param [Hash] options
      #
      # @option options [TrueClass|FalseClass] :html When set to `true` the
      #  lexer will treat the input as HTML instead of SGML/XML. This makes it
      #  possible to lex HTML void elements such as `<link href="">`.
      #
      def initialize(data, options = {})
        @data = data
        @html = options[:html]

        reset
      end

      ##
      # Resets the internal state of the lexer. Typically you don't need to
      # call this method yourself as it's called by #lex after lexing the
      # input.
      #
      def reset
        @line     = 1
        @elements = []

        # Only rewind inputs that are read incrementally and can actually be
        # rewound; Strings are always lexed from the start.
        @data.rewind if io_input? && @data.respond_to?(:rewind)

        reset_native
      end

      ##
      # Yields the data to lex to the supplied block. IO-like input is yielded
      # one line at a time, any other input is yielded once, as a whole.
      #
      # @yieldparam [String]
      #
      def read_data
        # String also defines #each_line, but using it has no benefit over
        # just lexing the String in one go; #io_input? therefore excludes
        # Strings.
        if io_input?
          @data.each_line do |line|
            yield line
          end
        else
          yield @data
        end
      end

      ##
      # Returns `true` if the input is an IO like object, false otherwise.
      #
      # Any object other than a String that responds to `each_line` counts as
      # IO-like. This covers IO and StringIO as well as wrappers that do not
      # inherit from IO.
      #
      # @return [TrueClass|FalseClass]
      #
      def io_input?
        return !@data.is_a?(String) && @data.respond_to?(:each_line)
      end

      ##
      # Gathers all the tokens for the input and returns them as an Array.
      # Each entry has the format `[TYPE, VALUE, LINE]`.
      #
      # This method resets the internal state of the lexer after consuming the
      # input.
      #
      # @see #advance
      # @return [Array]
      #
      def lex
        tokens = []

        advance do |type, value, line|
          tokens << [type, value, line]
        end

        reset

        return tokens
      end

      ##
      # Advances through the input and generates the corresponding tokens. Each
      # token is yielded to the supplied block.
      #
      # Each token is yielded as three values:
      #
      #     TYPE, VALUE, LINE
      #
      # The type is a symbol, the value is either nil or a String, the line is
      # the line number the token was found on.
      #
      # This method stores the supplied block in `@block` and resets it after
      # the lexer loop has finished, even when lexing raised an error.
      #
      # This method does *not* reset the internal state of the lexer.
      #
      # @yieldparam [Symbol] type
      # @yieldparam [String] value
      # @yieldparam [Integer] line
      #
      def advance(&block)
        @block = block

        read_data do |chunk|
          advance_native(chunk)
        end
      ensure
        @block = nil
      end

      ##
      # @return [TrueClass|FalseClass]
      #
      def html?
        return !!html
      end

      private

      ##
      # @param [Integer] amount The amount of lines to advance.
      #
      def advance_line(amount = 1)
        @line += amount
      end

      ##
      # Calls the supplied block with the information of the current token.
      #
      # @param [Symbol] type The token type.
      # @param [String] value The token value.
      #
      # @yieldparam [Symbol] type
      # @yieldparam [String] value
      # @yieldparam [Integer] line
      #
      def add_token(type, value = nil)
        @block.call(type, value, @line)
      end

      ##
      # Returns the name of the element we're currently in. Element names are
      # only tracked when HTML lexing is enabled.
      #
      # @return [String]
      #
      def current_element
        return @elements.last
      end

      ##
      # Called when processing single/double quoted strings.
      #
      # @param [String] value The data between the quotes.
      #
      def on_string(value)
        add_token(:T_STRING, value)
      end

      ##
      # Called when a doctype starts.
      #
      def on_doctype_start
        add_token(:T_DOCTYPE_START)
      end

      ##
      # Called on the identifier specifying the type of the doctype.
      #
      # @param [String] value
      #
      def on_doctype_type(value)
        add_token(:T_DOCTYPE_TYPE, value)
      end

      ##
      # Called on the identifier specifying the name of the doctype.
      #
      # @param [String] value
      #
      def on_doctype_name(value)
        add_token(:T_DOCTYPE_NAME, value)
      end

      ##
      # Called on the end of a doctype.
      #
      def on_doctype_end
        add_token(:T_DOCTYPE_END)
      end

      ##
      # Called on an inline doctype block.
      #
      # @param [String] value
      #
      def on_doctype_inline(value)
        add_token(:T_DOCTYPE_INLINE, value)
      end

      ##
      # Called on a CDATA tag.
      #
      # @param [String] value
      #
      def on_cdata(value)
        add_token(:T_CDATA, value)
      end

      ##
      # Called on a comment.
      #
      # @param [String] value
      #
      def on_comment(value)
        add_token(:T_COMMENT, value)
      end

      ##
      # Called on the start of an XML declaration tag.
      #
      def on_xml_decl_start
        add_token(:T_XML_DECL_START)
      end

      ##
      # Called on the end of an XML declaration tag.
      #
      def on_xml_decl_end
        add_token(:T_XML_DECL_END)
      end

      ##
      # Called on the start of an element.
      #
      def on_element_start
        add_token(:T_ELEM_START)
      end

      ##
      # Called on the start of a processing instruction.
      #
      def on_proc_ins_start
        add_token(:T_PROC_INS_START)
      end

      ##
      # Called on a processing instruction name.
      #
      # @param [String] value
      #
      def on_proc_ins_name(value)
        add_token(:T_PROC_INS_NAME, value)
      end

      ##
      # Called on the end of a processing instruction.
      #
      def on_proc_ins_end
        add_token(:T_PROC_INS_END)
      end

      ##
      # Called on the name of an element.
      #
      # @param [String] name The name of the element, including namespace.
      #
      def on_element_name(name)
        # The element stack is only needed to detect HTML void elements.
        @elements << name if html?

        add_token(:T_ELEM_NAME, name)
      end

      ##
      # Called on the element namespace.
      #
      # @param [String] namespace
      #
      def on_element_ns(namespace)
        add_token(:T_ELEM_NS, namespace)
      end

      ##
      # Called on the closing `>` of the open tag of an element.
      #
      # In HTML mode void elements (e.g. `<br>`) never get a closing tag, so
      # they are closed right here.
      #
      def on_element_open_end
        if html? && HTML_VOID_ELEMENTS.include?(current_element)
          add_token(:T_ELEM_END)
          @elements.pop
        end
      end

      ##
      # Called on the closing tag of an element.
      #
      def on_element_end
        add_token(:T_ELEM_END)

        @elements.pop if html?
      end

      ##
      # Called on regular text values. Empty values are ignored.
      #
      # The token carries the line the text starts on; the line counter is
      # advanced afterwards by the number of newlines in the text.
      #
      # @param [String] value
      #
      def on_text(value)
        unless value.empty?
          add_token(:T_TEXT, value)

          lines = value.count("\n")

          advance_line(lines) if lines > 0
        end
      end

      ##
      # Called on attribute namespaces.
      #
      # @param [String] value
      #
      def on_attribute_ns(value)
        add_token(:T_ATTR_NS, value)
      end

      ##
      # Called on tag attributes.
      #
      # @param [String] value
      #
      def on_attribute(value)
        add_token(:T_ATTR, value)
      end
    end # Lexer
  end # XML
end # Oga
|