oga 1.2.3 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/css_selectors.md +1 -1
- data/lib/oga.rb +6 -1
- data/lib/oga/blacklist.rb +0 -10
- data/lib/oga/css/lexer.rb +530 -255
- data/lib/oga/css/parser.rb +232 -230
- data/lib/oga/entity_decoder.rb +0 -4
- data/lib/oga/html/entities.rb +0 -4
- data/lib/oga/html/parser.rb +0 -4
- data/lib/oga/html/sax_parser.rb +0 -4
- data/lib/oga/lru.rb +0 -26
- data/lib/oga/oga.rb +0 -8
- data/lib/oga/ruby/generator.rb +225 -0
- data/lib/oga/ruby/node.rb +189 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +0 -6
- data/lib/oga/xml/attribute.rb +13 -20
- data/lib/oga/xml/cdata.rb +0 -4
- data/lib/oga/xml/character_node.rb +0 -8
- data/lib/oga/xml/comment.rb +0 -4
- data/lib/oga/xml/default_namespace.rb +0 -2
- data/lib/oga/xml/doctype.rb +0 -8
- data/lib/oga/xml/document.rb +10 -14
- data/lib/oga/xml/element.rb +1 -52
- data/lib/oga/xml/entities.rb +0 -26
- data/lib/oga/xml/expanded_name.rb +12 -0
- data/lib/oga/xml/html_void_elements.rb +0 -2
- data/lib/oga/xml/lexer.rb +0 -86
- data/lib/oga/xml/namespace.rb +0 -10
- data/lib/oga/xml/node.rb +18 -34
- data/lib/oga/xml/node_set.rb +0 -50
- data/lib/oga/xml/parser.rb +13 -50
- data/lib/oga/xml/processing_instruction.rb +0 -8
- data/lib/oga/xml/pull_parser.rb +0 -18
- data/lib/oga/xml/querying.rb +58 -19
- data/lib/oga/xml/sax_parser.rb +0 -18
- data/lib/oga/xml/text.rb +0 -12
- data/lib/oga/xml/traversal.rb +0 -4
- data/lib/oga/xml/xml_declaration.rb +0 -8
- data/lib/oga/xpath/compiler.rb +1568 -0
- data/lib/oga/xpath/conversion.rb +102 -0
- data/lib/oga/xpath/lexer.rb +1844 -1238
- data/lib/oga/xpath/parser.rb +182 -153
- metadata +7 -3
- data/lib/oga/xpath/evaluator.rb +0 -1800
data/lib/oga/xml/document.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Class used for storing information about an entire XML document. This
|
5
4
|
# includes the doctype, XML declaration, child nodes and more.
|
6
|
-
#
|
7
5
|
class Document
|
8
6
|
include Querying
|
9
7
|
include Traversal
|
@@ -18,14 +16,12 @@ module Oga
|
|
18
16
|
# @return [Symbol]
|
19
17
|
attr_reader :type
|
20
18
|
|
21
|
-
##
|
22
19
|
# @param [Hash] options
|
23
20
|
#
|
24
21
|
# @option options [Oga::XML::NodeSet] :children
|
25
22
|
# @option options [Oga::XML::Doctype] :doctype
|
26
23
|
# @option options [Oga::XML::XmlDeclaration] :xml_declaration
|
27
24
|
# @option options [Symbol] :type
|
28
|
-
#
|
29
25
|
def initialize(options = {})
|
30
26
|
@doctype = options[:doctype]
|
31
27
|
@xml_declaration = options[:xml_declaration]
|
@@ -34,18 +30,14 @@ module Oga
|
|
34
30
|
self.children = options[:children] if options[:children]
|
35
31
|
end
|
36
32
|
|
37
|
-
##
|
38
33
|
# @return [Oga::XML::NodeSet]
|
39
|
-
#
|
40
34
|
def children
|
41
35
|
@children ||= NodeSet.new([], self)
|
42
36
|
end
|
43
37
|
|
44
|
-
##
|
45
38
|
# Sets the child nodes of the document.
|
46
39
|
#
|
47
40
|
# @param [Oga::XML::NodeSet|Array] nodes
|
48
|
-
#
|
49
41
|
def children=(nodes)
|
50
42
|
if nodes.is_a?(NodeSet)
|
51
43
|
@children = nodes
|
@@ -54,11 +46,19 @@ module Oga
|
|
54
46
|
end
|
55
47
|
end
|
56
48
|
|
57
|
-
|
49
|
+
# Returns self.
|
50
|
+
#
|
51
|
+
# This method exists to make this class compatible with Element, which in
|
52
|
+
# turn makes it easier to use both in the XPath compiler.
|
53
|
+
#
|
54
|
+
# @return [Oga::XML::Document]
|
55
|
+
def root_node
|
56
|
+
self
|
57
|
+
end
|
58
|
+
|
58
59
|
# Converts the document and its child nodes to XML.
|
59
60
|
#
|
60
61
|
# @return [String]
|
61
|
-
#
|
62
62
|
def to_xml
|
63
63
|
xml = children.map(&:to_xml).join('')
|
64
64
|
|
@@ -73,19 +73,15 @@ module Oga
|
|
73
73
|
xml
|
74
74
|
end
|
75
75
|
|
76
|
-
##
|
77
76
|
# @return [TrueClass|FalseClass]
|
78
|
-
#
|
79
77
|
def html?
|
80
78
|
type.equal?(:html)
|
81
79
|
end
|
82
80
|
|
83
|
-
##
|
84
81
|
# Inspects the document and its child nodes. Child nodes are indented for
|
85
82
|
# each nesting level.
|
86
83
|
#
|
87
84
|
# @return [String]
|
88
|
-
#
|
89
85
|
def inspect
|
90
86
|
segments = []
|
91
87
|
|
data/lib/oga/xml/element.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Class that contains information about an XML element such as the name,
|
5
4
|
# attributes and child nodes.
|
6
|
-
#
|
7
5
|
class Element < Node
|
8
6
|
include Querying
|
7
|
+
include ExpandedName
|
9
8
|
|
10
9
|
# @return [String]
|
11
10
|
attr_reader :namespace_name
|
@@ -19,14 +18,11 @@ module Oga
|
|
19
18
|
# @return [Hash]
|
20
19
|
attr_writer :namespaces
|
21
20
|
|
22
|
-
##
|
23
21
|
# The attribute prefix/namespace used for registering element namespaces.
|
24
22
|
#
|
25
23
|
# @return [String]
|
26
|
-
#
|
27
24
|
XMLNS_PREFIX = 'xmlns'.freeze
|
28
25
|
|
29
|
-
##
|
30
26
|
# @param [Hash] options
|
31
27
|
#
|
32
28
|
# @option options [String] :name The name of the element.
|
@@ -35,7 +31,6 @@ module Oga
|
|
35
31
|
#
|
36
32
|
# @option options [Array<Oga::XML::Attribute>] :attributes The attributes
|
37
33
|
# of the element as an Array.
|
38
|
-
#
|
39
34
|
def initialize(options = {})
|
40
35
|
super
|
41
36
|
|
@@ -48,15 +43,12 @@ module Oga
|
|
48
43
|
register_namespaces_from_attributes
|
49
44
|
end
|
50
45
|
|
51
|
-
##
|
52
46
|
# @param [String] name
|
53
|
-
#
|
54
47
|
def namespace_name=(name)
|
55
48
|
@namespace_name = name
|
56
49
|
@namespace = nil
|
57
50
|
end
|
58
51
|
|
59
|
-
##
|
60
52
|
# Returns an attribute matching the given name (with or without the
|
61
53
|
# namespace).
|
62
54
|
#
|
@@ -71,7 +63,6 @@ module Oga
|
|
71
63
|
# of the attribute.
|
72
64
|
#
|
73
65
|
# @return [Oga::XML::Attribute]
|
74
|
-
#
|
75
66
|
def attribute(name)
|
76
67
|
name, ns = split_name(name)
|
77
68
|
|
@@ -84,32 +75,27 @@ module Oga
|
|
84
75
|
|
85
76
|
alias_method :attr, :attribute
|
86
77
|
|
87
|
-
##
|
88
78
|
# Returns the value of the given attribute.
|
89
79
|
#
|
90
80
|
# @example
|
91
81
|
# element.get('class') # => "container"
|
92
82
|
#
|
93
83
|
# @see [#attribute]
|
94
|
-
#
|
95
84
|
def get(name)
|
96
85
|
found = attribute(name)
|
97
86
|
|
98
87
|
found ? found.value : nil
|
99
88
|
end
|
100
89
|
|
101
|
-
##
|
102
90
|
# Adds a new attribute to the element.
|
103
91
|
#
|
104
92
|
# @param [Oga::XML::Attribute] attribute
|
105
|
-
#
|
106
93
|
def add_attribute(attribute)
|
107
94
|
attribute.element = self
|
108
95
|
|
109
96
|
attributes << attribute
|
110
97
|
end
|
111
98
|
|
112
|
-
##
|
113
99
|
# Sets the value of an attribute to the given value. If the attribute does
|
114
100
|
# not exist it is created automatically.
|
115
101
|
#
|
@@ -117,7 +103,6 @@ module Oga
|
|
117
103
|
# namespace.
|
118
104
|
#
|
119
105
|
# @param [String] value The new value of the attribute.
|
120
|
-
#
|
121
106
|
def set(name, value)
|
122
107
|
found = attribute(name)
|
123
108
|
|
@@ -140,25 +125,21 @@ module Oga
|
|
140
125
|
end
|
141
126
|
end
|
142
127
|
|
143
|
-
##
|
144
128
|
# Removes an attribute from the element.
|
145
129
|
#
|
146
130
|
# @param [String] name The name (optionally including namespace prefix)
|
147
131
|
# of the attribute to remove.
|
148
132
|
#
|
149
133
|
# @return [Oga::XML::Attribute]
|
150
|
-
#
|
151
134
|
def unset(name)
|
152
135
|
found = attribute(name)
|
153
136
|
|
154
137
|
return attributes.delete(found) if found
|
155
138
|
end
|
156
139
|
|
157
|
-
##
|
158
140
|
# Returns the namespace of the element.
|
159
141
|
#
|
160
142
|
# @return [Oga::XML::Namespace]
|
161
|
-
#
|
162
143
|
def namespace
|
163
144
|
unless @namespace
|
164
145
|
available = available_namespaces
|
@@ -168,40 +149,32 @@ module Oga
|
|
168
149
|
@namespace
|
169
150
|
end
|
170
151
|
|
171
|
-
##
|
172
152
|
# Returns the namespaces registered on this element, or an empty Hash in
|
173
153
|
# case of an HTML element.
|
174
154
|
#
|
175
155
|
# @return [Hash]
|
176
|
-
#
|
177
156
|
def namespaces
|
178
157
|
html? ? {} : @namespaces
|
179
158
|
end
|
180
159
|
|
181
|
-
##
|
182
160
|
# Returns true if the current element resides in the default XML
|
183
161
|
# namespace.
|
184
162
|
#
|
185
163
|
# @return [TrueClass|FalseClass]
|
186
|
-
#
|
187
164
|
def default_namespace?
|
188
165
|
namespace == DEFAULT_NAMESPACE || namespace.nil?
|
189
166
|
end
|
190
167
|
|
191
|
-
##
|
192
168
|
# Returns the text of all child nodes joined together.
|
193
169
|
#
|
194
170
|
# @return [String]
|
195
|
-
#
|
196
171
|
def text
|
197
172
|
children.text
|
198
173
|
end
|
199
174
|
|
200
|
-
##
|
201
175
|
# Returns the text of the current element only.
|
202
176
|
#
|
203
177
|
# @return [String]
|
204
|
-
#
|
205
178
|
def inner_text
|
206
179
|
text = ''
|
207
180
|
|
@@ -212,12 +185,10 @@ module Oga
|
|
212
185
|
text
|
213
186
|
end
|
214
187
|
|
215
|
-
##
|
216
188
|
# Returns any {Oga::XML::Text} nodes that are a direct child of this
|
217
189
|
# element.
|
218
190
|
#
|
219
191
|
# @return [Oga::XML::NodeSet]
|
220
|
-
#
|
221
192
|
def text_nodes
|
222
193
|
nodes = NodeSet.new
|
223
194
|
|
@@ -228,21 +199,17 @@ module Oga
|
|
228
199
|
nodes
|
229
200
|
end
|
230
201
|
|
231
|
-
##
|
232
202
|
# Sets the inner text of the current element to the given String.
|
233
203
|
#
|
234
204
|
# @param [String] text
|
235
|
-
#
|
236
205
|
def inner_text=(text)
|
237
206
|
text_node = XML::Text.new(:text => text)
|
238
207
|
@children = NodeSet.new([text_node], self)
|
239
208
|
end
|
240
209
|
|
241
|
-
##
|
242
210
|
# Converts the element and its child elements to XML.
|
243
211
|
#
|
244
212
|
# @return [String]
|
245
|
-
#
|
246
213
|
def to_xml
|
247
214
|
if namespace_name
|
248
215
|
full_name = "#{namespace_name}:#{name}"
|
@@ -264,9 +231,7 @@ module Oga
|
|
264
231
|
end
|
265
232
|
end
|
266
233
|
|
267
|
-
##
|
268
234
|
# @return [String]
|
269
|
-
#
|
270
235
|
def inspect
|
271
236
|
segments = []
|
272
237
|
|
@@ -283,7 +248,6 @@ module Oga
|
|
283
248
|
"Element(#{segments.join(' ')})"
|
284
249
|
end
|
285
250
|
|
286
|
-
##
|
287
251
|
# Registers a new namespace for the current element and its child
|
288
252
|
# elements.
|
289
253
|
#
|
@@ -291,7 +255,6 @@ module Oga
|
|
291
255
|
# @param [String] uri
|
292
256
|
# @param [TrueClass|FalseClass] flush
|
293
257
|
# @see [Oga::XML::Namespace#initialize]
|
294
|
-
#
|
295
258
|
def register_namespace(name, uri, flush = true)
|
296
259
|
if namespaces[name]
|
297
260
|
raise ArgumentError, "The namespace #{name.inspect} already exists"
|
@@ -302,12 +265,10 @@ module Oga
|
|
302
265
|
flush_namespaces_cache if flush
|
303
266
|
end
|
304
267
|
|
305
|
-
##
|
306
268
|
# Returns a Hash containing all the namespaces available to the current
|
307
269
|
# element.
|
308
270
|
#
|
309
271
|
# @return [Hash]
|
310
|
-
#
|
311
272
|
def available_namespaces
|
312
273
|
# HTML(5) completely ignores namespaces
|
313
274
|
unless @available_namespaces
|
@@ -332,11 +293,9 @@ module Oga
|
|
332
293
|
@available_namespaces
|
333
294
|
end
|
334
295
|
|
335
|
-
##
|
336
296
|
# Returns `true` if the element is a self-closing element.
|
337
297
|
#
|
338
298
|
# @return [TrueClass|FalseClass]
|
339
|
-
#
|
340
299
|
def self_closing?
|
341
300
|
self_closing = children.empty?
|
342
301
|
root = root_node
|
@@ -349,10 +308,8 @@ module Oga
|
|
349
308
|
self_closing
|
350
309
|
end
|
351
310
|
|
352
|
-
##
|
353
311
|
# Flushes the namespaces cache of the current element and all its child
|
354
312
|
# elements.
|
355
|
-
#
|
356
313
|
def flush_namespaces_cache
|
357
314
|
@available_namespaces = nil
|
358
315
|
@namespace = nil
|
@@ -364,9 +321,7 @@ module Oga
|
|
364
321
|
|
365
322
|
private
|
366
323
|
|
367
|
-
##
|
368
324
|
# Registers namespaces based on any "xmlns" attributes.
|
369
|
-
#
|
370
325
|
def register_namespaces_from_attributes
|
371
326
|
flush = false
|
372
327
|
|
@@ -385,31 +340,25 @@ module Oga
|
|
385
340
|
flush_namespaces_cache if flush
|
386
341
|
end
|
387
342
|
|
388
|
-
##
|
389
343
|
# Links all attributes to the current element.
|
390
|
-
#
|
391
344
|
def link_attributes
|
392
345
|
attributes.each do |attr|
|
393
346
|
attr.element = self
|
394
347
|
end
|
395
348
|
end
|
396
349
|
|
397
|
-
##
|
398
350
|
# @param [String] name
|
399
351
|
# @return [Array]
|
400
|
-
#
|
401
352
|
def split_name(name)
|
402
353
|
segments = name.to_s.split(':')
|
403
354
|
|
404
355
|
[segments.pop, segments.pop]
|
405
356
|
end
|
406
357
|
|
407
|
-
##
|
408
358
|
# @param [Oga::XML::Attribute] attr
|
409
359
|
# @param [String] ns
|
410
360
|
# @param [String] name
|
411
361
|
# @return [TrueClass|FalseClass]
|
412
|
-
#
|
413
362
|
def attribute_matches?(attr, ns, name)
|
414
363
|
name_matches = attr.name == name
|
415
364
|
ns_matches = false
|
data/lib/oga/xml/entities.rb
CHANGED
@@ -1,18 +1,14 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Module for encoding/decoding XML and HTML entities. The mapping of HTML
|
5
4
|
# entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
|
6
|
-
#
|
7
5
|
module Entities
|
8
|
-
##
|
9
6
|
# Hash containing XML entities and the corresponding characters.
|
10
7
|
#
|
11
8
|
# The `&` mapping must come last to ensure proper conversion of non
|
12
9
|
# encoded to encoded forms (see {Oga::XML::Text#to_xml}).
|
13
10
|
#
|
14
11
|
# @return [Hash]
|
15
|
-
#
|
16
12
|
DECODE_MAPPING = {
|
17
13
|
'&lt;' => '<',
|
18
14
|
'&gt;' => '>',
|
@@ -21,23 +17,19 @@ module Oga
|
|
21
17
|
'&amp;' => '&',
|
22
18
|
}
|
23
19
|
|
24
|
-
##
|
25
20
|
# Hash containing characters and the corresponding XML entities.
|
26
21
|
#
|
27
22
|
# @return [Hash]
|
28
|
-
#
|
29
23
|
ENCODE_MAPPING = {
|
30
24
|
'&' => '&amp;',
|
31
25
|
'>' => '&gt;',
|
32
26
|
'<' => '&lt;',
|
33
27
|
}
|
34
28
|
|
35
|
-
##
|
36
29
|
# Hash containing characters and the corresponding XML entities to use
|
37
30
|
# when encoding XML/HTML attribute values.
|
38
31
|
#
|
39
32
|
# @return [Hash]
|
40
|
-
#
|
41
33
|
ENCODE_ATTRIBUTE_MAPPING = {
|
42
34
|
'&' => '&amp;',
|
43
35
|
'>' => '&gt;',
|
@@ -46,50 +38,36 @@ module Oga
|
|
46
38
|
'"' => '&quot;'
|
47
39
|
}
|
48
40
|
|
49
|
-
##
|
50
41
|
# @return [String]
|
51
|
-
#
|
52
42
|
AMPERSAND = '&'.freeze
|
53
43
|
|
54
|
-
##
|
55
44
|
# Regexp for matching XML/HTML entities such as "&nbsp;".
|
56
45
|
#
|
57
46
|
# @return [Regexp]
|
58
|
-
#
|
59
47
|
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
|
60
48
|
|
61
|
-
##
|
62
49
|
# Regexp for matching XML/HTML numeric entities such as "&#38;".
|
63
50
|
#
|
64
51
|
# @return [Regexp]
|
65
|
-
#
|
66
52
|
NUMERIC_CODE_POINT_ENTITY = /&#(\d+);/
|
67
53
|
|
68
|
-
##
|
69
54
|
# Regexp for matching XML/HTML hex entities such as "&#x3C;".
|
70
55
|
#
|
71
56
|
# @return [Regexp]
|
72
|
-
#
|
73
57
|
HEX_CODE_POINT_ENTITY = /&#x([a-fA-F0-9]+);/
|
74
58
|
|
75
|
-
##
|
76
59
|
# @return [Regexp]
|
77
|
-
#
|
78
60
|
ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
|
79
61
|
|
80
|
-
##
|
81
62
|
# @return [Regexp]
|
82
|
-
#
|
83
63
|
ENCODE_ATTRIBUTE_REGEXP =
|
84
64
|
Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
|
85
65
|
|
86
|
-
##
|
87
66
|
# Decodes XML entities.
|
88
67
|
#
|
89
68
|
# @param [String] input
|
90
69
|
# @param [Hash] mapping
|
91
70
|
# @return [String]
|
92
|
-
#
|
93
71
|
def self.decode(input, mapping = DECODE_MAPPING)
|
94
72
|
return input unless input.include?(AMPERSAND)
|
95
73
|
|
@@ -110,23 +88,19 @@ module Oga
|
|
110
88
|
input
|
111
89
|
end
|
112
90
|
|
113
|
-
##
|
114
91
|
# Encodes special characters as XML entities.
|
115
92
|
#
|
116
93
|
# @param [String] input
|
117
94
|
# @param [Hash] mapping
|
118
95
|
# @return [String]
|
119
|
-
#
|
120
96
|
def self.encode(input, mapping = ENCODE_MAPPING)
|
121
97
|
input.gsub(ENCODE_REGEXP, mapping)
|
122
98
|
end
|
123
99
|
|
124
|
-
##
|
125
100
|
# Encodes special characters in an XML attribute value.
|
126
101
|
#
|
127
102
|
# @param [String] input
|
128
103
|
# @return [String]
|
129
|
-
#
|
130
104
|
def self.encode_attribute(input)
|
131
105
|
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
|
132
106
|
end
|