oga 1.2.3-java → 1.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/css_selectors.md +1 -1
- data/lib/liboga.jar +0 -0
- data/lib/oga.rb +6 -1
- data/lib/oga/blacklist.rb +0 -10
- data/lib/oga/css/lexer.rb +530 -255
- data/lib/oga/css/parser.rb +232 -230
- data/lib/oga/entity_decoder.rb +0 -4
- data/lib/oga/html/entities.rb +0 -4
- data/lib/oga/html/parser.rb +0 -4
- data/lib/oga/html/sax_parser.rb +0 -4
- data/lib/oga/lru.rb +0 -26
- data/lib/oga/oga.rb +0 -8
- data/lib/oga/ruby/generator.rb +225 -0
- data/lib/oga/ruby/node.rb +189 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/whitelist.rb +0 -6
- data/lib/oga/xml/attribute.rb +13 -20
- data/lib/oga/xml/cdata.rb +0 -4
- data/lib/oga/xml/character_node.rb +0 -8
- data/lib/oga/xml/comment.rb +0 -4
- data/lib/oga/xml/default_namespace.rb +0 -2
- data/lib/oga/xml/doctype.rb +0 -8
- data/lib/oga/xml/document.rb +10 -14
- data/lib/oga/xml/element.rb +1 -52
- data/lib/oga/xml/entities.rb +0 -26
- data/lib/oga/xml/expanded_name.rb +12 -0
- data/lib/oga/xml/html_void_elements.rb +0 -2
- data/lib/oga/xml/lexer.rb +0 -86
- data/lib/oga/xml/namespace.rb +0 -10
- data/lib/oga/xml/node.rb +18 -34
- data/lib/oga/xml/node_set.rb +0 -50
- data/lib/oga/xml/parser.rb +13 -50
- data/lib/oga/xml/processing_instruction.rb +0 -8
- data/lib/oga/xml/pull_parser.rb +0 -18
- data/lib/oga/xml/querying.rb +58 -19
- data/lib/oga/xml/sax_parser.rb +0 -18
- data/lib/oga/xml/text.rb +0 -12
- data/lib/oga/xml/traversal.rb +0 -4
- data/lib/oga/xml/xml_declaration.rb +0 -8
- data/lib/oga/xpath/compiler.rb +1568 -0
- data/lib/oga/xpath/conversion.rb +102 -0
- data/lib/oga/xpath/lexer.rb +1844 -1238
- data/lib/oga/xpath/parser.rb +182 -153
- metadata +7 -3
- data/lib/oga/xpath/evaluator.rb +0 -1800
data/lib/oga/xml/document.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Class used for storing information about an entire XML document. This
|
5
4
|
# includes the doctype, XML declaration, child nodes and more.
|
6
|
-
#
|
7
5
|
class Document
|
8
6
|
include Querying
|
9
7
|
include Traversal
|
@@ -18,14 +16,12 @@ module Oga
|
|
18
16
|
# @return [Symbol]
|
19
17
|
attr_reader :type
|
20
18
|
|
21
|
-
##
|
22
19
|
# @param [Hash] options
|
23
20
|
#
|
24
21
|
# @option options [Oga::XML::NodeSet] :children
|
25
22
|
# @option options [Oga::XML::Doctype] :doctype
|
26
23
|
# @option options [Oga::XML::XmlDeclaration] :xml_declaration
|
27
24
|
# @option options [Symbol] :type
|
28
|
-
#
|
29
25
|
def initialize(options = {})
|
30
26
|
@doctype = options[:doctype]
|
31
27
|
@xml_declaration = options[:xml_declaration]
|
@@ -34,18 +30,14 @@ module Oga
|
|
34
30
|
self.children = options[:children] if options[:children]
|
35
31
|
end
|
36
32
|
|
37
|
-
##
|
38
33
|
# @return [Oga::XML::NodeSet]
|
39
|
-
#
|
40
34
|
def children
|
41
35
|
@children ||= NodeSet.new([], self)
|
42
36
|
end
|
43
37
|
|
44
|
-
##
|
45
38
|
# Sets the child nodes of the document.
|
46
39
|
#
|
47
40
|
# @param [Oga::XML::NodeSet|Array] nodes
|
48
|
-
#
|
49
41
|
def children=(nodes)
|
50
42
|
if nodes.is_a?(NodeSet)
|
51
43
|
@children = nodes
|
@@ -54,11 +46,19 @@ module Oga
|
|
54
46
|
end
|
55
47
|
end
|
56
48
|
|
57
|
-
|
49
|
+
# Returns self.
|
50
|
+
#
|
51
|
+
# This method exists to make this class compatible with Element, which in
|
52
|
+
# turn makes it easier to use both in the XPath compiler.
|
53
|
+
#
|
54
|
+
# @return [Oga::XML::Document]
|
55
|
+
def root_node
|
56
|
+
self
|
57
|
+
end
|
58
|
+
|
58
59
|
# Converts the document and its child nodes to XML.
|
59
60
|
#
|
60
61
|
# @return [String]
|
61
|
-
#
|
62
62
|
def to_xml
|
63
63
|
xml = children.map(&:to_xml).join('')
|
64
64
|
|
@@ -73,19 +73,15 @@ module Oga
|
|
73
73
|
xml
|
74
74
|
end
|
75
75
|
|
76
|
-
##
|
77
76
|
# @return [TrueClass|FalseClass]
|
78
|
-
#
|
79
77
|
def html?
|
80
78
|
type.equal?(:html)
|
81
79
|
end
|
82
80
|
|
83
|
-
##
|
84
81
|
# Inspects the document and its child nodes. Child nodes are indented for
|
85
82
|
# each nesting level.
|
86
83
|
#
|
87
84
|
# @return [String]
|
88
|
-
#
|
89
85
|
def inspect
|
90
86
|
segments = []
|
91
87
|
|
data/lib/oga/xml/element.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Class that contains information about an XML element such as the name,
|
5
4
|
# attributes and child nodes.
|
6
|
-
#
|
7
5
|
class Element < Node
|
8
6
|
include Querying
|
7
|
+
include ExpandedName
|
9
8
|
|
10
9
|
# @return [String]
|
11
10
|
attr_reader :namespace_name
|
@@ -19,14 +18,11 @@ module Oga
|
|
19
18
|
# @return [Hash]
|
20
19
|
attr_writer :namespaces
|
21
20
|
|
22
|
-
##
|
23
21
|
# The attribute prefix/namespace used for registering element namespaces.
|
24
22
|
#
|
25
23
|
# @return [String]
|
26
|
-
#
|
27
24
|
XMLNS_PREFIX = 'xmlns'.freeze
|
28
25
|
|
29
|
-
##
|
30
26
|
# @param [Hash] options
|
31
27
|
#
|
32
28
|
# @option options [String] :name The name of the element.
|
@@ -35,7 +31,6 @@ module Oga
|
|
35
31
|
#
|
36
32
|
# @option options [Array<Oga::XML::Attribute>] :attributes The attributes
|
37
33
|
# of the element as an Array.
|
38
|
-
#
|
39
34
|
def initialize(options = {})
|
40
35
|
super
|
41
36
|
|
@@ -48,15 +43,12 @@ module Oga
|
|
48
43
|
register_namespaces_from_attributes
|
49
44
|
end
|
50
45
|
|
51
|
-
##
|
52
46
|
# @param [String] name
|
53
|
-
#
|
54
47
|
def namespace_name=(name)
|
55
48
|
@namespace_name = name
|
56
49
|
@namespace = nil
|
57
50
|
end
|
58
51
|
|
59
|
-
##
|
60
52
|
# Returns an attribute matching the given name (with or without the
|
61
53
|
# namespace).
|
62
54
|
#
|
@@ -71,7 +63,6 @@ module Oga
|
|
71
63
|
# of the attribute.
|
72
64
|
#
|
73
65
|
# @return [Oga::XML::Attribute]
|
74
|
-
#
|
75
66
|
def attribute(name)
|
76
67
|
name, ns = split_name(name)
|
77
68
|
|
@@ -84,32 +75,27 @@ module Oga
|
|
84
75
|
|
85
76
|
alias_method :attr, :attribute
|
86
77
|
|
87
|
-
##
|
88
78
|
# Returns the value of the given attribute.
|
89
79
|
#
|
90
80
|
# @example
|
91
81
|
# element.get('class') # => "container"
|
92
82
|
#
|
93
83
|
# @see [#attribute]
|
94
|
-
#
|
95
84
|
def get(name)
|
96
85
|
found = attribute(name)
|
97
86
|
|
98
87
|
found ? found.value : nil
|
99
88
|
end
|
100
89
|
|
101
|
-
##
|
102
90
|
# Adds a new attribute to the element.
|
103
91
|
#
|
104
92
|
# @param [Oga::XML::Attribute] attribute
|
105
|
-
#
|
106
93
|
def add_attribute(attribute)
|
107
94
|
attribute.element = self
|
108
95
|
|
109
96
|
attributes << attribute
|
110
97
|
end
|
111
98
|
|
112
|
-
##
|
113
99
|
# Sets the value of an attribute to the given value. If the attribute does
|
114
100
|
# not exist it is created automatically.
|
115
101
|
#
|
@@ -117,7 +103,6 @@ module Oga
|
|
117
103
|
# namespace.
|
118
104
|
#
|
119
105
|
# @param [String] value The new value of the attribute.
|
120
|
-
#
|
121
106
|
def set(name, value)
|
122
107
|
found = attribute(name)
|
123
108
|
|
@@ -140,25 +125,21 @@ module Oga
|
|
140
125
|
end
|
141
126
|
end
|
142
127
|
|
143
|
-
##
|
144
128
|
# Removes an attribute from the element.
|
145
129
|
#
|
146
130
|
# @param [String] name The name (optionally including namespace prefix)
|
147
131
|
# of the attribute to remove.
|
148
132
|
#
|
149
133
|
# @return [Oga::XML::Attribute]
|
150
|
-
#
|
151
134
|
def unset(name)
|
152
135
|
found = attribute(name)
|
153
136
|
|
154
137
|
return attributes.delete(found) if found
|
155
138
|
end
|
156
139
|
|
157
|
-
##
|
158
140
|
# Returns the namespace of the element.
|
159
141
|
#
|
160
142
|
# @return [Oga::XML::Namespace]
|
161
|
-
#
|
162
143
|
def namespace
|
163
144
|
unless @namespace
|
164
145
|
available = available_namespaces
|
@@ -168,40 +149,32 @@ module Oga
|
|
168
149
|
@namespace
|
169
150
|
end
|
170
151
|
|
171
|
-
##
|
172
152
|
# Returns the namespaces registered on this element, or an empty Hash in
|
173
153
|
# case of an HTML element.
|
174
154
|
#
|
175
155
|
# @return [Hash]
|
176
|
-
#
|
177
156
|
def namespaces
|
178
157
|
html? ? {} : @namespaces
|
179
158
|
end
|
180
159
|
|
181
|
-
##
|
182
160
|
# Returns true if the current element resides in the default XML
|
183
161
|
# namespace.
|
184
162
|
#
|
185
163
|
# @return [TrueClass|FalseClass]
|
186
|
-
#
|
187
164
|
def default_namespace?
|
188
165
|
namespace == DEFAULT_NAMESPACE || namespace.nil?
|
189
166
|
end
|
190
167
|
|
191
|
-
##
|
192
168
|
# Returns the text of all child nodes joined together.
|
193
169
|
#
|
194
170
|
# @return [String]
|
195
|
-
#
|
196
171
|
def text
|
197
172
|
children.text
|
198
173
|
end
|
199
174
|
|
200
|
-
##
|
201
175
|
# Returns the text of the current element only.
|
202
176
|
#
|
203
177
|
# @return [String]
|
204
|
-
#
|
205
178
|
def inner_text
|
206
179
|
text = ''
|
207
180
|
|
@@ -212,12 +185,10 @@ module Oga
|
|
212
185
|
text
|
213
186
|
end
|
214
187
|
|
215
|
-
##
|
216
188
|
# Returns any {Oga::XML::Text} nodes that are a direct child of this
|
217
189
|
# element.
|
218
190
|
#
|
219
191
|
# @return [Oga::XML::NodeSet]
|
220
|
-
#
|
221
192
|
def text_nodes
|
222
193
|
nodes = NodeSet.new
|
223
194
|
|
@@ -228,21 +199,17 @@ module Oga
|
|
228
199
|
nodes
|
229
200
|
end
|
230
201
|
|
231
|
-
##
|
232
202
|
# Sets the inner text of the current element to the given String.
|
233
203
|
#
|
234
204
|
# @param [String] text
|
235
|
-
#
|
236
205
|
def inner_text=(text)
|
237
206
|
text_node = XML::Text.new(:text => text)
|
238
207
|
@children = NodeSet.new([text_node], self)
|
239
208
|
end
|
240
209
|
|
241
|
-
##
|
242
210
|
# Converts the element and its child elements to XML.
|
243
211
|
#
|
244
212
|
# @return [String]
|
245
|
-
#
|
246
213
|
def to_xml
|
247
214
|
if namespace_name
|
248
215
|
full_name = "#{namespace_name}:#{name}"
|
@@ -264,9 +231,7 @@ module Oga
|
|
264
231
|
end
|
265
232
|
end
|
266
233
|
|
267
|
-
##
|
268
234
|
# @return [String]
|
269
|
-
#
|
270
235
|
def inspect
|
271
236
|
segments = []
|
272
237
|
|
@@ -283,7 +248,6 @@ module Oga
|
|
283
248
|
"Element(#{segments.join(' ')})"
|
284
249
|
end
|
285
250
|
|
286
|
-
##
|
287
251
|
# Registers a new namespace for the current element and its child
|
288
252
|
# elements.
|
289
253
|
#
|
@@ -291,7 +255,6 @@ module Oga
|
|
291
255
|
# @param [String] uri
|
292
256
|
# @param [TrueClass|FalseClass] flush
|
293
257
|
# @see [Oga::XML::Namespace#initialize]
|
294
|
-
#
|
295
258
|
def register_namespace(name, uri, flush = true)
|
296
259
|
if namespaces[name]
|
297
260
|
raise ArgumentError, "The namespace #{name.inspect} already exists"
|
@@ -302,12 +265,10 @@ module Oga
|
|
302
265
|
flush_namespaces_cache if flush
|
303
266
|
end
|
304
267
|
|
305
|
-
##
|
306
268
|
# Returns a Hash containing all the namespaces available to the current
|
307
269
|
# element.
|
308
270
|
#
|
309
271
|
# @return [Hash]
|
310
|
-
#
|
311
272
|
def available_namespaces
|
312
273
|
# HTML(5) completely ignores namespaces
|
313
274
|
unless @available_namespaces
|
@@ -332,11 +293,9 @@ module Oga
|
|
332
293
|
@available_namespaces
|
333
294
|
end
|
334
295
|
|
335
|
-
##
|
336
296
|
# Returns `true` if the element is a self-closing element.
|
337
297
|
#
|
338
298
|
# @return [TrueClass|FalseClass]
|
339
|
-
#
|
340
299
|
def self_closing?
|
341
300
|
self_closing = children.empty?
|
342
301
|
root = root_node
|
@@ -349,10 +308,8 @@ module Oga
|
|
349
308
|
self_closing
|
350
309
|
end
|
351
310
|
|
352
|
-
##
|
353
311
|
# Flushes the namespaces cache of the current element and all its child
|
354
312
|
# elements.
|
355
|
-
#
|
356
313
|
def flush_namespaces_cache
|
357
314
|
@available_namespaces = nil
|
358
315
|
@namespace = nil
|
@@ -364,9 +321,7 @@ module Oga
|
|
364
321
|
|
365
322
|
private
|
366
323
|
|
367
|
-
##
|
368
324
|
# Registers namespaces based on any "xmlns" attributes.
|
369
|
-
#
|
370
325
|
def register_namespaces_from_attributes
|
371
326
|
flush = false
|
372
327
|
|
@@ -385,31 +340,25 @@ module Oga
|
|
385
340
|
flush_namespaces_cache if flush
|
386
341
|
end
|
387
342
|
|
388
|
-
##
|
389
343
|
# Links all attributes to the current element.
|
390
|
-
#
|
391
344
|
def link_attributes
|
392
345
|
attributes.each do |attr|
|
393
346
|
attr.element = self
|
394
347
|
end
|
395
348
|
end
|
396
349
|
|
397
|
-
##
|
398
350
|
# @param [String] name
|
399
351
|
# @return [Array]
|
400
|
-
#
|
401
352
|
def split_name(name)
|
402
353
|
segments = name.to_s.split(':')
|
403
354
|
|
404
355
|
[segments.pop, segments.pop]
|
405
356
|
end
|
406
357
|
|
407
|
-
##
|
408
358
|
# @param [Oga::XML::Attribute] attr
|
409
359
|
# @param [String] ns
|
410
360
|
# @param [String] name
|
411
361
|
# @return [TrueClass|FalseClass]
|
412
|
-
#
|
413
362
|
def attribute_matches?(attr, ns, name)
|
414
363
|
name_matches = attr.name == name
|
415
364
|
ns_matches = false
|
data/lib/oga/xml/entities.rb
CHANGED
@@ -1,18 +1,14 @@
|
|
1
1
|
module Oga
|
2
2
|
module XML
|
3
|
-
##
|
4
3
|
# Module for encoding/decoding XML and HTML entities. The mapping of HTML
|
5
4
|
# entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
|
6
|
-
#
|
7
5
|
module Entities
|
8
|
-
##
|
9
6
|
# Hash containing XML entities and the corresponding characters.
|
10
7
|
#
|
11
8
|
# The `&` mapping must come last to ensure proper conversion of non
|
12
9
|
# encoded to encoded forms (see {Oga::XML::Text#to_xml}).
|
13
10
|
#
|
14
11
|
# @return [Hash]
|
15
|
-
#
|
16
12
|
DECODE_MAPPING = {
|
17
13
|
'&lt;' => '<',
|
18
14
|
'&gt;' => '>',
|
@@ -21,23 +17,19 @@ module Oga
|
|
21
17
|
'&amp;' => '&',
|
22
18
|
}
|
23
19
|
|
24
|
-
##
|
25
20
|
# Hash containing characters and the corresponding XML entities.
|
26
21
|
#
|
27
22
|
# @return [Hash]
|
28
|
-
#
|
29
23
|
ENCODE_MAPPING = {
|
30
24
|
'&' => '&amp;',
|
31
25
|
'>' => '&gt;',
|
32
26
|
'<' => '&lt;',
|
33
27
|
}
|
34
28
|
|
35
|
-
##
|
36
29
|
# Hash containing characters and the corresponding XML entities to use
|
37
30
|
# when encoding XML/HTML attribute values.
|
38
31
|
#
|
39
32
|
# @return [Hash]
|
40
|
-
#
|
41
33
|
ENCODE_ATTRIBUTE_MAPPING = {
|
42
34
|
'&' => '&amp;',
|
43
35
|
'>' => '&gt;',
|
@@ -46,50 +38,36 @@ module Oga
|
|
46
38
|
'"' => '&quot;'
|
47
39
|
}
|
48
40
|
|
49
|
-
##
|
50
41
|
# @return [String]
|
51
|
-
#
|
52
42
|
AMPERSAND = '&'.freeze
|
53
43
|
|
54
|
-
##
|
55
44
|
# Regexp for matching XML/HTML entities such as "&nbsp;".
|
56
45
|
#
|
57
46
|
# @return [Regexp]
|
58
|
-
#
|
59
47
|
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
|
60
48
|
|
61
|
-
##
|
62
49
|
# Regexp for matching XML/HTML numeric entities such as "&#38;".
|
63
50
|
#
|
64
51
|
# @return [Regexp]
|
65
|
-
#
|
66
52
|
NUMERIC_CODE_POINT_ENTITY = /&#(\d+);/
|
67
53
|
|
68
|
-
##
|
69
54
|
# Regexp for matching XML/HTML hex entities such as "&#x3C;".
|
70
55
|
#
|
71
56
|
# @return [Regexp]
|
72
|
-
#
|
73
57
|
HEX_CODE_POINT_ENTITY = /&#x([a-fA-F0-9]+);/
|
74
58
|
|
75
|
-
##
|
76
59
|
# @return [Regexp]
|
77
|
-
#
|
78
60
|
ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
|
79
61
|
|
80
|
-
##
|
81
62
|
# @return [Regexp]
|
82
|
-
#
|
83
63
|
ENCODE_ATTRIBUTE_REGEXP =
|
84
64
|
Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
|
85
65
|
|
86
|
-
##
|
87
66
|
# Decodes XML entities.
|
88
67
|
#
|
89
68
|
# @param [String] input
|
90
69
|
# @param [Hash] mapping
|
91
70
|
# @return [String]
|
92
|
-
#
|
93
71
|
def self.decode(input, mapping = DECODE_MAPPING)
|
94
72
|
return input unless input.include?(AMPERSAND)
|
95
73
|
|
@@ -110,23 +88,19 @@ module Oga
|
|
110
88
|
input
|
111
89
|
end
|
112
90
|
|
113
|
-
##
|
114
91
|
# Encodes special characters as XML entities.
|
115
92
|
#
|
116
93
|
# @param [String] input
|
117
94
|
# @param [Hash] mapping
|
118
95
|
# @return [String]
|
119
|
-
#
|
120
96
|
def self.encode(input, mapping = ENCODE_MAPPING)
|
121
97
|
input.gsub(ENCODE_REGEXP, mapping)
|
122
98
|
end
|
123
99
|
|
124
|
-
##
|
125
100
|
# Encodes special characters in an XML attribute value.
|
126
101
|
#
|
127
102
|
# @param [String] input
|
128
103
|
# @return [String]
|
129
|
-
#
|
130
104
|
def self.encode_attribute(input)
|
131
105
|
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
|
132
106
|
end
|