oga 0.1.1-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +13 -0
- data/LICENSE +19 -0
- data/README.md +179 -0
- data/doc/DCO.md +25 -0
- data/doc/changelog.md +20 -0
- data/doc/css/common.css +76 -0
- data/doc/migrating_from_nokogiri.md +169 -0
- data/ext/c/extconf.rb +13 -0
- data/ext/c/lexer.c +1518 -0
- data/ext/c/lexer.h +8 -0
- data/ext/c/lexer.rl +121 -0
- data/ext/c/liboga.c +6 -0
- data/ext/c/liboga.h +11 -0
- data/ext/java/Liboga.java +14 -0
- data/ext/java/org/liboga/xml/Lexer.java +829 -0
- data/ext/java/org/liboga/xml/Lexer.rl +151 -0
- data/ext/ragel/base_lexer.rl +323 -0
- data/lib/liboga.jar +0 -0
- data/lib/oga.rb +43 -0
- data/lib/oga/html/parser.rb +25 -0
- data/lib/oga/oga.rb +27 -0
- data/lib/oga/version.rb +3 -0
- data/lib/oga/xml/attribute.rb +111 -0
- data/lib/oga/xml/cdata.rb +17 -0
- data/lib/oga/xml/character_node.rb +39 -0
- data/lib/oga/xml/comment.rb +17 -0
- data/lib/oga/xml/doctype.rb +84 -0
- data/lib/oga/xml/document.rb +99 -0
- data/lib/oga/xml/element.rb +331 -0
- data/lib/oga/xml/lexer.rb +399 -0
- data/lib/oga/xml/namespace.rb +42 -0
- data/lib/oga/xml/node.rb +168 -0
- data/lib/oga/xml/node_set.rb +313 -0
- data/lib/oga/xml/parser.rb +556 -0
- data/lib/oga/xml/processing_instruction.rb +39 -0
- data/lib/oga/xml/pull_parser.rb +180 -0
- data/lib/oga/xml/querying.rb +32 -0
- data/lib/oga/xml/text.rb +11 -0
- data/lib/oga/xml/traversal.rb +48 -0
- data/lib/oga/xml/xml_declaration.rb +69 -0
- data/lib/oga/xpath/evaluator.rb +1748 -0
- data/lib/oga/xpath/lexer.rb +2043 -0
- data/lib/oga/xpath/node.rb +10 -0
- data/lib/oga/xpath/parser.rb +537 -0
- data/oga.gemspec +45 -0
- metadata +221 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
module Oga
  module XML
    ##
    # The Namespace class contains information about XML namespaces such as the
    # name and URI.
    #
    # Both attributes are exposed through `attr_accessor`, so they can be read
    # and reassigned after construction.
    #
    # @!attribute [rw] name
    #  @return [String]
    #
    # @!attribute [rw] uri
    #  @return [String]
    #
    class Namespace
      attr_accessor :name, :uri

      ##
      # Builds a namespace from an options Hash. Keys that are absent leave the
      # corresponding attribute set to `nil`.
      #
      # @param [Hash] options
      #
      # @option options [String] :name
      # @option options [String] :uri
      #
      def initialize(options = {})
        @name = options[:name]
        @uri  = options[:uri]
      end

      ##
      # Returns the namespace name as a String. A `nil` name yields an empty
      # String because of the `to_s` conversion.
      #
      # @return [String]
      #
      def to_s
        return name.to_s
      end

      ##
      # Returns a debugging representation containing the inspected name and
      # URI.
      #
      # @return [String]
      #
      def inspect
        return "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
      end
    end # Namespace
  end # XML
end # Oga
|
data/lib/oga/xml/node.rb
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
module Oga
  module XML
    ##
    # A generic XML node. Instances of this class can belong to a
    # {Oga::XML::NodeSet} and can be used to query surrounding and parent
    # nodes.
    #
    # @!attribute [rw] node_set
    #  @return [Oga::XML::NodeSet]
    #
    class Node
      include Traversal

      attr_accessor :node_set

      ##
      # @param [Hash] options
      #
      # @option options [Oga::XML::NodeSet] :node_set The node set that this
      #  node belongs to.
      #
      # @option options [Oga::XML::NodeSet|Array] :children The child nodes of
      #  the current node.
      #
      def initialize(options = {})
        @node_set = options[:node_set]

        self.children = options[:children] if options[:children]
      end

      ##
      # Returns the child nodes of the current node. When no children have
      # been assigned yet an empty set owned by this node is created and
      # memoized.
      #
      # @return [Oga::XML::NodeSet]
      #
      def children
        return @children ||= NodeSet.new([], self)
      end

      ##
      # Sets the child nodes of the element. A NodeSet is stored as-is, any
      # other value is wrapped in a new NodeSet owned by this node.
      #
      # @param [Oga::XML::NodeSet|Array] nodes
      #
      def children=(nodes)
        if nodes.is_a?(NodeSet)
          @children = nodes
        else
          @children = NodeSet.new(nodes, self)
        end
      end

      ##
      # Returns the parent node of the current node. If there is no parent node
      # `nil` is returned instead.
      #
      # @return [Oga::XML::Node]
      #
      def parent
        return node_set ? node_set.owner : nil
      end

      ##
      # Returns the preceding node, or nil if there is none. `nil` is also
      # returned when the node does not belong to a node set or can not be
      # found in it.
      #
      # @return [Oga::XML::Node]
      #
      def previous
        index = sibling_index

        # Index 0 has no predecessor; a negative index would wrap around to
        # the end of the set, so it must never be used for the lookup.
        return unless index && index > 0

        return node_set[index - 1]
      end

      ##
      # Returns the following node, or nil if there is none. `nil` is also
      # returned when the node does not belong to a node set or can not be
      # found in it.
      #
      # @return [Oga::XML::Node]
      #
      def next
        index = sibling_index

        return unless index

        following = index + 1

        return following < node_set.length ? node_set[following] : nil
      end

      ##
      # Returns the previous element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def previous_element
        node = self

        while node = node.previous
          return node if node.is_a?(Element)
        end

        return
      end

      ##
      # Returns the next element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def next_element
        node = self

        while node = node.next
          return node if node.is_a?(Element)
        end

        return
      end

      ##
      # Returns the root document/node of the current node. The node is
      # retrieved by traversing upwards in the DOM tree from the current node.
      #
      # Traversal stops at the first Document, or at the first node that does
      # not belong to a node set.
      #
      # @return [Oga::XML::Document|Oga::XML::Node]
      #
      def root_node
        node = self

        loop do
          if !node.is_a?(Document) && node.node_set
            node = node.node_set.owner
          else
            break
          end
        end

        return node
      end

      ##
      # Removes the current node from the owning node set. Returns `nil` when
      # the node does not belong to a set.
      #
      # @return [Oga::XML::Node]
      #
      def remove
        return node_set.delete(self) if node_set
      end

      ##
      # Inserts the given node before the current node.
      #
      # @param [Oga::XML::Node] other
      #
      def before(other)
        index = node_set.index(self)

        node_set.insert(index, other)
      end

      ##
      # Inserts the given node after the current node.
      #
      # @param [Oga::XML::Node] other
      #
      def after(other)
        index = node_set.index(self) + 1

        node_set.insert(index, other)
      end

      private

      ##
      # Returns the position of this node in its node set, or `nil` when the
      # node has no set or is not present in it.
      #
      # @return [Fixnum|NilClass]
      #
      def sibling_index
        return node_set ? node_set.index(self) : nil
      end
    end # Node
  end # XML
end # Oga
|
@@ -0,0 +1,313 @@
|
|
1
|
+
module Oga
  module XML
    ##
    # The NodeSet class contains a set of unique {Oga::XML::Node} instances that
    # can be queried and modified. Optionally NodeSet instances can take
    # ownership of a node (besides just containing it). This allows the nodes to
    # query their previous and next elements.
    #
    # There are two types of sets:
    #
    # 1. Regular node sets
    # 2. Owned node sets
    #
    # Both behave similar to Ruby's Array class. The difference between an
    # owned and regular node set is that an owned set modifies nodes that are
    # added or removed by certain operations. For example, when a node is added
    # to an owned set the `node_set` attribute of said node points to the set
    # it was just added to.
    #
    # Owned node sets are used when building a DOM tree with
    # {Oga::XML::Parser}. By taking ownership of nodes in a set Oga makes it
    # possible to use these sets as following:
    #
    #     document = Oga::XML::Document.new
    #     element  = Oga::XML::Element.new
    #
    #     document.children << element
    #
    #     element.node_set == document.children # => true
    #
    # If ownership was not handled then you'd have to manually set the
    # `element` variable's `node_set` attribute after pushing it into a set.
    #
    # @!attribute [rw] owner
    #  @return [Oga::XML::Node]
    #
    class NodeSet
      include Enumerable

      attr_accessor :owner

      ##
      # @param [Array] nodes The nodes to add to the set. Duplicates are
      #  dropped.
      # @param [Oga::XML::Node] owner The owner of the set.
      #
      def initialize(nodes = [], owner = nil)
        @nodes = nodes.uniq
        @owner = owner

        @nodes.each { |node| take_ownership(node) }
      end

      ##
      # Yields the supplied block for every node. Without a block an
      # Enumerator is returned instead.
      #
      # @yieldparam [Oga::XML::Node]
      #
      def each
        return to_enum(:each) unless block_given?

        @nodes.each { |node| yield node }
      end

      ##
      # Returns the last node in the set.
      #
      # @return [Oga::XML::Node]
      #
      def last
        return @nodes[-1]
      end

      ##
      # Returns `true` if the set is empty.
      #
      # @return [TrueClass|FalseClass]
      #
      def empty?
        return @nodes.empty?
      end

      ##
      # Returns the amount of nodes in the set.
      #
      # @return [Fixnum]
      #
      def length
        return @nodes.length
      end

      alias_method :count, :length
      alias_method :size, :length

      ##
      # Returns the index of the given node, or `nil` if the node is not part
      # of the set.
      #
      # @param [Oga::XML::Node] node
      # @return [Fixnum]
      #
      def index(node)
        return @nodes.index(node)
      end

      ##
      # Pushes the node at the end of the set. Nodes already in the set are
      # ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def push(node)
        return if @nodes.include?(node)

        @nodes << node

        take_ownership(node)
      end

      alias_method :<<, :push

      ##
      # Pushes the node at the start of the set. Nodes already in the set are
      # ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def unshift(node)
        return if @nodes.include?(node)

        @nodes.unshift(node)

        take_ownership(node)
      end

      ##
      # Shifts a node from the start of the set. Returns `nil` when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def shift
        node = @nodes.shift

        # An empty set yields nil, which has no ownership to release.
        remove_ownership(node) if node

        return node
      end

      ##
      # Pops a node from the end of the set. Returns `nil` when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def pop
        node = @nodes.pop

        # An empty set yields nil, which has no ownership to release.
        remove_ownership(node) if node

        return node
      end

      ##
      # Inserts a node into the set at the given index. Nodes already in the
      # set are ignored.
      #
      # @param [Fixnum] index The index to insert the node at.
      # @param [Oga::XML::Node] node
      #
      def insert(index, node)
        return if @nodes.include?(node)

        @nodes.insert(index, node)

        take_ownership(node)
      end

      ##
      # Returns the node for the given index.
      #
      # @param [Fixnum] index
      # @return [Oga::XML::Node]
      #
      def [](index)
        return @nodes[index]
      end

      ##
      # Converts the current set to an Array. The returned Array is the set's
      # internal storage, not a copy.
      #
      # @return [Array]
      #
      def to_a
        return @nodes
      end

      ##
      # Creates a new set based on the current and the specified set. The newly
      # created set does not inherit ownership rules of the current set.
      #
      # @param [Oga::XML::NodeSet] other
      # @return [Oga::XML::NodeSet]
      #
      def +(other)
        return self.class.new(to_a | other.to_a)
      end

      ##
      # Adds the nodes of the given node set to the current node set.
      #
      # @param [Oga::XML::NodeSet] other
      #
      def concat(other)
        other.each { |node| push(node) }
      end

      ##
      # Removes the current nodes from their owning set. The nodes are *not*
      # removed from the current set, unless the current set is itself the
      # owning set.
      #
      # This method is intended to remove nodes from an XML document/node.
      #
      def remove
        # Work on a snapshot of the nodes. When this set is itself the owning
        # set, deleting from it mutates @nodes, and you can not reliably
        # remove elements from an Array while iterating on that same Array.
        nodes = @nodes.dup
        sets  = []

        # First we gather all the sets to remove nodes from, then we remove the
        # actual nodes.
        nodes.each do |node|
          set = node.node_set

          next unless set

          # Compare by identity so every owning set is only processed once.
          sets << set unless sets.any? { |known| known.equal?(set) }

          node.node_set = nil
        end

        sets.each do |set|
          nodes.each { |node| set.delete(node) }
        end
      end

      ##
      # Removes a node from the current set only.
      #
      # @param [Oga::XML::Node] node
      # @return [Oga::XML::Node] The removed node, or `nil` if it was not part
      #  of the set.
      #
      def delete(node)
        removed = @nodes.delete(node)

        remove_ownership(removed) if removed

        return removed
      end

      ##
      # Returns the values of the given attribute. Nodes that do not respond to
      # `attribute` are skipped.
      #
      # @param [String|Symbol] name The name of the attribute.
      # @return [Array]
      #
      def attribute(name)
        values = []

        @nodes.each do |node|
          if node.respond_to?(:attribute)
            values << node.attribute(name)
          end
        end

        return values
      end

      alias_method :attr, :attribute

      ##
      # Returns the text of all nodes in the set, ignoring comment nodes.
      #
      # @return [String]
      #
      def text
        text = ''

        @nodes.each do |node|
          if node.respond_to?(:text) && !node.is_a?(Comment)
            text << node.text
          end
        end

        return text
      end

      ##
      # @return [String]
      #
      def inspect
        values = @nodes.map(&:inspect).join(', ')

        return "NodeSet(#{values})"
      end

      private

      ##
      # Takes ownership of the given node. This only occurs when the current
      # set has an owner.
      #
      # @param [Oga::XML::Node] node
      #
      def take_ownership(node)
        node.node_set = self if owner
      end

      ##
      # Removes ownership of the node if it belongs to the current set.
      #
      # @param [Oga::XML::Node] node
      #
      def remove_ownership(node)
        node.node_set = nil if node.node_set == self
      end
    end # NodeSet
  end # XML
end # Oga
|