oga 0.1.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +13 -0
- data/LICENSE +19 -0
- data/README.md +179 -0
- data/doc/DCO.md +25 -0
- data/doc/changelog.md +20 -0
- data/doc/css/common.css +76 -0
- data/doc/migrating_from_nokogiri.md +169 -0
- data/ext/c/extconf.rb +13 -0
- data/ext/c/lexer.c +1518 -0
- data/ext/c/lexer.h +8 -0
- data/ext/c/lexer.rl +121 -0
- data/ext/c/liboga.c +6 -0
- data/ext/c/liboga.h +11 -0
- data/ext/java/Liboga.java +14 -0
- data/ext/java/org/liboga/xml/Lexer.java +829 -0
- data/ext/java/org/liboga/xml/Lexer.rl +151 -0
- data/ext/ragel/base_lexer.rl +323 -0
- data/lib/liboga.jar +0 -0
- data/lib/oga.rb +43 -0
- data/lib/oga/html/parser.rb +25 -0
- data/lib/oga/oga.rb +27 -0
- data/lib/oga/version.rb +3 -0
- data/lib/oga/xml/attribute.rb +111 -0
- data/lib/oga/xml/cdata.rb +17 -0
- data/lib/oga/xml/character_node.rb +39 -0
- data/lib/oga/xml/comment.rb +17 -0
- data/lib/oga/xml/doctype.rb +84 -0
- data/lib/oga/xml/document.rb +99 -0
- data/lib/oga/xml/element.rb +331 -0
- data/lib/oga/xml/lexer.rb +399 -0
- data/lib/oga/xml/namespace.rb +42 -0
- data/lib/oga/xml/node.rb +168 -0
- data/lib/oga/xml/node_set.rb +313 -0
- data/lib/oga/xml/parser.rb +556 -0
- data/lib/oga/xml/processing_instruction.rb +39 -0
- data/lib/oga/xml/pull_parser.rb +180 -0
- data/lib/oga/xml/querying.rb +32 -0
- data/lib/oga/xml/text.rb +11 -0
- data/lib/oga/xml/traversal.rb +48 -0
- data/lib/oga/xml/xml_declaration.rb +69 -0
- data/lib/oga/xpath/evaluator.rb +1748 -0
- data/lib/oga/xpath/lexer.rb +2043 -0
- data/lib/oga/xpath/node.rb +10 -0
- data/lib/oga/xpath/parser.rb +537 -0
- data/oga.gemspec +45 -0
- metadata +221 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
module Oga
  module XML
    ##
    # The Namespace class contains information about XML namespaces such as the
    # name and URI.
    #
    # Both attributes are exposed through `attr_accessor` and can therefore be
    # read as well as reassigned after the object has been created.
    #
    # @!attribute [rw] name
    #  @return [String]
    #
    # @!attribute [rw] uri
    #  @return [String]
    #
    class Namespace
      attr_accessor :name, :uri

      ##
      # Builds a namespace from an options Hash. Keys that are missing leave
      # the corresponding attribute set to `nil`.
      #
      # @param [Hash] options
      #
      # @option options [String] :name
      # @option options [String] :uri
      #
      def initialize(options = {})
        @name = options[:name]
        @uri  = options[:uri]
      end

      ##
      # Returns the name of the namespace as a String. When no name is set an
      # empty String is returned (the result of `nil.to_s`).
      #
      # @return [String]
      #
      def to_s
        return name.to_s
      end

      ##
      # Returns a human readable description of the namespace containing the
      # inspected name and URI.
      #
      # @return [String]
      #
      def inspect
        return "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
      end
    end # Namespace
  end # XML
end # Oga
|
data/lib/oga/xml/node.rb
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
module Oga
  module XML
    ##
    # A generic XML node. Instances of this class can belong to a
    # {Oga::XML::NodeSet} and can be used to query surrounding and parent
    # nodes.
    #
    # @!attribute [rw] node_set
    #  @return [Oga::XML::NodeSet]
    #
    class Node
      include Traversal

      attr_accessor :node_set

      ##
      # @param [Hash] options
      #
      # @option options [Oga::XML::NodeSet] :node_set The node set that this
      #  node belongs to.
      #
      # @option options [Oga::XML::NodeSet|Array] :children The child nodes of
      #  the current node. Only assigned when a truthy value is given.
      #
      def initialize(options = {})
        @node_set = options[:node_set]

        self.children = options[:children] if options[:children]
      end

      ##
      # Returns the child nodes of the current node. The set is created lazily
      # (owned by this node) the first time it is requested.
      #
      # @return [Oga::XML::NodeSet]
      #
      def children
        return @children ||= NodeSet.new([], self)
      end

      ##
      # Sets the child nodes of the element.
      #
      # A NodeSet is stored as-is, any other value is wrapped in a new NodeSet
      # that is owned by this node.
      #
      # @param [Oga::XML::NodeSet|Array] nodes
      #
      def children=(nodes)
        if nodes.is_a?(NodeSet)
          @children = nodes
        else
          @children = NodeSet.new(nodes, self)
        end
      end

      ##
      # Returns the parent node of the current node. If there is no parent node
      # `nil` is returned instead.
      #
      # @return [Oga::XML::Node]
      #
      def parent
        return node_set ? node_set.owner : nil
      end

      ##
      # Returns the preceding node, or nil if there is none.
      #
      # `nil` is also returned when the node does not belong to a node set or
      # can not be found in the set it points to.
      #
      # @return [Oga::XML::Node]
      #
      def previous
        index = node_set ? node_set.index(self) : nil

        # No set, not part of the set, or already the first node.
        return if index.nil? || index.zero?

        return node_set[index - 1]
      end

      ##
      # Returns the following node, or nil if there is none.
      #
      # `nil` is also returned when the node does not belong to a node set or
      # can not be found in the set it points to.
      #
      # @return [Oga::XML::Node]
      #
      def next
        index = node_set ? node_set.index(self) : nil

        return unless index

        following = index + 1

        # The last valid index is `length - 1`, hence the strict comparison.
        return following < node_set.length ? node_set[following] : nil
      end

      ##
      # Returns the previous element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def previous_element
        node = self

        while (node = node.previous)
          return node if node.is_a?(Element)
        end

        return
      end

      ##
      # Returns the next element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def next_element
        node = self

        while (node = node.next)
          return node if node.is_a?(Element)
        end

        return
      end

      ##
      # Returns the root document/node of the current node. The node is
      # retrieved by traversing upwards in the DOM tree from the current node.
      #
      # Traversal stops as soon as a Document is reached or a node without a
      # node set is found.
      #
      # @return [Oga::XML::Document|Oga::XML::Node]
      #
      def root_node
        node = self

        while !node.is_a?(Document) && node.node_set
          node = node.node_set.owner
        end

        return node
      end

      ##
      # Removes the current node from the owning node set. Nothing happens
      # (and `nil` is returned) when the node does not belong to a set.
      #
      # @return [Oga::XML::Node] The value returned by the set's `delete`.
      #
      def remove
        return node_set.delete(self) if node_set
      end

      ##
      # Inserts the given node before the current node. The current node has
      # to be part of a node set for this to work.
      #
      # @param [Oga::XML::Node] other
      #
      def before(other)
        index = node_set.index(self)

        node_set.insert(index, other)
      end

      ##
      # Inserts the given node after the current node. The current node has to
      # be part of a node set for this to work.
      #
      # @param [Oga::XML::Node] other
      #
      def after(other)
        index = node_set.index(self) + 1

        node_set.insert(index, other)
      end
    end # Node
  end # XML
end # Oga
|
@@ -0,0 +1,313 @@
|
|
1
|
+
module Oga
  module XML
    ##
    # The NodeSet class contains a set of unique {Oga::XML::Node} instances that
    # can be queried and modified. Optionally NodeSet instances can take
    # ownership of a node (besides just containing it). This allows the nodes to
    # query their previous and next elements.
    #
    # There are two types of sets:
    #
    # 1. Regular node sets
    # 2. Owned node sets
    #
    # Both behave similar to Ruby's Array class. The difference between an
    # owned and regular node set is that an owned set modifies nodes that are
    # added or removed by certain operations. For example, when a node is added
    # to an owned set the `node_set` attribute of said node points to the set
    # it was just added to.
    #
    # Owned node sets are used when building a DOM tree with
    # {Oga::XML::Parser}. By taking ownership of nodes in a set Oga makes it
    # possible to use these sets as following:
    #
    #     document = Oga::XML::Document.new
    #     element  = Oga::XML::Element.new
    #
    #     document.children << element
    #
    #     element.node_set == document.children # => true
    #
    # If ownership was not handled then you'd have to manually set the
    # `element` variable's `node_set` attribute after pushing it into a set.
    #
    # @!attribute [rw] owner
    #  @return [Oga::XML::Node]
    #
    class NodeSet
      include Enumerable

      attr_accessor :owner

      ##
      # @param [Array] nodes The nodes to add to the set. Duplicates are
      #  dropped.
      # @param [Oga::XML::Node] owner The owner of the set.
      #
      def initialize(nodes = [], owner = nil)
        @nodes = nodes.uniq
        @owner = owner

        @nodes.each { |node| take_ownership(node) }
      end

      ##
      # Yields the supplied block for every node. When no block is given an
      # Enumerator is returned instead.
      #
      # @yieldparam [Oga::XML::Node]
      #
      def each
        return to_enum(:each) unless block_given?

        @nodes.each { |node| yield node }
      end

      ##
      # Returns the last node in the set.
      #
      # @return [Oga::XML::Node]
      #
      def last
        return @nodes[-1]
      end

      ##
      # Returns `true` if the set is empty.
      #
      # @return [TrueClass|FalseClass]
      #
      def empty?
        return @nodes.empty?
      end

      ##
      # Returns the amount of nodes in the set.
      #
      # @return [Fixnum]
      #
      def length
        return @nodes.length
      end

      alias_method :count, :length
      alias_method :size, :length

      ##
      # Returns the index of the given node, or `nil` when the node is not part
      # of the set.
      #
      # @param [Oga::XML::Node] node
      # @return [Fixnum]
      #
      def index(node)
        return @nodes.index(node)
      end

      ##
      # Pushes the node at the end of the set. Nodes that are already in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def push(node)
        return if @nodes.include?(node)

        @nodes << node

        take_ownership(node)
      end

      alias_method :<<, :push

      ##
      # Pushes the node at the start of the set. Nodes that are already in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def unshift(node)
        return if @nodes.include?(node)

        @nodes.unshift(node)

        take_ownership(node)
      end

      ##
      # Shifts a node from the start of the set. Returns `nil` when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def shift
        node = @nodes.shift

        # An empty set yields nil, in which case there is nothing to release.
        remove_ownership(node) if node

        return node
      end

      ##
      # Pops a node from the end of the set. Returns `nil` when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def pop
        node = @nodes.pop

        # An empty set yields nil, in which case there is nothing to release.
        remove_ownership(node) if node

        return node
      end

      ##
      # Inserts a node into the set at the given index. Nodes that are already
      # in the set are ignored.
      #
      # @param [Fixnum] index The index to insert the node at.
      # @param [Oga::XML::Node] node
      #
      def insert(index, node)
        return if @nodes.include?(node)

        @nodes.insert(index, node)

        take_ownership(node)
      end

      ##
      # Returns the node for the given index.
      #
      # @param [Fixnum] index
      # @return [Oga::XML::Node]
      #
      def [](index)
        return @nodes[index]
      end

      ##
      # Converts the current set to an Array. The Array returned is the one
      # used internally by the set, not a copy.
      #
      # @return [Array]
      #
      def to_a
        return @nodes
      end

      ##
      # Creates a new set based on the current and the specified set. The newly
      # created set does not inherit ownership rules of the current set.
      #
      # @param [Oga::XML::NodeSet] other
      # @return [Oga::XML::NodeSet]
      #
      def +(other)
        return self.class.new(to_a | other.to_a)
      end

      ##
      # Adds the nodes of the given node set to the current node set.
      #
      # @param [Oga::XML::NodeSet] other
      #
      def concat(other)
        other.each { |node| push(node) }
      end

      ##
      # Removes the current nodes from their owning set. The nodes are *not*
      # removed from the current set, unless the current set happens to be the
      # owning set of those nodes.
      #
      # This method is intended to remove nodes from an XML document/node.
      #
      def remove
        sets = []

        # Work on a snapshot of the nodes. When a node is owned by this very
        # set the `delete` calls below shrink @nodes, and removing elements
        # from an Array while iterating over it skips entries.
        targets = @nodes.dup

        # First we gather all the sets to remove nodes from, then we remove the
        # actual nodes. This is done as you can not reliably remove elements
        # from an Array while iterating on that same Array.
        targets.each do |node|
          if node.node_set
            sets << node.node_set

            node.node_set = nil
          end
        end

        sets.each do |set|
          targets.each { |node| set.delete(node) }
        end
      end

      ##
      # Removes a node from the current set only.
      #
      # @param [Oga::XML::Node] node
      # @return [Oga::XML::Node] The removed node, or `nil` if the node was not
      #  part of the set.
      #
      def delete(node)
        removed = @nodes.delete(node)

        remove_ownership(removed) if removed

        return removed
      end

      ##
      # Returns the values of the given attribute. Nodes that do not respond to
      # `attribute` are skipped.
      #
      # @param [String|Symbol] name The name of the attribute.
      # @return [Array]
      #
      def attribute(name)
        values = []

        @nodes.each do |node|
          if node.respond_to?(:attribute)
            values << node.attribute(name)
          end
        end

        return values
      end

      alias_method :attr, :attribute

      ##
      # Returns the text of all nodes in the set, ignoring comment nodes.
      #
      # @return [String]
      #
      def text
        text = ''

        @nodes.each do |node|
          if node.respond_to?(:text) && !node.is_a?(Comment)
            text << node.text
          end
        end

        return text
      end

      ##
      # @return [String]
      #
      def inspect
        values = @nodes.map(&:inspect).join(', ')

        return "NodeSet(#{values})"
      end

      private

      ##
      # Takes ownership of the given node. This only occurs when the current
      # set has an owner.
      #
      # @param [Oga::XML::Node] node
      #
      def take_ownership(node)
        node.node_set = self if owner
      end

      ##
      # Removes ownership of the node if it belongs to the current set.
      #
      # @param [Oga::XML::Node] node
      #
      def remove_ownership(node)
        node.node_set = nil if node.node_set == self
      end
    end # NodeSet
  end # XML
end # Oga
|