oga 0.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +179 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +20 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/liboga.jar +0 -0
  20. data/lib/oga.rb +43 -0
  21. data/lib/oga/html/parser.rb +25 -0
  22. data/lib/oga/oga.rb +27 -0
  23. data/lib/oga/version.rb +3 -0
  24. data/lib/oga/xml/attribute.rb +111 -0
  25. data/lib/oga/xml/cdata.rb +17 -0
  26. data/lib/oga/xml/character_node.rb +39 -0
  27. data/lib/oga/xml/comment.rb +17 -0
  28. data/lib/oga/xml/doctype.rb +84 -0
  29. data/lib/oga/xml/document.rb +99 -0
  30. data/lib/oga/xml/element.rb +331 -0
  31. data/lib/oga/xml/lexer.rb +399 -0
  32. data/lib/oga/xml/namespace.rb +42 -0
  33. data/lib/oga/xml/node.rb +168 -0
  34. data/lib/oga/xml/node_set.rb +313 -0
  35. data/lib/oga/xml/parser.rb +556 -0
  36. data/lib/oga/xml/processing_instruction.rb +39 -0
  37. data/lib/oga/xml/pull_parser.rb +180 -0
  38. data/lib/oga/xml/querying.rb +32 -0
  39. data/lib/oga/xml/text.rb +11 -0
  40. data/lib/oga/xml/traversal.rb +48 -0
  41. data/lib/oga/xml/xml_declaration.rb +69 -0
  42. data/lib/oga/xpath/evaluator.rb +1748 -0
  43. data/lib/oga/xpath/lexer.rb +2043 -0
  44. data/lib/oga/xpath/node.rb +10 -0
  45. data/lib/oga/xpath/parser.rb +537 -0
  46. data/oga.gemspec +45 -0
  47. metadata +221 -0
@@ -0,0 +1,42 @@
1
module Oga
  module XML
    ##
    # Value object describing a single XML namespace: the name it is
    # registered under and the URI it maps to.
    #
    # @!attribute [rw] name
    #  The name of the namespace.
    #  @return [String]
    #
    # @!attribute [rw] uri
    #  The URI of the namespace.
    #  @return [String]
    #
    class Namespace
      attr_accessor :name, :uri

      ##
      # Builds a namespace from an options Hash. Keys that are not supplied
      # leave the corresponding attribute set to `nil`.
      #
      # @param [Hash] options
      #
      # @option options [String] :name
      # @option options [String] :uri
      #
      def initialize(options = {})
        @name = options[:name]
        @uri  = options[:uri]
      end

      ##
      # Returns the namespace name as a String (an empty String when no name
      # was set).
      #
      # @return [String]
      #
      def to_s
        name.to_s
      end

      ##
      # Returns a short, human readable description of the namespace.
      #
      # @return [String]
      #
      def inspect
        details = "name: #{name.inspect} uri: #{uri.inspect}"

        "Namespace(#{details})"
      end
    end # Namespace
  end # XML
end # Oga
@@ -0,0 +1,168 @@
1
module Oga
  module XML
    ##
    # A generic XML node. Instances of this class can belong to a
    # {Oga::XML::NodeSet} and can be used to query surrounding and parent
    # nodes.
    #
    # @!attribute [rw] node_set
    #  The set that owns this node, or `nil` when the node is detached.
    #  @return [Oga::XML::NodeSet]
    #
    class Node
      include Traversal

      attr_accessor :node_set

      ##
      # @param [Hash] options
      #
      # @option options [Oga::XML::NodeSet] :node_set The node set that this
      #  node belongs to.
      #
      # @option options [Oga::XML::NodeSet|Array] :children The child nodes of
      #  the current node.
      #
      def initialize(options = {})
        @node_set = options[:node_set]

        self.children = options[:children] if options[:children]
      end

      ##
      # Returns the child nodes of the current node. The set is created
      # lazily, with the current node as its owner, the first time it is
      # requested.
      #
      # @return [Oga::XML::NodeSet]
      #
      def children
        @children ||= NodeSet.new([], self)
      end

      ##
      # Sets the child nodes of the node. A NodeSet is stored as-is, anything
      # else is wrapped in a new NodeSet owned by the current node.
      #
      # @param [Oga::XML::NodeSet|Array] nodes
      #
      def children=(nodes)
        @children = nodes.is_a?(NodeSet) ? nodes : NodeSet.new(nodes, self)
      end

      ##
      # Returns the parent node of the current node. If there is no parent node
      # `nil` is returned instead.
      #
      # @return [Oga::XML::Node]
      #
      def parent
        node_set ? node_set.owner : nil
      end

      ##
      # Returns the preceding node, or nil if there is none. Nodes that do not
      # belong to a node set (or are not listed in it) have no preceding node.
      #
      # @return [Oga::XML::Node]
      #
      def previous
        index = node_set && node_set.index(self)

        index && index > 0 ? node_set[index - 1] : nil
      end

      ##
      # Returns the following node, or nil if there is none. Nodes that do not
      # belong to a node set (or are not listed in it) have no following node.
      #
      # @return [Oga::XML::Node]
      #
      def next
        index = node_set && node_set.index(self)

        return unless index

        following = index + 1

        # Valid indexes run from 0 up to (but excluding) the set's length.
        following < node_set.length ? node_set[following] : nil
      end

      ##
      # Returns the previous element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def previous_element
        node = previous
        node = node.previous until node.nil? || node.is_a?(Element)

        node
      end

      ##
      # Returns the next element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def next_element
        node = self.next
        node = node.next until node.nil? || node.is_a?(Element)

        node
      end

      ##
      # Returns the root document/node of the current node. The node is
      # retrieved by traversing upwards in the DOM tree from the current node.
      # Traversal stops at the first Document, or at the first node that has
      # no owning node set (or whose set has no owner).
      #
      # @return [Oga::XML::Document|Oga::XML::Node]
      #
      def root_node
        node = self

        until node.is_a?(Document)
          set   = node.node_set
          owner = set && set.owner

          break unless owner

          node = owner
        end

        node
      end

      ##
      # Removes the current node from the owning node set. Nothing happens
      # (and `nil` is returned) when the node has no owning set.
      #
      # @return [Oga::XML::Node]
      #
      def remove
        node_set.delete(self) if node_set
      end

      ##
      # Inserts the given node before the current node.
      #
      # The current node must belong to a node set; calling this on a node
      # without one raises a NoMethodError.
      #
      # @param [Oga::XML::Node] other
      #
      def before(other)
        index = node_set.index(self)

        node_set.insert(index, other)
      end

      ##
      # Inserts the given node after the current node.
      #
      # The current node must belong to a node set; calling this on a node
      # without one raises a NoMethodError.
      #
      # @param [Oga::XML::Node] other
      #
      def after(other)
        index = node_set.index(self) + 1

        node_set.insert(index, other)
      end
    end # Node
  end # XML
end # Oga
@@ -0,0 +1,313 @@
1
module Oga
  module XML
    ##
    # The NodeSet class contains a set of unique {Oga::XML::Node} instances that
    # can be queried and modified. Optionally NodeSet instances can take
    # ownership of a node (besides just containing it). This allows the nodes to
    # query their previous and next elements.
    #
    # There are two types of sets:
    #
    # 1. Regular node sets
    # 2. Owned node sets
    #
    # Both behave similar to Ruby's Array class. The difference between an
    # owned and regular node set is that an owned set modifies nodes that are
    # added or removed by certain operations. For example, when a node is added
    # to an owned set the `node_set` attribute of said node points to the set
    # it was just added to.
    #
    # Owned node sets are used when building a DOM tree with
    # {Oga::XML::Parser}. By taking ownership of nodes in a set Oga makes it
    # possible to use these sets as following:
    #
    #     document = Oga::XML::Document.new
    #     element  = Oga::XML::Element.new
    #
    #     document.children << element
    #
    #     element.node_set == document.children # => true
    #
    # If ownership was not handled then you'd have to manually set the
    # `element` variable's `node_set` attribute after pushing it into a set.
    #
    # @!attribute [rw] owner
    #  The node that owns the set, `nil` for regular (unowned) sets.
    #  @return [Oga::XML::Node]
    #
    class NodeSet
      include Enumerable

      attr_accessor :owner

      ##
      # @param [Array] nodes The nodes to add to the set. Duplicates are
      #  dropped.
      # @param [Oga::XML::Node] owner The owner of the set.
      #
      def initialize(nodes = [], owner = nil)
        @nodes = nodes.uniq
        @owner = owner

        @nodes.each { |node| take_ownership(node) }
      end

      ##
      # Yields the supplied block for every node. Without a block an
      # Enumerator over the nodes is returned.
      #
      # @yieldparam [Oga::XML::Node]
      #
      def each
        return to_enum(:each) unless block_given?

        @nodes.each { |node| yield node }
      end

      ##
      # Returns the last node in the set.
      #
      # @return [Oga::XML::Node]
      #
      def last
        @nodes[-1]
      end

      ##
      # Returns `true` if the set is empty.
      #
      # @return [TrueClass|FalseClass]
      #
      def empty?
        @nodes.empty?
      end

      ##
      # Returns the amount of nodes in the set.
      #
      # @return [Fixnum]
      #
      def length
        @nodes.length
      end

      alias_method :count, :length
      alias_method :size, :length

      ##
      # Returns the index of the given node, or `nil` if the node is not part
      # of the set.
      #
      # @param [Oga::XML::Node] node
      # @return [Fixnum]
      #
      def index(node)
        @nodes.index(node)
      end

      ##
      # Pushes the node at the end of the set. Nodes that are already in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def push(node)
        return if @nodes.include?(node)

        @nodes << node

        take_ownership(node)
      end

      alias_method :<<, :push

      ##
      # Pushes the node at the start of the set. Nodes that are already in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def unshift(node)
        return if @nodes.include?(node)

        @nodes.unshift(node)

        take_ownership(node)
      end

      ##
      # Shifts a node from the start of the set. Returns `nil` when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def shift
        node = @nodes.shift

        # An empty set yields nil, which has no ownership to release.
        remove_ownership(node) if node

        node
      end

      ##
      # Pops a node from the end of the set. Returns `nil` when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def pop
        node = @nodes.pop

        # An empty set yields nil, which has no ownership to release.
        remove_ownership(node) if node

        node
      end

      ##
      # Inserts a node into the set at the given index. Nodes that are already
      # in the set are ignored.
      #
      # @param [Fixnum] index The index to insert the node at.
      # @param [Oga::XML::Node] node
      #
      def insert(index, node)
        return if @nodes.include?(node)

        @nodes.insert(index, node)

        take_ownership(node)
      end

      ##
      # Returns the node for the given index.
      #
      # @param [Fixnum] index
      # @return [Oga::XML::Node]
      #
      def [](index)
        @nodes[index]
      end

      ##
      # Converts the current set to an Array. The Array returned is the one
      # the set uses internally, not a copy.
      #
      # @return [Array]
      #
      def to_a
        @nodes
      end

      ##
      # Creates a new set based on the current and the specified set. The newly
      # created set does not inherit ownership rules of the current set.
      #
      # @param [Oga::XML::NodeSet] other
      # @return [Oga::XML::NodeSet]
      #
      def +(other)
        self.class.new(to_a | other.to_a)
      end

      ##
      # Adds the nodes of the given node set to the current node set.
      #
      # @param [Oga::XML::NodeSet] other
      #
      def concat(other)
        other.each { |node| push(node) }
      end

      ##
      # Removes the current nodes from their owning sets and clears their
      # `node_set` attribute. The nodes stay in the current set, unless the
      # current set is itself one of the owning sets.
      #
      # This method is intended to remove nodes from an XML document/node.
      #
      def remove
        owners = []

        # First we gather all the sets to remove nodes from, then we remove the
        # actual nodes. This is done as you can not reliably remove elements
        # from an Array while iterating on that same Array.
        @nodes.each do |node|
          next unless node.node_set

          owners << node.node_set

          node.node_set = nil
        end

        # Work on a snapshot: one of the owning sets may be this very set, in
        # which case deleting would otherwise mutate the Array being iterated
        # and skip nodes.
        detached = @nodes.dup

        owners.uniq.each do |set|
          detached.each { |node| set.delete(node) }
        end
      end

      ##
      # Removes a node from the current set only.
      #
      # @param [Oga::XML::Node] node
      # @return [Oga::XML::Node] The removed node, or `nil` if the node was
      #  not part of the set.
      #
      def delete(node)
        removed = @nodes.delete(node)

        remove_ownership(removed) if removed

        removed
      end

      ##
      # Returns the values of the given attribute. Nodes that do not respond
      # to `attribute` are skipped.
      #
      # @param [String|Symbol] name The name of the attribute.
      # @return [Array]
      #
      def attribute(name)
        with_attributes = @nodes.select { |node| node.respond_to?(:attribute) }

        with_attributes.map { |node| node.attribute(name) }
      end

      alias_method :attr, :attribute

      ##
      # Returns the text of all nodes in the set, ignoring comment nodes.
      #
      # @return [String]
      #
      def text
        @nodes.each_with_object('') do |node, buffer|
          if node.respond_to?(:text) && !node.is_a?(Comment)
            buffer << node.text
          end
        end
      end

      ##
      # @return [String]
      #
      def inspect
        values = @nodes.map(&:inspect).join(', ')

        "NodeSet(#{values})"
      end

      private

      ##
      # Takes ownership of the given node. This only occurs when the current
      # set has an owner.
      #
      # @param [Oga::XML::Node] node
      #
      def take_ownership(node)
        node.node_set = self if owner
      end

      ##
      # Removes ownership of the node if it belongs to the current set.
      #
      # @param [Oga::XML::Node] node
      #
      def remove_ownership(node)
        node.node_set = nil if node.node_set == self
      end
    end # NodeSet
  end # XML
end # Oga