oga 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +171 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +7 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/oga.rb +43 -0
  20. data/lib/oga/html/parser.rb +25 -0
  21. data/lib/oga/oga.rb +27 -0
  22. data/lib/oga/version.rb +3 -0
  23. data/lib/oga/xml/attribute.rb +111 -0
  24. data/lib/oga/xml/cdata.rb +24 -0
  25. data/lib/oga/xml/character_node.rb +39 -0
  26. data/lib/oga/xml/comment.rb +24 -0
  27. data/lib/oga/xml/doctype.rb +91 -0
  28. data/lib/oga/xml/document.rb +99 -0
  29. data/lib/oga/xml/element.rb +340 -0
  30. data/lib/oga/xml/lexer.rb +399 -0
  31. data/lib/oga/xml/namespace.rb +42 -0
  32. data/lib/oga/xml/node.rb +175 -0
  33. data/lib/oga/xml/node_set.rb +313 -0
  34. data/lib/oga/xml/parser.rb +556 -0
  35. data/lib/oga/xml/processing_instruction.rb +39 -0
  36. data/lib/oga/xml/pull_parser.rb +166 -0
  37. data/lib/oga/xml/querying.rb +32 -0
  38. data/lib/oga/xml/text.rb +16 -0
  39. data/lib/oga/xml/traversal.rb +48 -0
  40. data/lib/oga/xml/xml_declaration.rb +76 -0
  41. data/lib/oga/xpath/evaluator.rb +1748 -0
  42. data/lib/oga/xpath/lexer.rb +2043 -0
  43. data/lib/oga/xpath/node.rb +10 -0
  44. data/lib/oga/xpath/parser.rb +535 -0
  45. data/oga.gemspec +45 -0
  46. metadata +221 -0
@@ -0,0 +1,42 @@
1
module Oga
  module XML
    ##
    # Value object describing a single XML namespace: the name it is
    # registered under and the URI it points to.
    #
    # @!attribute [rw] name
    #  @return [String]
    #
    # @!attribute [rw] uri
    #  @return [String]
    #
    class Namespace
      attr_accessor :name, :uri

      ##
      # @param [Hash] options
      #
      # @option options [String] :name
      # @option options [String] :uri
      #
      def initialize(options = {})
        @name, @uri = options.values_at(:name, :uri)
      end

      ##
      # Returns the namespace name as a String (an empty String when no name
      # was set).
      #
      # @return [String]
      #
      def to_s
        name.to_s
      end

      ##
      # @return [String]
      #
      def inspect
        format('Namespace(name: %s uri: %s)', name.inspect, uri.inspect)
      end
    end # Namespace
  end # XML
end # Oga
@@ -0,0 +1,175 @@
1
module Oga
  module XML
    ##
    # A generic XML node. Instances of this class can belong to a
    # {Oga::XML::NodeSet} and can be used to query surrounding and parent
    # nodes.
    #
    # @!attribute [rw] node_set
    #  The set this node belongs to, or `nil` when it is not part of a set.
    #  @return [Oga::XML::NodeSet]
    #
    class Node
      include Traversal

      attr_accessor :node_set

      ##
      # @param [Hash] options
      #
      # @option options [Oga::XML::NodeSet] :node_set The node set that this
      #  node belongs to.
      #
      # @option options [Oga::XML::NodeSet|Array] :children The child nodes of
      #  the current node.
      #
      def initialize(options = {})
        @node_set = options[:node_set]

        self.children = options[:children] if options[:children]
      end

      ##
      # Returns the child nodes of the current node. The set is created lazily
      # (owned by this node) the first time it is requested.
      #
      # @return [Oga::XML::NodeSet]
      #
      def children
        @children ||= NodeSet.new([], self)
      end

      ##
      # Sets the child nodes of the node.
      #
      # A NodeSet is stored as-is (its owner is left untouched); any other
      # collection is wrapped in a new NodeSet owned by this node.
      #
      # @param [Oga::XML::NodeSet|Array] nodes
      #
      def children=(nodes)
        @children = nodes.is_a?(NodeSet) ? nodes : NodeSet.new(nodes, self)
      end

      ##
      # Returns the parent node of the current node. If there is no parent node
      # `nil` is returned instead.
      #
      # @return [Oga::XML::Node]
      #
      def parent
        node_set ? node_set.owner : nil
      end

      ##
      # Returns the preceding node, or nil if there is none. A node that does
      # not belong to a node set has no preceding node.
      #
      # @return [Oga::XML::Node]
      #
      def previous
        position = sibling_index

        # No set, not found in the set, or already the first node.
        return unless position && position > 0

        node_set[position - 1]
      end

      ##
      # Returns the following node, or nil if there is none. A node that does
      # not belong to a node set has no following node.
      #
      # @return [Oga::XML::Node]
      #
      def next
        position = sibling_index

        return unless position

        following = position + 1

        following < node_set.length ? node_set[following] : nil
      end

      ##
      # Returns the previous element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def previous_element
        node = self

        while (node = node.previous)
          return node if node.is_a?(Element)
        end

        nil
      end

      ##
      # Returns the next element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def next_element
        node = self

        while (node = node.next)
          return node if node.is_a?(Element)
        end

        nil
      end

      ##
      # Returns the root document/node of the current node. The node is
      # retrieved by traversing upwards in the DOM tree from the current node.
      # Traversal stops at a Document, at a node without a node set, or at a
      # node whose set has no owner.
      #
      # @return [Oga::XML::Document|Oga::XML::Node]
      #
      def root_node
        node = self

        loop do
          break if node.is_a?(Document)

          set = node.node_set

          # Stop at the top-most node instead of walking onto a nil owner.
          break unless set && set.owner

          node = set.owner
        end

        node
      end

      ##
      # Removes the current node from the owning node set. Does nothing (and
      # returns nil) when the node does not belong to a set.
      #
      # @return [Oga::XML::Node]
      #
      def remove
        node_set.delete(self) if node_set
      end

      ##
      # Inserts the given node before the current node. The current node has
      # to be part of a node set.
      #
      # @param [Oga::XML::Node] other
      #
      def before(other)
        index = node_set.index(self)

        node_set.insert(index, other)
      end

      ##
      # Inserts the given node after the current node. The current node has to
      # be part of a node set.
      #
      # @param [Oga::XML::Node] other
      #
      def after(other)
        index = node_set.index(self) + 1

        node_set.insert(index, other)
      end

      ##
      # @return [Symbol]
      #
      def node_type
        :node
      end

      private

      ##
      # Returns the position of this node in its node set, or nil when the node
      # has no set or is not present in it.
      #
      # @return [Fixnum|NilClass]
      #
      def sibling_index
        node_set ? node_set.index(self) : nil
      end
    end # Node
  end # XML
end # Oga
@@ -0,0 +1,313 @@
1
module Oga
  module XML
    ##
    # The NodeSet class contains a set of unique {Oga::XML::Node} instances that
    # can be queried and modified. Optionally NodeSet instances can take
    # ownership of a node (besides just containing it). This allows the nodes to
    # query their previous and next elements.
    #
    # There are two types of sets:
    #
    # 1. Regular node sets
    # 2. Owned node sets
    #
    # Both behave similarly to Ruby's Array class. The difference between an
    # owned and regular node set is that an owned set modifies nodes that are
    # added or removed by certain operations. For example, when a node is added
    # to an owned set the `node_set` attribute of said node points to the set
    # it was just added to.
    #
    # Owned node sets are used when building a DOM tree with
    # {Oga::XML::Parser}. By taking ownership of nodes in a set Oga makes it
    # possible to use these sets as follows:
    #
    #     document = Oga::XML::Document.new
    #     element  = Oga::XML::Element.new
    #
    #     document.children << element
    #
    #     element.node_set == document.children # => true
    #
    # If ownership was not handled then you'd have to manually set the
    # `element` variable's `node_set` attribute after pushing it into a set.
    #
    # @!attribute [rw] owner
    #  @return [Oga::XML::Node]
    #
    class NodeSet
      include Enumerable

      attr_accessor :owner

      ##
      # @param [Array] nodes The nodes to add to the set. Duplicates are
      #  dropped; the given Array itself is not modified.
      # @param [Oga::XML::Node] owner The owner of the set.
      #
      def initialize(nodes = [], owner = nil)
        @nodes = nodes.uniq
        @owner = owner

        @nodes.each { |node| take_ownership(node) }
      end

      ##
      # Yields the supplied block for every node. Without a block an
      # Enumerator is returned instead.
      #
      # @yieldparam [Oga::XML::Node]
      # @return [Array|Enumerator]
      #
      def each
        return to_enum(:each) unless block_given?

        @nodes.each { |node| yield node }
      end

      ##
      # Returns the last node in the set.
      #
      # @return [Oga::XML::Node]
      #
      def last
        @nodes[-1]
      end

      ##
      # Returns `true` if the set is empty.
      #
      # @return [TrueClass|FalseClass]
      #
      def empty?
        @nodes.empty?
      end

      ##
      # Returns the amount of nodes in the set.
      #
      # @return [Fixnum]
      #
      def length
        @nodes.length
      end

      alias_method :count, :length
      alias_method :size, :length

      ##
      # Returns the index of the given node, or nil when the node is not part
      # of the set.
      #
      # @param [Oga::XML::Node] node
      # @return [Fixnum]
      #
      def index(node)
        @nodes.index(node)
      end

      ##
      # Pushes the node at the end of the set. Nodes already present in the set
      # are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def push(node)
        return if @nodes.include?(node)

        @nodes << node

        take_ownership(node)
      end

      alias_method :<<, :push

      ##
      # Pushes the node at the start of the set. Nodes already present in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def unshift(node)
        return if @nodes.include?(node)

        @nodes.unshift(node)

        take_ownership(node)
      end

      ##
      # Shifts a node from the start of the set. Returns nil when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def shift
        node = @nodes.shift

        # Array#shift returns nil for an empty Array; there is nothing to
        # release in that case.
        remove_ownership(node) if node

        node
      end

      ##
      # Pops a node from the end of the set. Returns nil when the set is empty.
      #
      # @return [Oga::XML::Node]
      #
      def pop
        node = @nodes.pop

        # Array#pop returns nil for an empty Array; there is nothing to release
        # in that case.
        remove_ownership(node) if node

        node
      end

      ##
      # Inserts a node into the set at the given index. Nodes already present
      # in the set are ignored.
      #
      # @param [Fixnum] index The index to insert the node at.
      # @param [Oga::XML::Node] node
      #
      def insert(index, node)
        return if @nodes.include?(node)

        @nodes.insert(index, node)

        take_ownership(node)
      end

      ##
      # Returns the node for the given index.
      #
      # @param [Fixnum] index
      # @return [Oga::XML::Node]
      #
      def [](index)
        @nodes[index]
      end

      ##
      # Converts the current set to an Array. The returned Array is the one
      # used internally by the set, not a copy.
      #
      # @return [Array]
      #
      def to_a
        @nodes
      end

      ##
      # Creates a new set based on the current and the specified set. The newly
      # created set does not inherit ownership rules of the current set.
      #
      # @param [Oga::XML::NodeSet] other
      # @return [Oga::XML::NodeSet]
      #
      def +(other)
        self.class.new(to_a | other.to_a)
      end

      ##
      # Adds the nodes of the given node set to the current node set.
      #
      # @param [Oga::XML::NodeSet] other
      #
      def concat(other)
        other.each { |node| push(node) }
      end

      ##
      # Removes the current nodes from their owning set. Unless the current set
      # is itself the owning set, the nodes are *not* removed from the current
      # set.
      #
      # This method is intended to remove nodes from an XML document/node.
      #
      def remove
        # Work on a snapshot: when this set owns its own nodes the deletions
        # below shrink @nodes, and an Array can not be reliably modified while
        # it is being iterated.
        nodes = @nodes.dup
        sets  = []

        # First we gather all the sets to remove nodes from, then we remove the
        # actual nodes. Every owning set is recorded only once.
        nodes.each do |node|
          set = node.node_set

          next unless set

          sets << set unless sets.any? { |known| known.equal?(set) }

          node.node_set = nil
        end

        sets.each do |set|
          nodes.each { |node| set.delete(node) }
        end
      end

      ##
      # Removes a node from the current set only.
      #
      # @param [Oga::XML::Node] node
      # @return [Oga::XML::Node] The removed node, or nil if the node was not
      #  part of the set.
      #
      def delete(node)
        removed = @nodes.delete(node)

        remove_ownership(removed) if removed

        removed
      end

      ##
      # Returns the values of the given attribute. Nodes that do not respond to
      # `attribute` are skipped.
      #
      # @param [String|Symbol] name The name of the attribute.
      # @return [Array]
      #
      def attribute(name)
        with_attributes = @nodes.select { |node| node.respond_to?(:attribute) }

        with_attributes.map { |node| node.attribute(name) }
      end

      alias_method :attr, :attribute

      ##
      # Returns the text of all nodes in the set, ignoring comment nodes.
      #
      # @return [String]
      #
      def text
        buffer = ''

        @nodes.each do |node|
          buffer << node.text if node.respond_to?(:text) && !node.is_a?(Comment)
        end

        buffer
      end

      ##
      # @return [String]
      #
      def inspect
        values = @nodes.map(&:inspect).join(', ')

        "NodeSet(#{values})"
      end

      private

      ##
      # Takes ownership of the given node. This only occurs when the current
      # set has an owner.
      #
      # @param [Oga::XML::Node] node
      #
      def take_ownership(node)
        node.node_set = self if owner
      end

      ##
      # Removes ownership of the node if it belongs to the current set.
      #
      # @param [Oga::XML::Node] node
      #
      def remove_ownership(node)
        node.node_set = nil if node.node_set == self
      end
    end # NodeSet
  end # XML
end # Oga