rubyjedi-oga 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +362 -0
  4. data/README.md +317 -0
  5. data/doc/css/common.css +77 -0
  6. data/doc/css_selectors.md +935 -0
  7. data/doc/manually_creating_documents.md +67 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/doc/xml_namespaces.md +63 -0
  10. data/ext/c/extconf.rb +11 -0
  11. data/ext/c/lexer.c +2595 -0
  12. data/ext/c/lexer.h +16 -0
  13. data/ext/c/lexer.rl +198 -0
  14. data/ext/c/liboga.c +6 -0
  15. data/ext/c/liboga.h +11 -0
  16. data/ext/java/Liboga.java +14 -0
  17. data/ext/java/org/liboga/xml/Lexer.java +1363 -0
  18. data/ext/java/org/liboga/xml/Lexer.rl +223 -0
  19. data/ext/ragel/base_lexer.rl +633 -0
  20. data/lib/oga.rb +57 -0
  21. data/lib/oga/blacklist.rb +40 -0
  22. data/lib/oga/css/lexer.rb +743 -0
  23. data/lib/oga/css/parser.rb +976 -0
  24. data/lib/oga/entity_decoder.rb +21 -0
  25. data/lib/oga/html/entities.rb +2150 -0
  26. data/lib/oga/html/parser.rb +25 -0
  27. data/lib/oga/html/sax_parser.rb +18 -0
  28. data/lib/oga/lru.rb +160 -0
  29. data/lib/oga/oga.rb +57 -0
  30. data/lib/oga/version.rb +3 -0
  31. data/lib/oga/whitelist.rb +20 -0
  32. data/lib/oga/xml/attribute.rb +136 -0
  33. data/lib/oga/xml/cdata.rb +17 -0
  34. data/lib/oga/xml/character_node.rb +37 -0
  35. data/lib/oga/xml/comment.rb +17 -0
  36. data/lib/oga/xml/default_namespace.rb +13 -0
  37. data/lib/oga/xml/doctype.rb +82 -0
  38. data/lib/oga/xml/document.rb +108 -0
  39. data/lib/oga/xml/element.rb +428 -0
  40. data/lib/oga/xml/entities.rb +122 -0
  41. data/lib/oga/xml/html_void_elements.rb +15 -0
  42. data/lib/oga/xml/lexer.rb +550 -0
  43. data/lib/oga/xml/namespace.rb +48 -0
  44. data/lib/oga/xml/node.rb +219 -0
  45. data/lib/oga/xml/node_set.rb +333 -0
  46. data/lib/oga/xml/parser.rb +631 -0
  47. data/lib/oga/xml/processing_instruction.rb +37 -0
  48. data/lib/oga/xml/pull_parser.rb +175 -0
  49. data/lib/oga/xml/querying.rb +56 -0
  50. data/lib/oga/xml/sax_parser.rb +192 -0
  51. data/lib/oga/xml/text.rb +66 -0
  52. data/lib/oga/xml/traversal.rb +50 -0
  53. data/lib/oga/xml/xml_declaration.rb +65 -0
  54. data/lib/oga/xpath/evaluator.rb +1798 -0
  55. data/lib/oga/xpath/lexer.rb +1958 -0
  56. data/lib/oga/xpath/parser.rb +622 -0
  57. data/oga.gemspec +45 -0
  58. metadata +227 -0
@@ -0,0 +1,48 @@
1
module Oga
  module XML
    ##
    # The Namespace class contains information about XML namespaces such as the
    # name and URI.
    #
    # Two namespaces are considered equal when both their name and URI match.
    # `eql?` and `hash` follow the same rule so that equal namespaces also
    # collapse in `Hash` keys, `Set` members and `Array#uniq`.
    #
    class Namespace
      # @return [String|NilClass] nil when no name was given.
      attr_accessor :name

      # @return [String|NilClass] nil when no URI was given.
      attr_accessor :uri

      ##
      # @param [Hash] options
      #
      # @option options [String] :name
      # @option options [String] :uri
      #
      def initialize(options = {})
        @name = options[:name]
        @uri  = options[:uri]
      end

      ##
      # Returns the namespace name, or an empty String when no name is set.
      #
      # @return [String]
      #
      def to_s
        name.to_s
      end

      ##
      # @return [String]
      #
      def inspect
        "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
      end

      ##
      # Returns `true` when `other` is a namespace of the same class (or a
      # subclass) with the same name and URI.
      #
      # @param [Oga::XML::Namespace] other
      # @return [TrueClass|FalseClass]
      #
      def ==(other)
        other.is_a?(self.class) && name == other.name && uri == other.uri
      end

      alias_method :eql?, :==

      ##
      # Hash code derived from the same fields that `==` compares. The class is
      # deliberately left out because `==` also accepts subclass instances.
      #
      # @return [Integer]
      #
      def hash
        [name, uri].hash
      end
    end # Namespace
  end # XML
end # Oga
@@ -0,0 +1,219 @@
1
module Oga
  module XML
    ##
    # A generic XML node. Instances of this class can belong to a
    # {Oga::XML::NodeSet} and can be used to query surrounding and parent
    # nodes.
    #
    class Node
      include Traversal

      # @return [Oga::XML::NodeSet] The set this node belongs to, if any.
      attr_reader :node_set

      ##
      # @param [Hash] options
      #
      # @option options [Oga::XML::NodeSet] :node_set The node set that this
      #  node belongs to.
      #
      # @option options [Oga::XML::NodeSet|Array] :children The child nodes of
      #  the current node.
      #
      def initialize(options = {})
        self.node_set = options[:node_set]
        self.children = options[:children] if options[:children]
      end

      ##
      # Assigns the node set this node belongs to. The memoized results of
      # {#root_node} and {#html?} are cleared as they depend on the position of
      # the node in the tree.
      #
      # @param [Oga::XML::NodeSet] set
      #
      def node_set=(set)
        @node_set  = set
        @root_node = nil
        @html_p    = nil
      end

      ##
      # Returns the child nodes of the current node. An empty set owned by the
      # current node is created on first access.
      #
      # @return [Oga::XML::NodeSet]
      #
      def children
        @children ||= NodeSet.new([], self)
      end

      ##
      # Sets the child nodes of the element.
      #
      # A NodeSet is stored as-is (its owner is left untouched), anything else
      # is wrapped in a new NodeSet owned by the current node.
      #
      # @param [Oga::XML::NodeSet|Array] nodes
      #
      def children=(nodes)
        @children = nodes.is_a?(NodeSet) ? nodes : NodeSet.new(nodes, self)
      end

      ##
      # Returns the parent node of the current node. If there is no parent node
      # `nil` is returned instead.
      #
      # @return [Oga::XML::Node]
      #
      def parent
        node_set ? node_set.owner : nil
      end

      ##
      # Returns the preceding node, or nil if there is none. Nodes that do not
      # belong to a node set have no siblings and thus also return nil.
      #
      # @return [Oga::XML::Node]
      #
      def previous
        position = sibling_index

        # Position 0 has nothing before it. A negative index must never reach
        # the set as it would wrap around to the end.
        position && position > 0 ? node_set[position - 1] : nil
      end

      ##
      # Returns the following node, or nil if there is none. Nodes that do not
      # belong to a node set have no siblings and thus also return nil.
      #
      # @return [Oga::XML::Node]
      #
      def next
        position = sibling_index

        return unless position

        following = position + 1

        following < node_set.length ? node_set[following] : nil
      end

      ##
      # Returns the previous element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def previous_element
        node = self

        while node = node.previous
          return node if node.is_a?(Element)
        end

        nil
      end

      ##
      # Returns the next element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def next_element
        node = self

        while node = node.next
          return node if node.is_a?(Element)
        end

        nil
      end

      ##
      # Returns the root document/node of the current node. The node is
      # retrieved by traversing upwards in the DOM tree from the current node.
      #
      # Traversal stops at the first Document, or at the last node that either
      # has no node set or whose node set has no owner. The result is memoized
      # until {#node_set=} is called on this node.
      #
      # @return [Oga::XML::Document|Oga::XML::Node]
      #
      def root_node
        @root_node ||= begin
          node = self

          until node.is_a?(Document)
            set   = node.node_set
            owner = set && set.owner

            break unless owner

            node = owner
          end

          node
        end
      end

      ##
      # Removes the current node from the owning node set.
      #
      # @return [Oga::XML::Node] The removed node, nil if the node does not
      #  belong to a set.
      #
      def remove
        node_set.delete(self) if node_set
      end

      ##
      # Replaces the current node with another.
      #
      # The current node has to belong to a node set.
      #
      # @example Replacing with an element
      #  element = Oga::XML::Element.new(:name => 'div')
      #  some_node.replace(element)
      #
      # @example Replacing with a String
      #  some_node.replace('this will replace the current node with a text node')
      #
      # @param [String|Oga::XML::Node] other
      #
      def replace(other)
        other = Text.new(:text => other) if other.is_a?(String)

        # Insert first so the position of the current node is still known.
        before(other)
        remove
      end

      ##
      # Inserts the given node before the current node.
      #
      # The current node has to belong to a node set.
      #
      # @param [Oga::XML::Node] other
      #
      def before(other)
        node_set.insert(node_set.index(self), other)
      end

      ##
      # Inserts the given node after the current node.
      #
      # The current node has to belong to a node set.
      #
      # @param [Oga::XML::Node] other
      #
      def after(other)
        node_set.insert(node_set.index(self) + 1, other)
      end

      ##
      # Returns `true` when the root node is a Document that reports itself as
      # HTML. The result is memoized until {#node_set=} is called on this node.
      #
      # @return [TrueClass|FalseClass]
      #
      def html?
        if @html_p.nil?
          root = root_node

          @html_p = root.is_a?(Document) && root.html?
        end

        @html_p
      end

      ##
      # @return [TrueClass|FalseClass]
      #
      def xml?
        !html?
      end

      private

      ##
      # Returns the position of the current node in its node set, or nil when
      # the node has no node set or is not found in it.
      #
      # @return [Fixnum|NilClass]
      #
      def sibling_index
        node_set && node_set.index(self)
      end
    end # Node
  end # XML
end # Oga
@@ -0,0 +1,333 @@
1
module Oga
  module XML
    ##
    # The NodeSet class contains a set of unique {Oga::XML::Node} instances that
    # can be queried and modified. Optionally NodeSet instances can take
    # ownership of a node (besides just containing it). This allows the nodes to
    # query their previous and next elements.
    #
    # There are two types of sets:
    #
    # 1. Regular node sets
    # 2. Owned node sets
    #
    # Both behave similar to Ruby's Array class. The difference between an
    # owned and regular node set is that an owned set modifies nodes that are
    # added or removed by certain operations. For example, when a node is added
    # to an owned set the `node_set` attribute of said node points to the set
    # it was just added to.
    #
    # Owned node sets are used when building a DOM tree with
    # {Oga::XML::Parser}. By taking ownership of nodes in a set Oga makes it
    # possible to use these sets as following:
    #
    #     document = Oga::XML::Document.new
    #     element  = Oga::XML::Element.new
    #
    #     document.children << element
    #
    #     element.node_set == document.children # => true
    #
    # If ownership was not handled then you'd have to manually set the
    # `element` variable's `node_set` attribute after pushing it into a set.
    #
    class NodeSet
      include Enumerable

      # @return [Oga::XML::Node] The node owning this set, nil for regular sets.
      attr_accessor :owner

      ##
      # The given Array is used as the backing storage of the set, it is not
      # copied.
      #
      # @param [Array] nodes The nodes to add to the set.
      # @param [Oga::XML::Node] owner The owner of the set.
      #
      def initialize(nodes = [], owner = nil)
        @nodes = nodes
        @owner = owner

        @nodes.each { |node| take_ownership(node) } if owner
      end

      ##
      # Yields the supplied block for every node. Without a block an Enumerator
      # is returned instead.
      #
      # @yieldparam [Oga::XML::Node]
      # @return [Array|Enumerator]
      #
      def each
        return to_enum(:each) unless block_given?

        @nodes.each { |node| yield node }
      end

      ##
      # Returns the last node in the set.
      #
      # @return [Oga::XML::Node]
      #
      def last
        @nodes[-1]
      end

      ##
      # Returns `true` if the set is empty.
      #
      # @return [TrueClass|FalseClass]
      #
      def empty?
        @nodes.empty?
      end

      ##
      # Returns the amount of nodes in the set.
      #
      # @return [Fixnum]
      #
      def length
        @nodes.length
      end

      alias_method :count, :length
      alias_method :size, :length

      ##
      # Returns the index of the given node, nil if the node is not in the set.
      #
      # @param [Oga::XML::Node] node
      # @return [Fixnum]
      #
      def index(node)
        @nodes.index(node)
      end

      ##
      # Pushes the node at the end of the set. Nodes already present in the set
      # are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def push(node)
        return if @nodes.include?(node)

        @nodes << node

        take_ownership(node)
      end

      alias_method :<<, :push

      ##
      # Pushes the node at the start of the set. Nodes already present in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def unshift(node)
        return if @nodes.include?(node)

        @nodes.unshift(node)

        take_ownership(node)
      end

      ##
      # Shifts a node from the start of the set.
      #
      # @return [Oga::XML::Node] nil if the set is empty.
      #
      def shift
        node = @nodes.shift

        # An empty set yields nil, in which case there is nothing to disown.
        remove_ownership(node) if node

        node
      end

      ##
      # Pops a node from the end of the set.
      #
      # @return [Oga::XML::Node] nil if the set is empty.
      #
      def pop
        node = @nodes.pop

        # An empty set yields nil, in which case there is nothing to disown.
        remove_ownership(node) if node

        node
      end

      ##
      # Inserts a node into the set at the given index. Nodes already present
      # in the set are ignored.
      #
      # @param [Fixnum] index The index to insert the node at.
      # @param [Oga::XML::Node] node
      #
      def insert(index, node)
        return if @nodes.include?(node)

        @nodes.insert(index, node)

        take_ownership(node)
      end

      ##
      # Returns the node for the given index.
      #
      # @param [Fixnum] index
      # @return [Oga::XML::Node]
      #
      def [](index)
        @nodes[index]
      end

      ##
      # Converts the current set to an Array. The returned Array is the backing
      # storage of the set, not a copy.
      #
      # @return [Array]
      #
      def to_a
        @nodes
      end

      ##
      # Creates a new set based on the current and the specified set. The newly
      # created set does not inherit ownership rules of the current set.
      #
      # @param [Oga::XML::NodeSet] other
      # @return [Oga::XML::NodeSet]
      #
      def +(other)
        self.class.new(to_a | other.to_a)
      end

      ##
      # Returns `true` if the current node set and the one given in `other` are
      # equal to each other.
      #
      # @param [Oga::XML::NodeSet] other
      # @return [TrueClass|FalseClass]
      #
      def ==(other)
        other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
      end

      ##
      # Returns `true` if the nodes given in `nodes` are equal to those
      # specified in the current `@nodes` variable. This method allows two
      # NodeSet instances to compare each other without the need of exposing
      # `@nodes` to the public.
      #
      # @param [Array<Oga::XML::Node>] nodes
      # @return [TrueClass|FalseClass]
      #
      def equal_nodes?(nodes)
        @nodes == nodes
      end

      ##
      # Adds the nodes of the given node set to the current node set.
      #
      # @param [Oga::XML::NodeSet] other
      #
      def concat(other)
        other.each { |node| push(node) }
      end

      ##
      # Removes the current nodes from their owning set. The nodes are *not*
      # removed from the current set, unless the current set is the owning set.
      #
      # This method is intended to remove nodes from an XML document/node.
      #
      # @return [Array<Oga::XML::NodeSet>] The owning set of every node that
      #  was removed.
      #
      def remove
        # First we gather all the nodes and the sets to remove them from, then
        # we remove the actual nodes. This is done as you can not reliably
        # remove elements from an Array while iterating on that same Array
        # (which is the case when the current set owns its nodes).
        owned = @nodes.select { |node| node.node_set }
        sets  = owned.map { |node| node.node_set }

        owned.each { |node| node.node_set = nil }

        sets.zip(owned) { |set, node| set.delete(node) }

        sets
      end

      ##
      # Removes a node from the current set only.
      #
      # @param [Oga::XML::Node] node
      # @return [Oga::XML::Node] The removed node, nil if it was not in the
      #  set.
      #
      def delete(node)
        removed = @nodes.delete(node)

        remove_ownership(removed) if removed

        removed
      end

      ##
      # Returns the values of the given attribute. Nodes that do not respond to
      # `attribute` are skipped.
      #
      # @param [String|Symbol] name The name of the attribute.
      # @return [Array]
      #
      def attribute(name)
        @nodes.each_with_object([]) do |node, values|
          values << node.attribute(name) if node.respond_to?(:attribute)
        end
      end

      alias_method :attr, :attribute

      ##
      # Returns the text of all nodes in the set, ignoring comment nodes.
      #
      # @return [String]
      #
      def text
        @nodes.each_with_object('') do |node, buffer|
          buffer << node.text if node.respond_to?(:text) && !node.is_a?(Comment)
        end
      end

      ##
      # @return [String]
      #
      def inspect
        values = @nodes.map(&:inspect).join(', ')

        "NodeSet(#{values})"
      end

      private

      ##
      # Takes ownership of the given node. This only occurs when the current
      # set has an owner.
      #
      # @param [Oga::XML::Node] node
      #
      def take_ownership(node)
        node.node_set = self if owner
      end

      ##
      # Removes ownership of the node if it belongs to the current set.
      #
      # Identity is used instead of `==` as the latter compares the contents of
      # two sets: a different set that happens to hold the same nodes must not
      # be able to disown a node.
      #
      # @param [Oga::XML::Node] node
      #
      def remove_ownership(node)
        node.node_set = nil if node.node_set.equal?(self)
      end
    end # NodeSet
  end # XML
end # Oga