rubyjedi-oga 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +362 -0
  4. data/README.md +317 -0
  5. data/doc/css/common.css +77 -0
  6. data/doc/css_selectors.md +935 -0
  7. data/doc/manually_creating_documents.md +67 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/doc/xml_namespaces.md +63 -0
  10. data/ext/c/extconf.rb +11 -0
  11. data/ext/c/lexer.c +2595 -0
  12. data/ext/c/lexer.h +16 -0
  13. data/ext/c/lexer.rl +198 -0
  14. data/ext/c/liboga.c +6 -0
  15. data/ext/c/liboga.h +11 -0
  16. data/ext/java/Liboga.java +14 -0
  17. data/ext/java/org/liboga/xml/Lexer.java +1363 -0
  18. data/ext/java/org/liboga/xml/Lexer.rl +223 -0
  19. data/ext/ragel/base_lexer.rl +633 -0
  20. data/lib/oga.rb +57 -0
  21. data/lib/oga/blacklist.rb +40 -0
  22. data/lib/oga/css/lexer.rb +743 -0
  23. data/lib/oga/css/parser.rb +976 -0
  24. data/lib/oga/entity_decoder.rb +21 -0
  25. data/lib/oga/html/entities.rb +2150 -0
  26. data/lib/oga/html/parser.rb +25 -0
  27. data/lib/oga/html/sax_parser.rb +18 -0
  28. data/lib/oga/lru.rb +160 -0
  29. data/lib/oga/oga.rb +57 -0
  30. data/lib/oga/version.rb +3 -0
  31. data/lib/oga/whitelist.rb +20 -0
  32. data/lib/oga/xml/attribute.rb +136 -0
  33. data/lib/oga/xml/cdata.rb +17 -0
  34. data/lib/oga/xml/character_node.rb +37 -0
  35. data/lib/oga/xml/comment.rb +17 -0
  36. data/lib/oga/xml/default_namespace.rb +13 -0
  37. data/lib/oga/xml/doctype.rb +82 -0
  38. data/lib/oga/xml/document.rb +108 -0
  39. data/lib/oga/xml/element.rb +428 -0
  40. data/lib/oga/xml/entities.rb +122 -0
  41. data/lib/oga/xml/html_void_elements.rb +15 -0
  42. data/lib/oga/xml/lexer.rb +550 -0
  43. data/lib/oga/xml/namespace.rb +48 -0
  44. data/lib/oga/xml/node.rb +219 -0
  45. data/lib/oga/xml/node_set.rb +333 -0
  46. data/lib/oga/xml/parser.rb +631 -0
  47. data/lib/oga/xml/processing_instruction.rb +37 -0
  48. data/lib/oga/xml/pull_parser.rb +175 -0
  49. data/lib/oga/xml/querying.rb +56 -0
  50. data/lib/oga/xml/sax_parser.rb +192 -0
  51. data/lib/oga/xml/text.rb +66 -0
  52. data/lib/oga/xml/traversal.rb +50 -0
  53. data/lib/oga/xml/xml_declaration.rb +65 -0
  54. data/lib/oga/xpath/evaluator.rb +1798 -0
  55. data/lib/oga/xpath/lexer.rb +1958 -0
  56. data/lib/oga/xpath/parser.rb +622 -0
  57. data/oga.gemspec +45 -0
  58. metadata +227 -0
@@ -0,0 +1,48 @@
module Oga
  module XML
    ##
    # The Namespace class contains information about XML namespaces such as the
    # name and URI.
    #
    # Two namespaces are considered equal when both their name and URI are
    # equal. `eql?` and `hash` follow the same rule so that equal namespaces
    # also behave as the same Hash key and are collapsed by `Array#uniq`.
    #
    class Namespace
      # @return [String]
      attr_accessor :name

      # @return [String]
      attr_accessor :uri

      ##
      # @param [Hash] options
      #
      # @option options [String] :name
      # @option options [String] :uri
      #
      def initialize(options = {})
        @name = options[:name]
        @uri  = options[:uri]
      end

      ##
      # Returns the namespace name as a String. An unset name results in an
      # empty String.
      #
      # @return [String]
      #
      def to_s
        name.to_s
      end

      ##
      # @return [String]
      #
      def inspect
        "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
      end

      ##
      # Returns `true` if `other` is a Namespace with the same name and URI.
      #
      # @param [Oga::XML::Namespace] other
      # @return [TrueClass|FalseClass]
      #
      def ==(other)
        other.is_a?(self.class) && name == other.name && uri == other.uri
      end

      alias_method :eql?, :==

      ##
      # Returns a hash code derived from the name and URI. The class is left
      # out on purpose: `==` accepts subclass instances, and objects that are
      # `eql?` must produce the same hash code.
      #
      # @return [Integer]
      #
      def hash
        [name, uri].hash
      end
    end # Namespace
  end # XML
end # Oga
@@ -0,0 +1,219 @@
module Oga
  module XML
    ##
    # A generic XML node. Instances of this class can belong to a
    # {Oga::XML::NodeSet} and can be used to query surrounding and parent
    # nodes.
    #
    class Node
      include Traversal

      # @return [Oga::XML::NodeSet]
      attr_reader :node_set

      ##
      # @param [Hash] options
      #
      # @option options [Oga::XML::NodeSet] :node_set The node set that this
      #  node belongs to.
      #
      # @option options [Oga::XML::NodeSet|Array] :children The child nodes of
      #  the current node.
      #
      def initialize(options = {})
        self.node_set = options[:node_set]
        self.children = options[:children] if options[:children]
      end

      ##
      # Sets the node set this node belongs to. The memoized results of
      # {#root_node} and {#html?} are reset, as both are derived from the
      # node set.
      #
      # @param [Oga::XML::NodeSet] set
      #
      def node_set=(set)
        @node_set  = set
        @root_node = nil
        @html_p    = nil
      end

      ##
      # Returns the child nodes of the current node. An empty set owned by
      # this node is created on first access.
      #
      # @return [Oga::XML::NodeSet]
      #
      def children
        @children ||= NodeSet.new([], self)
      end

      ##
      # Sets the child nodes of the element. A NodeSet is stored as-is, any
      # other value is wrapped in a new NodeSet owned by this node.
      #
      # @param [Oga::XML::NodeSet|Array] nodes
      #
      def children=(nodes)
        @children = nodes.is_a?(NodeSet) ? nodes : NodeSet.new(nodes, self)
      end

      ##
      # Returns the parent node of the current node. If there is no parent node
      # `nil` is returned instead.
      #
      # @return [Oga::XML::Node]
      #
      def parent
        node_set ? node_set.owner : nil
      end

      ##
      # Returns the preceding node, or nil if there is none. Nil is also
      # returned when the node does not belong to a node set or can not be
      # found in it.
      #
      # @return [Oga::XML::Node]
      #
      def previous
        index = node_set && node_set.index(self)

        index && index > 0 ? node_set[index - 1] : nil
      end

      ##
      # Returns the following node, or nil if there is none. Nil is also
      # returned when the node does not belong to a node set or can not be
      # found in it.
      #
      # @return [Oga::XML::Node]
      #
      def next
        index = node_set && node_set.index(self)

        return unless index

        following = index + 1

        following < node_set.length ? node_set[following] : nil
      end

      ##
      # Returns the previous element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def previous_element
        node = previous
        node = node.previous until node.nil? || node.is_a?(Element)

        node
      end

      ##
      # Returns the next element node or nil if there is none.
      #
      # @return [Oga::XML::Element]
      #
      def next_element
        node = self.next
        node = node.next until node.nil? || node.is_a?(Element)

        node
      end

      ##
      # Returns the root document/node of the current node. The node is
      # retrieved by traversing upwards in the DOM tree from the current node.
      # The result is memoized until the node set of this node changes.
      #
      # @return [Oga::XML::Document|Oga::XML::Node]
      #
      def root_node
        @root_node ||= begin
          node = self

          # Walk up through the owners until a document, or a node without a
          # node set, is reached.
          node = node.node_set.owner while !node.is_a?(Document) && node.node_set

          node
        end
      end

      ##
      # Removes the current node from the owning node set. Nothing happens
      # (and nil is returned) when the node does not belong to a node set.
      #
      # @return [Oga::XML::Node]
      #
      def remove
        node_set.delete(self) if node_set
      end

      ##
      # Replaces the current node with another.
      #
      # @example Replacing with an element
      #  element = Oga::XML::Element.new(:name => 'div')
      #  some_node.replace(element)
      #
      # @example Replacing with a String
      #  some_node.replace('this will replace the current node with a text node')
      #
      # @param [String|Oga::XML::Node] other
      #
      def replace(other)
        other = Text.new(:text => other) if other.is_a?(String)

        before(other)
        remove
      end

      ##
      # Inserts the given node before the current node. The current node has
      # to belong to a node set, a NoMethodError is raised otherwise.
      #
      # @param [Oga::XML::Node] other
      #
      def before(other)
        index = node_set.index(self)

        node_set.insert(index, other)
      end

      ##
      # Inserts the given node after the current node. The current node has
      # to belong to a node set, a NoMethodError is raised otherwise.
      #
      # @param [Oga::XML::Node] other
      #
      def after(other)
        index = node_set.index(self) + 1

        node_set.insert(index, other)
      end

      ##
      # Returns `true` if the root node is a Document for which `html?`
      # returns a truthy value. The result is memoized until the node set of
      # this node changes.
      #
      # @return [TrueClass|FalseClass]
      #
      def html?
        if @html_p.nil?
          root = root_node

          @html_p = root.is_a?(Document) && root.html?
        end

        @html_p
      end

      ##
      # @return [TrueClass|FalseClass]
      #
      def xml?
        !html?
      end
    end # Node
  end # XML
end # Oga
@@ -0,0 +1,333 @@
module Oga
  module XML
    ##
    # The NodeSet class contains a set of unique {Oga::XML::Node} instances that
    # can be queried and modified. Optionally NodeSet instances can take
    # ownership of a node (besides just containing it). This allows the nodes to
    # query their previous and next elements.
    #
    # There are two types of sets:
    #
    # 1. Regular node sets
    # 2. Owned node sets
    #
    # Both behave similar to Ruby's Array class. The difference between an
    # owned and regular node set is that an owned set modifies nodes that are
    # added or removed by certain operations. For example, when a node is added
    # to an owned set the `node_set` attribute of said node points to the set
    # it was just added to.
    #
    # Owned node sets are used when building a DOM tree with
    # {Oga::XML::Parser}. By taking ownership of nodes in a set Oga makes it
    # possible to use these sets as following:
    #
    #     document = Oga::XML::Document.new
    #     element  = Oga::XML::Element.new
    #
    #     document.children << element
    #
    #     element.node_set == document.children # => true
    #
    # If ownership was not handled then you'd have to manually set the
    # `element` variable's `node_set` attribute after pushing it into a set.
    #
    class NodeSet
      include Enumerable

      # @return [Oga::XML::Node]
      attr_accessor :owner

      ##
      # The given Array is stored as-is (it is not copied), so later changes
      # to the set are visible through that Array as well.
      #
      # @param [Array] nodes The nodes to add to the set.
      # @param [Oga::XML::Node] owner The owner of the set.
      #
      def initialize(nodes = [], owner = nil)
        @nodes = nodes
        @owner = owner

        @nodes.each { |node| take_ownership(node) } if owner
      end

      ##
      # Yields the supplied block for every node. An Enumerator is returned
      # when no block is given.
      #
      # @yieldparam [Oga::XML::Node]
      #
      def each
        return to_enum(:each) unless block_given?

        @nodes.each { |node| yield node }
      end

      ##
      # Returns the last node in the set.
      #
      # @return [Oga::XML::Node]
      #
      def last
        @nodes[-1]
      end

      ##
      # Returns `true` if the set is empty.
      #
      # @return [TrueClass|FalseClass]
      #
      def empty?
        @nodes.empty?
      end

      ##
      # Returns the amount of nodes in the set.
      #
      # @return [Fixnum]
      #
      def length
        @nodes.length
      end

      alias_method :count, :length
      alias_method :size, :length

      ##
      # Returns the index of the given node, or nil if the node is not part of
      # the set.
      #
      # @param [Oga::XML::Node] node
      # @return [Fixnum]
      #
      def index(node)
        @nodes.index(node)
      end

      ##
      # Pushes the node at the end of the set. Nodes already present in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def push(node)
        return if @nodes.include?(node)

        @nodes << node

        take_ownership(node)
      end

      alias_method :<<, :push

      ##
      # Pushes the node at the start of the set. Nodes already present in the
      # set are ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def unshift(node)
        return if @nodes.include?(node)

        @nodes.unshift(node)

        take_ownership(node)
      end

      ##
      # Shifts a node from the start of the set. Returns nil when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def shift
        node = @nodes.shift

        remove_ownership(node)

        node
      end

      ##
      # Pops a node from the end of the set. Returns nil when the set is
      # empty.
      #
      # @return [Oga::XML::Node]
      #
      def pop
        node = @nodes.pop

        remove_ownership(node)

        node
      end

      ##
      # Inserts a node into the set at the given index. Nodes already present
      # in the set are ignored.
      #
      # @param [Fixnum] index The index to insert the node at.
      # @param [Oga::XML::Node] node
      #
      def insert(index, node)
        return if @nodes.include?(node)

        @nodes.insert(index, node)

        take_ownership(node)
      end

      ##
      # Returns the node for the given index.
      #
      # @param [Fixnum] index
      # @return [Oga::XML::Node]
      #
      def [](index)
        @nodes[index]
      end

      ##
      # Converts the current set to an Array. The returned Array is the one
      # used internally by the set, not a copy.
      #
      # @return [Array]
      #
      def to_a
        @nodes
      end

      ##
      # Creates a new set based on the current and the specified set. The newly
      # created set does not inherit ownership rules of the current set.
      #
      # @param [Oga::XML::NodeSet] other
      # @return [Oga::XML::NodeSet]
      #
      def +(other)
        self.class.new(to_a | other.to_a)
      end

      ##
      # Returns `true` if the current node set and the one given in `other` are
      # equal to each other.
      #
      # @param [Oga::XML::NodeSet] other
      #
      def ==(other)
        other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
      end

      ##
      # Returns `true` if the nodes given in `nodes` are equal to those
      # specified in the current `@nodes` variable. This method allows two
      # NodeSet instances to compare each other without the need of exposing
      # `@nodes` to the public.
      #
      # @param [Array<Oga::XML::Node>] nodes
      #
      def equal_nodes?(nodes)
        @nodes == nodes
      end

      ##
      # Adds the nodes of the given node set to the current node set.
      #
      # @param [Oga::XML::NodeSet] other
      #
      def concat(other)
        other.each { |node| push(node) }
      end

      ##
      # Removes the current nodes from their owning set. Unless the current
      # set is itself the owning set, the nodes are *not* removed from the
      # current set.
      #
      # This method is intended to remove nodes from an XML document/node.
      #
      def remove
        owning_sets = []

        # First we gather all the sets to remove nodes from, then we remove the
        # actual nodes. This is done as you can not reliably remove elements
        # from an Array while iterating on that same Array.
        @nodes.each do |node|
          next unless node.node_set

          owning_sets << node.node_set

          node.node_set = nil
        end

        # The current set can be one of the owning sets, in which case
        # `delete` shrinks `@nodes`. Iterating over a snapshot ensures no node
        # is skipped.
        detached = @nodes.dup

        owning_sets.uniq.each do |set|
          detached.each { |node| set.delete(node) }
        end
      end

      ##
      # Removes a node from the current set only.
      #
      # @param [Oga::XML::Node] node
      # @return [Oga::XML::Node] The removed node, or nil if the node was not
      #  part of the set.
      #
      def delete(node)
        removed = @nodes.delete(node)

        remove_ownership(removed) if removed

        removed
      end

      ##
      # Returns the values of the given attribute. Nodes that do not respond
      # to `attribute` are skipped.
      #
      # @param [String|Symbol] name The name of the attribute.
      # @return [Array]
      #
      def attribute(name)
        @nodes.each_with_object([]) do |node, values|
          values << node.attribute(name) if node.respond_to?(:attribute)
        end
      end

      alias_method :attr, :attribute

      ##
      # Returns the text of all nodes in the set, ignoring comment nodes.
      #
      # @return [String]
      #
      def text
        @nodes.each_with_object(''.dup) do |node, buffer|
          buffer << node.text if node.respond_to?(:text) && !node.is_a?(Comment)
        end
      end

      ##
      # @return [String]
      #
      def inspect
        values = @nodes.map(&:inspect).join(', ')

        "NodeSet(#{values})"
      end

      private

      ##
      # Takes ownership of the given node. This only occurs when the current
      # set has an owner.
      #
      # @param [Oga::XML::Node] node
      #
      def take_ownership(node)
        node.node_set = self if owner
      end

      ##
      # Removes ownership of the node if it belongs to the current set. The
      # check is based on object identity so that a different set that happens
      # to contain equal nodes is never mistaken for the current set. A `nil`
      # node (e.g. the result of popping an empty set) is ignored.
      #
      # @param [Oga::XML::Node] node
      #
      def remove_ownership(node)
        node.node_set = nil if node && node.node_set.equal?(self)
      end
    end # NodeSet
  end # XML
end # Oga