oga 1.2.3 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/doc/css_selectors.md +1 -1
  3. data/lib/oga.rb +6 -1
  4. data/lib/oga/blacklist.rb +0 -10
  5. data/lib/oga/css/lexer.rb +530 -255
  6. data/lib/oga/css/parser.rb +232 -230
  7. data/lib/oga/entity_decoder.rb +0 -4
  8. data/lib/oga/html/entities.rb +0 -4
  9. data/lib/oga/html/parser.rb +0 -4
  10. data/lib/oga/html/sax_parser.rb +0 -4
  11. data/lib/oga/lru.rb +0 -26
  12. data/lib/oga/oga.rb +0 -8
  13. data/lib/oga/ruby/generator.rb +225 -0
  14. data/lib/oga/ruby/node.rb +189 -0
  15. data/lib/oga/version.rb +1 -1
  16. data/lib/oga/whitelist.rb +0 -6
  17. data/lib/oga/xml/attribute.rb +13 -20
  18. data/lib/oga/xml/cdata.rb +0 -4
  19. data/lib/oga/xml/character_node.rb +0 -8
  20. data/lib/oga/xml/comment.rb +0 -4
  21. data/lib/oga/xml/default_namespace.rb +0 -2
  22. data/lib/oga/xml/doctype.rb +0 -8
  23. data/lib/oga/xml/document.rb +10 -14
  24. data/lib/oga/xml/element.rb +1 -52
  25. data/lib/oga/xml/entities.rb +0 -26
  26. data/lib/oga/xml/expanded_name.rb +12 -0
  27. data/lib/oga/xml/html_void_elements.rb +0 -2
  28. data/lib/oga/xml/lexer.rb +0 -86
  29. data/lib/oga/xml/namespace.rb +0 -10
  30. data/lib/oga/xml/node.rb +18 -34
  31. data/lib/oga/xml/node_set.rb +0 -50
  32. data/lib/oga/xml/parser.rb +13 -50
  33. data/lib/oga/xml/processing_instruction.rb +0 -8
  34. data/lib/oga/xml/pull_parser.rb +0 -18
  35. data/lib/oga/xml/querying.rb +58 -19
  36. data/lib/oga/xml/sax_parser.rb +0 -18
  37. data/lib/oga/xml/text.rb +0 -12
  38. data/lib/oga/xml/traversal.rb +0 -4
  39. data/lib/oga/xml/xml_declaration.rb +0 -8
  40. data/lib/oga/xpath/compiler.rb +1568 -0
  41. data/lib/oga/xpath/conversion.rb +102 -0
  42. data/lib/oga/xpath/lexer.rb +1844 -1238
  43. data/lib/oga/xpath/parser.rb +182 -153
  44. metadata +7 -3
  45. data/lib/oga/xpath/evaluator.rb +0 -1800
@@ -1,9 +1,7 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Class used for storing information about an entire XML document. This
5
4
  # includes the doctype, XML declaration, child nodes and more.
6
- #
7
5
  class Document
8
6
  include Querying
9
7
  include Traversal
@@ -18,14 +16,12 @@ module Oga
18
16
  # @return [Symbol]
19
17
  attr_reader :type
20
18
 
21
- ##
22
19
  # @param [Hash] options
23
20
  #
24
21
  # @option options [Oga::XML::NodeSet] :children
25
22
  # @option options [Oga::XML::Doctype] :doctype
26
23
  # @option options [Oga::XML::XmlDeclaration] :xml_declaration
27
24
  # @option options [Symbol] :type
28
- #
29
25
  def initialize(options = {})
30
26
  @doctype = options[:doctype]
31
27
  @xml_declaration = options[:xml_declaration]
@@ -34,18 +30,14 @@ module Oga
34
30
  self.children = options[:children] if options[:children]
35
31
  end
36
32
 
37
- ##
38
33
  # @return [Oga::XML::NodeSet]
39
- #
40
34
  def children
41
35
  @children ||= NodeSet.new([], self)
42
36
  end
43
37
 
44
- ##
45
38
  # Sets the child nodes of the document.
46
39
  #
47
40
  # @param [Oga::XML::NodeSet|Array] nodes
48
- #
49
41
  def children=(nodes)
50
42
  if nodes.is_a?(NodeSet)
51
43
  @children = nodes
@@ -54,11 +46,19 @@ module Oga
54
46
  end
55
47
  end
56
48
 
57
- ##
49
+ # Returns self.
50
+ #
51
+ # This method exists to make this class compatible with Element, which in
52
+ # turn makes it easier to use both in the XPath compiler.
53
+ #
54
+ # @return [Oga::XML::Document]
55
+ def root_node
56
+ self
57
+ end
58
+
58
59
  # Converts the document and its child nodes to XML.
59
60
  #
60
61
  # @return [String]
61
- #
62
62
  def to_xml
63
63
  xml = children.map(&:to_xml).join('')
64
64
 
@@ -73,19 +73,15 @@ module Oga
73
73
  xml
74
74
  end
75
75
 
76
- ##
77
76
  # @return [TrueClass|FalseClass]
78
- #
79
77
  def html?
80
78
  type.equal?(:html)
81
79
  end
82
80
 
83
- ##
84
81
  # Inspects the document and its child nodes. Child nodes are indented for
85
82
  # each nesting level.
86
83
  #
87
84
  # @return [String]
88
- #
89
85
  def inspect
90
86
  segments = []
91
87
 
@@ -1,11 +1,10 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Class that contains information about an XML element such as the name,
5
4
  # attributes and child nodes.
6
- #
7
5
  class Element < Node
8
6
  include Querying
7
+ include ExpandedName
9
8
 
10
9
  # @return [String]
11
10
  attr_reader :namespace_name
@@ -19,14 +18,11 @@ module Oga
19
18
  # @return [Hash]
20
19
  attr_writer :namespaces
21
20
 
22
- ##
23
21
  # The attribute prefix/namespace used for registering element namespaces.
24
22
  #
25
23
  # @return [String]
26
- #
27
24
  XMLNS_PREFIX = 'xmlns'.freeze
28
25
 
29
- ##
30
26
  # @param [Hash] options
31
27
  #
32
28
  # @option options [String] :name The name of the element.
@@ -35,7 +31,6 @@ module Oga
35
31
  #
36
32
  # @option options [Array<Oga::XML::Attribute>] :attributes The attributes
37
33
  # of the element as an Array.
38
- #
39
34
  def initialize(options = {})
40
35
  super
41
36
 
@@ -48,15 +43,12 @@ module Oga
48
43
  register_namespaces_from_attributes
49
44
  end
50
45
 
51
- ##
52
46
  # @param [String] name
53
- #
54
47
  def namespace_name=(name)
55
48
  @namespace_name = name
56
49
  @namespace = nil
57
50
  end
58
51
 
59
- ##
60
52
  # Returns an attribute matching the given name (with or without the
61
53
  # namespace).
62
54
  #
@@ -71,7 +63,6 @@ module Oga
71
63
  # of the attribute.
72
64
  #
73
65
  # @return [Oga::XML::Attribute]
74
- #
75
66
  def attribute(name)
76
67
  name, ns = split_name(name)
77
68
 
@@ -84,32 +75,27 @@ module Oga
84
75
 
85
76
  alias_method :attr, :attribute
86
77
 
87
- ##
88
78
  # Returns the value of the given attribute.
89
79
  #
90
80
  # @example
91
81
  # element.get('class') # => "container"
92
82
  #
93
83
  # @see [#attribute]
94
- #
95
84
  def get(name)
96
85
  found = attribute(name)
97
86
 
98
87
  found ? found.value : nil
99
88
  end
100
89
 
101
- ##
102
90
  # Adds a new attribute to the element.
103
91
  #
104
92
  # @param [Oga::XML::Attribute] attribute
105
- #
106
93
  def add_attribute(attribute)
107
94
  attribute.element = self
108
95
 
109
96
  attributes << attribute
110
97
  end
111
98
 
112
- ##
113
99
  # Sets the value of an attribute to the given value. If the attribute does
114
100
  # not exist it is created automatically.
115
101
  #
@@ -117,7 +103,6 @@ module Oga
117
103
  # namespace.
118
104
  #
119
105
  # @param [String] value The new value of the attribute.
120
- #
121
106
  def set(name, value)
122
107
  found = attribute(name)
123
108
 
@@ -140,25 +125,21 @@ module Oga
140
125
  end
141
126
  end
142
127
 
143
- ##
144
128
  # Removes an attribute from the element.
145
129
  #
146
130
  # @param [String] name The name (optionally including namespace prefix)
147
131
  # of the attribute to remove.
148
132
  #
149
133
  # @return [Oga::XML::Attribute]
150
- #
151
134
  def unset(name)
152
135
  found = attribute(name)
153
136
 
154
137
  return attributes.delete(found) if found
155
138
  end
156
139
 
157
- ##
158
140
  # Returns the namespace of the element.
159
141
  #
160
142
  # @return [Oga::XML::Namespace]
161
- #
162
143
  def namespace
163
144
  unless @namespace
164
145
  available = available_namespaces
@@ -168,40 +149,32 @@ module Oga
168
149
  @namespace
169
150
  end
170
151
 
171
- ##
172
152
  # Returns the namespaces registered on this element, or an empty Hash in
173
153
  # case of an HTML element.
174
154
  #
175
155
  # @return [Hash]
176
- #
177
156
  def namespaces
178
157
  html? ? {} : @namespaces
179
158
  end
180
159
 
181
- ##
182
160
  # Returns true if the current element resides in the default XML
183
161
  # namespace.
184
162
  #
185
163
  # @return [TrueClass|FalseClass]
186
- #
187
164
  def default_namespace?
188
165
  namespace == DEFAULT_NAMESPACE || namespace.nil?
189
166
  end
190
167
 
191
- ##
192
168
  # Returns the text of all child nodes joined together.
193
169
  #
194
170
  # @return [String]
195
- #
196
171
  def text
197
172
  children.text
198
173
  end
199
174
 
200
- ##
201
175
  # Returns the text of the current element only.
202
176
  #
203
177
  # @return [String]
204
- #
205
178
  def inner_text
206
179
  text = ''
207
180
 
@@ -212,12 +185,10 @@ module Oga
212
185
  text
213
186
  end
214
187
 
215
- ##
216
188
  # Returns any {Oga::XML::Text} nodes that are a direct child of this
217
189
  # element.
218
190
  #
219
191
  # @return [Oga::XML::NodeSet]
220
- #
221
192
  def text_nodes
222
193
  nodes = NodeSet.new
223
194
 
@@ -228,21 +199,17 @@ module Oga
228
199
  nodes
229
200
  end
230
201
 
231
- ##
232
202
  # Sets the inner text of the current element to the given String.
233
203
  #
234
204
  # @param [String] text
235
- #
236
205
  def inner_text=(text)
237
206
  text_node = XML::Text.new(:text => text)
238
207
  @children = NodeSet.new([text_node], self)
239
208
  end
240
209
 
241
- ##
242
210
  # Converts the element and its child elements to XML.
243
211
  #
244
212
  # @return [String]
245
- #
246
213
  def to_xml
247
214
  if namespace_name
248
215
  full_name = "#{namespace_name}:#{name}"
@@ -264,9 +231,7 @@ module Oga
264
231
  end
265
232
  end
266
233
 
267
- ##
268
234
  # @return [String]
269
- #
270
235
  def inspect
271
236
  segments = []
272
237
 
@@ -283,7 +248,6 @@ module Oga
283
248
  "Element(#{segments.join(' ')})"
284
249
  end
285
250
 
286
- ##
287
251
  # Registers a new namespace for the current element and its child
288
252
  # elements.
289
253
  #
@@ -291,7 +255,6 @@ module Oga
291
255
  # @param [String] uri
292
256
  # @param [TrueClass|FalseClass] flush
293
257
  # @see [Oga::XML::Namespace#initialize]
294
- #
295
258
  def register_namespace(name, uri, flush = true)
296
259
  if namespaces[name]
297
260
  raise ArgumentError, "The namespace #{name.inspect} already exists"
@@ -302,12 +265,10 @@ module Oga
302
265
  flush_namespaces_cache if flush
303
266
  end
304
267
 
305
- ##
306
268
  # Returns a Hash containing all the namespaces available to the current
307
269
  # element.
308
270
  #
309
271
  # @return [Hash]
310
- #
311
272
  def available_namespaces
312
273
  # HTML(5) completely ignores namespaces
313
274
  unless @available_namespaces
@@ -332,11 +293,9 @@ module Oga
332
293
  @available_namespaces
333
294
  end
334
295
 
335
- ##
336
296
  # Returns `true` if the element is a self-closing element.
337
297
  #
338
298
  # @return [TrueClass|FalseClass]
339
- #
340
299
  def self_closing?
341
300
  self_closing = children.empty?
342
301
  root = root_node
@@ -349,10 +308,8 @@ module Oga
349
308
  self_closing
350
309
  end
351
310
 
352
- ##
353
311
  # Flushes the namespaces cache of the current element and all its child
354
312
  # elements.
355
- #
356
313
  def flush_namespaces_cache
357
314
  @available_namespaces = nil
358
315
  @namespace = nil
@@ -364,9 +321,7 @@ module Oga
364
321
 
365
322
  private
366
323
 
367
- ##
368
324
  # Registers namespaces based on any "xmlns" attributes.
369
- #
370
325
  def register_namespaces_from_attributes
371
326
  flush = false
372
327
 
@@ -385,31 +340,25 @@ module Oga
385
340
  flush_namespaces_cache if flush
386
341
  end
387
342
 
388
- ##
389
343
  # Links all attributes to the current element.
390
- #
391
344
  def link_attributes
392
345
  attributes.each do |attr|
393
346
  attr.element = self
394
347
  end
395
348
  end
396
349
 
397
- ##
398
350
  # @param [String] name
399
351
  # @return [Array]
400
- #
401
352
  def split_name(name)
402
353
  segments = name.to_s.split(':')
403
354
 
404
355
  [segments.pop, segments.pop]
405
356
  end
406
357
 
407
- ##
408
358
  # @param [Oga::XML::Attribute] attr
409
359
  # @param [String] ns
410
360
  # @param [String] name
411
361
  # @return [TrueClass|FalseClass]
412
- #
413
362
  def attribute_matches?(attr, ns, name)
414
363
  name_matches = attr.name == name
415
364
  ns_matches = false
@@ -1,18 +1,14 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Module for encoding/decoding XML and HTML entities. The mapping of HTML
5
4
  # entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
6
- #
7
5
  module Entities
8
- ##
9
6
  # Hash containing XML entities and the corresponding characters.
10
7
  #
11
8
  # The `&amp;` mapping must come last to ensure proper conversion of non
12
9
  # encoded to encoded forms (see {Oga::XML::Text#to_xml}).
13
10
  #
14
11
  # @return [Hash]
15
- #
16
12
  DECODE_MAPPING = {
17
13
  '&lt;' => '<',
18
14
  '&gt;' => '>',
@@ -21,23 +17,19 @@ module Oga
21
17
  '&amp;' => '&',
22
18
  }
23
19
 
24
- ##
25
20
  # Hash containing characters and the corresponding XML entities.
26
21
  #
27
22
  # @return [Hash]
28
- #
29
23
  ENCODE_MAPPING = {
30
24
  '&' => '&amp;',
31
25
  '>' => '&gt;',
32
26
  '<' => '&lt;',
33
27
  }
34
28
 
35
- ##
36
29
  # Hash containing characters and the corresponding XML entities to use
37
30
  # when encoding XML/HTML attribute values.
38
31
  #
39
32
  # @return [Hash]
40
- #
41
33
  ENCODE_ATTRIBUTE_MAPPING = {
42
34
  '&' => '&amp;',
43
35
  '>' => '&gt;',
@@ -46,50 +38,36 @@ module Oga
46
38
  '"' => '&quot;'
47
39
  }
48
40
 
49
- ##
50
41
  # @return [String]
51
- #
52
42
  AMPERSAND = '&'.freeze
53
43
 
54
- ##
55
44
  # Regexp for matching XML/HTML entities such as "&nbsp;".
56
45
  #
57
46
  # @return [Regexp]
58
- #
59
47
  REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
60
48
 
61
- ##
62
49
  # Regexp for matching XML/HTML numeric entities such as "&#38;".
63
50
  #
64
51
  # @return [Regexp]
65
- #
66
52
  NUMERIC_CODE_POINT_ENTITY = /&#(\d+);/
67
53
 
68
- ##
69
54
  # Regexp for matching XML/HTML hex entities such as "&#x3C;".
70
55
  #
71
56
  # @return [Regexp]
72
- #
73
57
  HEX_CODE_POINT_ENTITY = /&#x([a-fA-F0-9]+);/
74
58
 
75
- ##
76
59
  # @return [Regexp]
77
- #
78
60
  ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
79
61
 
80
- ##
81
62
  # @return [Regexp]
82
- #
83
63
  ENCODE_ATTRIBUTE_REGEXP =
84
64
  Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
85
65
 
86
- ##
87
66
  # Decodes XML entities.
88
67
  #
89
68
  # @param [String] input
90
69
  # @param [Hash] mapping
91
70
  # @return [String]
92
- #
93
71
  def self.decode(input, mapping = DECODE_MAPPING)
94
72
  return input unless input.include?(AMPERSAND)
95
73
 
@@ -110,23 +88,19 @@ module Oga
110
88
  input
111
89
  end
112
90
 
113
- ##
114
91
  # Encodes special characters as XML entities.
115
92
  #
116
93
  # @param [String] input
117
94
  # @param [Hash] mapping
118
95
  # @return [String]
119
- #
120
96
  def self.encode(input, mapping = ENCODE_MAPPING)
121
97
  input.gsub(ENCODE_REGEXP, mapping)
122
98
  end
123
99
 
124
- ##
125
100
  # Encodes special characters in an XML attribute value.
126
101
  #
127
102
  # @param [String] input
128
103
  # @return [String]
129
- #
130
104
  def self.encode_attribute(input)
131
105
  input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
132
106
  end