oga 1.2.3-java → 1.3.0-java

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (46):
  1. checksums.yaml +4 -4
  2. data/doc/css_selectors.md +1 -1
  3. data/lib/liboga.jar +0 -0
  4. data/lib/oga.rb +6 -1
  5. data/lib/oga/blacklist.rb +0 -10
  6. data/lib/oga/css/lexer.rb +530 -255
  7. data/lib/oga/css/parser.rb +232 -230
  8. data/lib/oga/entity_decoder.rb +0 -4
  9. data/lib/oga/html/entities.rb +0 -4
  10. data/lib/oga/html/parser.rb +0 -4
  11. data/lib/oga/html/sax_parser.rb +0 -4
  12. data/lib/oga/lru.rb +0 -26
  13. data/lib/oga/oga.rb +0 -8
  14. data/lib/oga/ruby/generator.rb +225 -0
  15. data/lib/oga/ruby/node.rb +189 -0
  16. data/lib/oga/version.rb +1 -1
  17. data/lib/oga/whitelist.rb +0 -6
  18. data/lib/oga/xml/attribute.rb +13 -20
  19. data/lib/oga/xml/cdata.rb +0 -4
  20. data/lib/oga/xml/character_node.rb +0 -8
  21. data/lib/oga/xml/comment.rb +0 -4
  22. data/lib/oga/xml/default_namespace.rb +0 -2
  23. data/lib/oga/xml/doctype.rb +0 -8
  24. data/lib/oga/xml/document.rb +10 -14
  25. data/lib/oga/xml/element.rb +1 -52
  26. data/lib/oga/xml/entities.rb +0 -26
  27. data/lib/oga/xml/expanded_name.rb +12 -0
  28. data/lib/oga/xml/html_void_elements.rb +0 -2
  29. data/lib/oga/xml/lexer.rb +0 -86
  30. data/lib/oga/xml/namespace.rb +0 -10
  31. data/lib/oga/xml/node.rb +18 -34
  32. data/lib/oga/xml/node_set.rb +0 -50
  33. data/lib/oga/xml/parser.rb +13 -50
  34. data/lib/oga/xml/processing_instruction.rb +0 -8
  35. data/lib/oga/xml/pull_parser.rb +0 -18
  36. data/lib/oga/xml/querying.rb +58 -19
  37. data/lib/oga/xml/sax_parser.rb +0 -18
  38. data/lib/oga/xml/text.rb +0 -12
  39. data/lib/oga/xml/traversal.rb +0 -4
  40. data/lib/oga/xml/xml_declaration.rb +0 -8
  41. data/lib/oga/xpath/compiler.rb +1568 -0
  42. data/lib/oga/xpath/conversion.rb +102 -0
  43. data/lib/oga/xpath/lexer.rb +1844 -1238
  44. data/lib/oga/xpath/parser.rb +182 -153
  45. metadata +7 -3
  46. data/lib/oga/xpath/evaluator.rb +0 -1800
@@ -1,9 +1,7 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Class used for storing information about an entire XML document. This
5
4
  # includes the doctype, XML declaration, child nodes and more.
6
- #
7
5
  class Document
8
6
  include Querying
9
7
  include Traversal
@@ -18,14 +16,12 @@ module Oga
18
16
  # @return [Symbol]
19
17
  attr_reader :type
20
18
 
21
- ##
22
19
  # @param [Hash] options
23
20
  #
24
21
  # @option options [Oga::XML::NodeSet] :children
25
22
  # @option options [Oga::XML::Doctype] :doctype
26
23
  # @option options [Oga::XML::XmlDeclaration] :xml_declaration
27
24
  # @option options [Symbol] :type
28
- #
29
25
  def initialize(options = {})
30
26
  @doctype = options[:doctype]
31
27
  @xml_declaration = options[:xml_declaration]
@@ -34,18 +30,14 @@ module Oga
34
30
  self.children = options[:children] if options[:children]
35
31
  end
36
32
 
37
- ##
38
33
  # @return [Oga::XML::NodeSet]
39
- #
40
34
  def children
41
35
  @children ||= NodeSet.new([], self)
42
36
  end
43
37
 
44
- ##
45
38
  # Sets the child nodes of the document.
46
39
  #
47
40
  # @param [Oga::XML::NodeSet|Array] nodes
48
- #
49
41
  def children=(nodes)
50
42
  if nodes.is_a?(NodeSet)
51
43
  @children = nodes
@@ -54,11 +46,19 @@ module Oga
54
46
  end
55
47
  end
56
48
 
57
- ##
49
+ # Returns self.
50
+ #
51
+ # This method exists to make this class compatible with Element, which in
52
+ # turn makes it easier to use both in the XPath compiler.
53
+ #
54
+ # @return [Oga::XML::Document]
55
+ def root_node
56
+ self
57
+ end
58
+
58
59
  # Converts the document and its child nodes to XML.
59
60
  #
60
61
  # @return [String]
61
- #
62
62
  def to_xml
63
63
  xml = children.map(&:to_xml).join('')
64
64
 
@@ -73,19 +73,15 @@ module Oga
73
73
  xml
74
74
  end
75
75
 
76
- ##
77
76
  # @return [TrueClass|FalseClass]
78
- #
79
77
  def html?
80
78
  type.equal?(:html)
81
79
  end
82
80
 
83
- ##
84
81
  # Inspects the document and its child nodes. Child nodes are indented for
85
82
  # each nesting level.
86
83
  #
87
84
  # @return [String]
88
- #
89
85
  def inspect
90
86
  segments = []
91
87
 
@@ -1,11 +1,10 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Class that contains information about an XML element such as the name,
5
4
  # attributes and child nodes.
6
- #
7
5
  class Element < Node
8
6
  include Querying
7
+ include ExpandedName
9
8
 
10
9
  # @return [String]
11
10
  attr_reader :namespace_name
@@ -19,14 +18,11 @@ module Oga
19
18
  # @return [Hash]
20
19
  attr_writer :namespaces
21
20
 
22
- ##
23
21
  # The attribute prefix/namespace used for registering element namespaces.
24
22
  #
25
23
  # @return [String]
26
- #
27
24
  XMLNS_PREFIX = 'xmlns'.freeze
28
25
 
29
- ##
30
26
  # @param [Hash] options
31
27
  #
32
28
  # @option options [String] :name The name of the element.
@@ -35,7 +31,6 @@ module Oga
35
31
  #
36
32
  # @option options [Array<Oga::XML::Attribute>] :attributes The attributes
37
33
  # of the element as an Array.
38
- #
39
34
  def initialize(options = {})
40
35
  super
41
36
 
@@ -48,15 +43,12 @@ module Oga
48
43
  register_namespaces_from_attributes
49
44
  end
50
45
 
51
- ##
52
46
  # @param [String] name
53
- #
54
47
  def namespace_name=(name)
55
48
  @namespace_name = name
56
49
  @namespace = nil
57
50
  end
58
51
 
59
- ##
60
52
  # Returns an attribute matching the given name (with or without the
61
53
  # namespace).
62
54
  #
@@ -71,7 +63,6 @@ module Oga
71
63
  # of the attribute.
72
64
  #
73
65
  # @return [Oga::XML::Attribute]
74
- #
75
66
  def attribute(name)
76
67
  name, ns = split_name(name)
77
68
 
@@ -84,32 +75,27 @@ module Oga
84
75
 
85
76
  alias_method :attr, :attribute
86
77
 
87
- ##
88
78
  # Returns the value of the given attribute.
89
79
  #
90
80
  # @example
91
81
  # element.get('class') # => "container"
92
82
  #
93
83
  # @see [#attribute]
94
- #
95
84
  def get(name)
96
85
  found = attribute(name)
97
86
 
98
87
  found ? found.value : nil
99
88
  end
100
89
 
101
- ##
102
90
  # Adds a new attribute to the element.
103
91
  #
104
92
  # @param [Oga::XML::Attribute] attribute
105
- #
106
93
  def add_attribute(attribute)
107
94
  attribute.element = self
108
95
 
109
96
  attributes << attribute
110
97
  end
111
98
 
112
- ##
113
99
  # Sets the value of an attribute to the given value. If the attribute does
114
100
  # not exist it is created automatically.
115
101
  #
@@ -117,7 +103,6 @@ module Oga
117
103
  # namespace.
118
104
  #
119
105
  # @param [String] value The new value of the attribute.
120
- #
121
106
  def set(name, value)
122
107
  found = attribute(name)
123
108
 
@@ -140,25 +125,21 @@ module Oga
140
125
  end
141
126
  end
142
127
 
143
- ##
144
128
  # Removes an attribute from the element.
145
129
  #
146
130
  # @param [String] name The name (optionally including namespace prefix)
147
131
  # of the attribute to remove.
148
132
  #
149
133
  # @return [Oga::XML::Attribute]
150
- #
151
134
  def unset(name)
152
135
  found = attribute(name)
153
136
 
154
137
  return attributes.delete(found) if found
155
138
  end
156
139
 
157
- ##
158
140
  # Returns the namespace of the element.
159
141
  #
160
142
  # @return [Oga::XML::Namespace]
161
- #
162
143
  def namespace
163
144
  unless @namespace
164
145
  available = available_namespaces
@@ -168,40 +149,32 @@ module Oga
168
149
  @namespace
169
150
  end
170
151
 
171
- ##
172
152
  # Returns the namespaces registered on this element, or an empty Hash in
173
153
  # case of an HTML element.
174
154
  #
175
155
  # @return [Hash]
176
- #
177
156
  def namespaces
178
157
  html? ? {} : @namespaces
179
158
  end
180
159
 
181
- ##
182
160
  # Returns true if the current element resides in the default XML
183
161
  # namespace.
184
162
  #
185
163
  # @return [TrueClass|FalseClass]
186
- #
187
164
  def default_namespace?
188
165
  namespace == DEFAULT_NAMESPACE || namespace.nil?
189
166
  end
190
167
 
191
- ##
192
168
  # Returns the text of all child nodes joined together.
193
169
  #
194
170
  # @return [String]
195
- #
196
171
  def text
197
172
  children.text
198
173
  end
199
174
 
200
- ##
201
175
  # Returns the text of the current element only.
202
176
  #
203
177
  # @return [String]
204
- #
205
178
  def inner_text
206
179
  text = ''
207
180
 
@@ -212,12 +185,10 @@ module Oga
212
185
  text
213
186
  end
214
187
 
215
- ##
216
188
  # Returns any {Oga::XML::Text} nodes that are a direct child of this
217
189
  # element.
218
190
  #
219
191
  # @return [Oga::XML::NodeSet]
220
- #
221
192
  def text_nodes
222
193
  nodes = NodeSet.new
223
194
 
@@ -228,21 +199,17 @@ module Oga
228
199
  nodes
229
200
  end
230
201
 
231
- ##
232
202
  # Sets the inner text of the current element to the given String.
233
203
  #
234
204
  # @param [String] text
235
- #
236
205
  def inner_text=(text)
237
206
  text_node = XML::Text.new(:text => text)
238
207
  @children = NodeSet.new([text_node], self)
239
208
  end
240
209
 
241
- ##
242
210
  # Converts the element and its child elements to XML.
243
211
  #
244
212
  # @return [String]
245
- #
246
213
  def to_xml
247
214
  if namespace_name
248
215
  full_name = "#{namespace_name}:#{name}"
@@ -264,9 +231,7 @@ module Oga
264
231
  end
265
232
  end
266
233
 
267
- ##
268
234
  # @return [String]
269
- #
270
235
  def inspect
271
236
  segments = []
272
237
 
@@ -283,7 +248,6 @@ module Oga
283
248
  "Element(#{segments.join(' ')})"
284
249
  end
285
250
 
286
- ##
287
251
  # Registers a new namespace for the current element and its child
288
252
  # elements.
289
253
  #
@@ -291,7 +255,6 @@ module Oga
291
255
  # @param [String] uri
292
256
  # @param [TrueClass|FalseClass] flush
293
257
  # @see [Oga::XML::Namespace#initialize]
294
- #
295
258
  def register_namespace(name, uri, flush = true)
296
259
  if namespaces[name]
297
260
  raise ArgumentError, "The namespace #{name.inspect} already exists"
@@ -302,12 +265,10 @@ module Oga
302
265
  flush_namespaces_cache if flush
303
266
  end
304
267
 
305
- ##
306
268
  # Returns a Hash containing all the namespaces available to the current
307
269
  # element.
308
270
  #
309
271
  # @return [Hash]
310
- #
311
272
  def available_namespaces
312
273
  # HTML(5) completely ignores namespaces
313
274
  unless @available_namespaces
@@ -332,11 +293,9 @@ module Oga
332
293
  @available_namespaces
333
294
  end
334
295
 
335
- ##
336
296
  # Returns `true` if the element is a self-closing element.
337
297
  #
338
298
  # @return [TrueClass|FalseClass]
339
- #
340
299
  def self_closing?
341
300
  self_closing = children.empty?
342
301
  root = root_node
@@ -349,10 +308,8 @@ module Oga
349
308
  self_closing
350
309
  end
351
310
 
352
- ##
353
311
  # Flushes the namespaces cache of the current element and all its child
354
312
  # elements.
355
- #
356
313
  def flush_namespaces_cache
357
314
  @available_namespaces = nil
358
315
  @namespace = nil
@@ -364,9 +321,7 @@ module Oga
364
321
 
365
322
  private
366
323
 
367
- ##
368
324
  # Registers namespaces based on any "xmlns" attributes.
369
- #
370
325
  def register_namespaces_from_attributes
371
326
  flush = false
372
327
 
@@ -385,31 +340,25 @@ module Oga
385
340
  flush_namespaces_cache if flush
386
341
  end
387
342
 
388
- ##
389
343
  # Links all attributes to the current element.
390
- #
391
344
  def link_attributes
392
345
  attributes.each do |attr|
393
346
  attr.element = self
394
347
  end
395
348
  end
396
349
 
397
- ##
398
350
  # @param [String] name
399
351
  # @return [Array]
400
- #
401
352
  def split_name(name)
402
353
  segments = name.to_s.split(':')
403
354
 
404
355
  [segments.pop, segments.pop]
405
356
  end
406
357
 
407
- ##
408
358
  # @param [Oga::XML::Attribute] attr
409
359
  # @param [String] ns
410
360
  # @param [String] name
411
361
  # @return [TrueClass|FalseClass]
412
- #
413
362
  def attribute_matches?(attr, ns, name)
414
363
  name_matches = attr.name == name
415
364
  ns_matches = false
@@ -1,18 +1,14 @@
1
1
  module Oga
2
2
  module XML
3
- ##
4
3
  # Module for encoding/decoding XML and HTML entities. The mapping of HTML
5
4
  # entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
6
- #
7
5
  module Entities
8
- ##
9
6
  # Hash containing XML entities and the corresponding characters.
10
7
  #
11
8
  # The `&amp;` mapping must come last to ensure proper conversion of non
12
9
  # encoded to encoded forms (see {Oga::XML::Text#to_xml}).
13
10
  #
14
11
  # @return [Hash]
15
- #
16
12
  DECODE_MAPPING = {
17
13
  '&lt;' => '<',
18
14
  '&gt;' => '>',
@@ -21,23 +17,19 @@ module Oga
21
17
  '&amp;' => '&',
22
18
  }
23
19
 
24
- ##
25
20
  # Hash containing characters and the corresponding XML entities.
26
21
  #
27
22
  # @return [Hash]
28
- #
29
23
  ENCODE_MAPPING = {
30
24
  '&' => '&amp;',
31
25
  '>' => '&gt;',
32
26
  '<' => '&lt;',
33
27
  }
34
28
 
35
- ##
36
29
  # Hash containing characters and the corresponding XML entities to use
37
30
  # when encoding XML/HTML attribute values.
38
31
  #
39
32
  # @return [Hash]
40
- #
41
33
  ENCODE_ATTRIBUTE_MAPPING = {
42
34
  '&' => '&amp;',
43
35
  '>' => '&gt;',
@@ -46,50 +38,36 @@ module Oga
46
38
  '"' => '&quot;'
47
39
  }
48
40
 
49
- ##
50
41
  # @return [String]
51
- #
52
42
  AMPERSAND = '&'.freeze
53
43
 
54
- ##
55
44
  # Regexp for matching XML/HTML entities such as "&nbsp;".
56
45
  #
57
46
  # @return [Regexp]
58
- #
59
47
  REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
60
48
 
61
- ##
62
49
  # Regexp for matching XML/HTML numeric entities such as "&#38;".
63
50
  #
64
51
  # @return [Regexp]
65
- #
66
52
  NUMERIC_CODE_POINT_ENTITY = /&#(\d+);/
67
53
 
68
- ##
69
54
  # Regexp for matching XML/HTML hex entities such as "&#x3C;".
70
55
  #
71
56
  # @return [Regexp]
72
- #
73
57
  HEX_CODE_POINT_ENTITY = /&#x([a-fA-F0-9]+);/
74
58
 
75
- ##
76
59
  # @return [Regexp]
77
- #
78
60
  ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
79
61
 
80
- ##
81
62
  # @return [Regexp]
82
- #
83
63
  ENCODE_ATTRIBUTE_REGEXP =
84
64
  Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
85
65
 
86
- ##
87
66
  # Decodes XML entities.
88
67
  #
89
68
  # @param [String] input
90
69
  # @param [Hash] mapping
91
70
  # @return [String]
92
- #
93
71
  def self.decode(input, mapping = DECODE_MAPPING)
94
72
  return input unless input.include?(AMPERSAND)
95
73
 
@@ -110,23 +88,19 @@ module Oga
110
88
  input
111
89
  end
112
90
 
113
- ##
114
91
  # Encodes special characters as XML entities.
115
92
  #
116
93
  # @param [String] input
117
94
  # @param [Hash] mapping
118
95
  # @return [String]
119
- #
120
96
  def self.encode(input, mapping = ENCODE_MAPPING)
121
97
  input.gsub(ENCODE_REGEXP, mapping)
122
98
  end
123
99
 
124
- ##
125
100
  # Encodes special characters in an XML attribute value.
126
101
  #
127
102
  # @param [String] input
128
103
  # @return [String]
129
- #
130
104
  def self.encode_attribute(input)
131
105
  input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
132
106
  end