rubyjedi-oga 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +362 -0
  4. data/README.md +317 -0
  5. data/doc/css/common.css +77 -0
  6. data/doc/css_selectors.md +935 -0
  7. data/doc/manually_creating_documents.md +67 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/doc/xml_namespaces.md +63 -0
  10. data/ext/c/extconf.rb +11 -0
  11. data/ext/c/lexer.c +2595 -0
  12. data/ext/c/lexer.h +16 -0
  13. data/ext/c/lexer.rl +198 -0
  14. data/ext/c/liboga.c +6 -0
  15. data/ext/c/liboga.h +11 -0
  16. data/ext/java/Liboga.java +14 -0
  17. data/ext/java/org/liboga/xml/Lexer.java +1363 -0
  18. data/ext/java/org/liboga/xml/Lexer.rl +223 -0
  19. data/ext/ragel/base_lexer.rl +633 -0
  20. data/lib/oga.rb +57 -0
  21. data/lib/oga/blacklist.rb +40 -0
  22. data/lib/oga/css/lexer.rb +743 -0
  23. data/lib/oga/css/parser.rb +976 -0
  24. data/lib/oga/entity_decoder.rb +21 -0
  25. data/lib/oga/html/entities.rb +2150 -0
  26. data/lib/oga/html/parser.rb +25 -0
  27. data/lib/oga/html/sax_parser.rb +18 -0
  28. data/lib/oga/lru.rb +160 -0
  29. data/lib/oga/oga.rb +57 -0
  30. data/lib/oga/version.rb +3 -0
  31. data/lib/oga/whitelist.rb +20 -0
  32. data/lib/oga/xml/attribute.rb +136 -0
  33. data/lib/oga/xml/cdata.rb +17 -0
  34. data/lib/oga/xml/character_node.rb +37 -0
  35. data/lib/oga/xml/comment.rb +17 -0
  36. data/lib/oga/xml/default_namespace.rb +13 -0
  37. data/lib/oga/xml/doctype.rb +82 -0
  38. data/lib/oga/xml/document.rb +108 -0
  39. data/lib/oga/xml/element.rb +428 -0
  40. data/lib/oga/xml/entities.rb +122 -0
  41. data/lib/oga/xml/html_void_elements.rb +15 -0
  42. data/lib/oga/xml/lexer.rb +550 -0
  43. data/lib/oga/xml/namespace.rb +48 -0
  44. data/lib/oga/xml/node.rb +219 -0
  45. data/lib/oga/xml/node_set.rb +333 -0
  46. data/lib/oga/xml/parser.rb +631 -0
  47. data/lib/oga/xml/processing_instruction.rb +37 -0
  48. data/lib/oga/xml/pull_parser.rb +175 -0
  49. data/lib/oga/xml/querying.rb +56 -0
  50. data/lib/oga/xml/sax_parser.rb +192 -0
  51. data/lib/oga/xml/text.rb +66 -0
  52. data/lib/oga/xml/traversal.rb +50 -0
  53. data/lib/oga/xml/xml_declaration.rb +65 -0
  54. data/lib/oga/xpath/evaluator.rb +1798 -0
  55. data/lib/oga/xpath/lexer.rb +1958 -0
  56. data/lib/oga/xpath/parser.rb +622 -0
  57. data/oga.gemspec +45 -0
  58. metadata +227 -0
@@ -0,0 +1,25 @@
module Oga
  module HTML
    ##
    # HTML flavoured parser. It subclasses {Oga::XML::Parser} and differs from
    # it only in that the `:html` option is always switched on before the
    # options are handed to the parent class.
    #
    # Usage:
    #
    #     Oga::HTML::Parser.new('<meta charset="utf-8">').parse
    #
    class Parser < XML::Parser
      ##
      # @param [String|IO] data
      # @param [Hash] options Extra options; `:html` is always forced to
      #  `true`, whatever the caller passed.
      # @see [Oga::XML::Parser#initialize]
      #
      def initialize(data, options = {})
        # Hash#merge returns a new Hash, the caller's Hash is left untouched.
        html_options = options.merge(:html => true)

        super(data, html_options)
      end
    end # Parser
  end # HTML
end # Oga
@@ -0,0 +1,18 @@
module Oga
  module HTML
    ##
    # SAX parser for HTML input. It subclasses {Oga::XML::SaxParser} (see that
    # class for more information) and always enables the `:html` option.
    #
    class SaxParser < XML::SaxParser
      ##
      # @param handler The SAX handler, passed on to the parent class as-is.
      # @param data The input to parse, passed on to the parent class as-is.
      # @param [Hash] options Extra options; `:html` is always forced to
      #  `true`, whatever the caller passed.
      # @see [Oga::XML::SaxParser#initialize]
      #
      def initialize(handler, data, options = {})
        # Hash#merge returns a new Hash, the caller's Hash is left untouched.
        html_options = options.merge(:html => true)

        super(handler, data, html_options)
      end
    end # SaxParser
  end # HTML
end # Oga
@@ -0,0 +1,160 @@
module Oga
  ##
  # Thread-safe cache with a maximum size, using a Hash as the underlying
  # storage engine. Whenever the size of the cache exceeds the given limit the
  # oldest keys are removed (based on insert order). Writing an existing key
  # again moves it to the end of that order; reading a key does not change its
  # position.
  #
  # This class uses its own list of keys (as returned by {LRU#keys}) instead of
  # relying on `Hash#keys` as the latter allocates a new Array upon every call.
  #
  # This class doesn't use MonitorMixin due to the extra overhead it adds
  # compared to using a Mutex directly.
  #
  # Example usage:
  #
  #     cache = LRU.new(3)
  #
  #     cache[:a] = 10
  #     cache[:b] = 20
  #     cache[:c] = 30
  #     cache[:d] = 40
  #
  #     cache.keys # => [:b, :c, :d]
  #
  # @api private
  #
  class LRU
    ##
    # @param [Fixnum] maximum The maximum number of keys to keep.
    #
    def initialize(maximum = 1024)
      @maximum = maximum
      @cache   = {}
      @keys    = []
      @mutex   = Mutex.new

      # The thread currently holding @mutex, or nil when nobody holds it. This
      # must start out as nil: the thread that creates the cache does not hold
      # the lock and thus must not be allowed to skip it.
      @owner = nil
    end

    ##
    # Changes the maximum size, evicting the oldest keys if the cache is
    # currently larger than the new limit.
    #
    # @param [Fixnum] value
    #
    def maximum=(value)
      synchronize do
        @maximum = value

        resize
      end
    end

    ##
    # @return [Fixnum]
    #
    def maximum
      synchronize { @maximum }
    end

    ##
    # Returns the value of the key.
    #
    # @param [Mixed] key
    # @return [Mixed] The stored value, or whatever the underlying Hash
    #  returns for a missing key.
    #
    def [](key)
      synchronize { @cache[key] }
    end

    ##
    # Sets the key and its value. Old keys are discarded if the LRU size exceeds
    # the limit.
    #
    # @param [Mixed] key
    # @param [Mixed] value
    #
    def []=(key, value)
      synchronize do
        @cache[key] = value

        # Array#delete simply does nothing for keys that are not present, so
        # there is no need to scan the Array with include? first.
        @keys.delete(key)
        @keys << key

        resize
      end
    end

    ##
    # Returns a key if it exists, otherwise yields the supplied block and uses
    # its return value as the key value.
    #
    # Because this relies on `||=` a stored value of `nil` or `false` is
    # treated as missing, and the block is called again.
    #
    # @param [Mixed] key
    # @return [Mixed]
    #
    def get_or_set(key)
      synchronize { self[key] ||= yield }
    end

    ##
    # Returns the internal list of keys, oldest first. This is the Array used
    # by the cache itself and not a copy.
    #
    # @return [Array]
    #
    def keys
      synchronize { @keys }
    end

    ##
    # @param [Mixed] key
    # @return [TrueClass|FalseClass]
    #
    def key?(key)
      synchronize { @cache.key?(key) }
    end

    ##
    # Removes all keys from the cache.
    #
    def clear
      synchronize do
        @keys.clear
        @cache.clear
      end
    end

    ##
    # @return [Fixnum]
    #
    def size
      synchronize { @cache.size }
    end

    alias_method :length, :size

    private

    ##
    # Yields the supplied block while holding the mutex. Calls made by the
    # thread that already holds the mutex (e.g. `resize` calling `size`) yield
    # directly, as locking a Mutex twice from the same thread raises.
    #
    # @owner can only equal the current thread when that thread set it itself,
    # which it only does after acquiring the mutex. The owner is cleared again
    # before the mutex is released so that a thread is never able to skip the
    # lock based on a previous, already finished call.
    #
    def synchronize
      if @owner == Thread.current
        yield
      else
        @mutex.synchronize do
          @owner = Thread.current

          begin
            yield
          ensure
            @owner = nil
          end
        end
      end
    end

    ##
    # Removes old keys until the size of the hash no longer exceeds the maximum
    # size.
    #
    def resize
      return unless size > @maximum

      to_remove = @keys.shift(size - @maximum)

      to_remove.each { |key| @cache.delete(key) }
    end
  end # LRU
end # Oga
@@ -0,0 +1,57 @@
module Oga
  ##
  # Parses the given XML document by handing the input and options to a new
  # {Oga::XML::Parser} and returning the result of its `parse` method.
  #
  # @example
  #  document = Oga.parse_xml('<root>Hello</root>')
  #
  # @see [Oga::XML::Lexer#initialize]
  #
  # @return [Oga::XML::Document]
  #
  def self.parse_xml(xml, options = {})
    parser = XML::Parser.new(xml, options)

    parser.parse
  end

  ##
  # Parses the given HTML document by handing the input and options to a new
  # {Oga::HTML::Parser} and returning the result of its `parse` method.
  #
  # @example
  #  document = Oga.parse_html('<html>...</html>')
  #
  # @see [Oga::XML::Lexer#initialize]
  #
  # @return [Oga::XML::Document]
  #
  def self.parse_html(html, options = {})
    parser = HTML::Parser.new(html, options)

    parser.parse
  end

  ##
  # Parses the given XML document using the SAX parser
  # ({Oga::XML::SaxParser}).
  #
  # @example
  #  handler = SomeSaxHandler.new
  #
  #  Oga.sax_parse_xml(handler, '<root>Hello</root>')
  #
  # @see [Oga::XML::SaxParser#initialize]
  #
  def self.sax_parse_xml(handler, xml, options = {})
    parser = XML::SaxParser.new(handler, xml, options)

    parser.parse
  end

  ##
  # Parses the given HTML document using the SAX parser
  # ({Oga::HTML::SaxParser}).
  #
  # @example
  #  handler = SomeSaxHandler.new
  #
  #  Oga.sax_parse_html(handler, '<script>foo()</script>')
  #
  # @see [Oga::XML::SaxParser#initialize]
  #
  def self.sax_parse_html(handler, html, options = {})
    parser = HTML::SaxParser.new(handler, html, options)

    parser.parse
  end
end # Oga
@@ -0,0 +1,3 @@
module Oga
  # The version of the Oga Gem as a String.
  VERSION = '1.0.3'
end # Oga
@@ -0,0 +1,20 @@
module Oga
  ##
  # List of names in which a name is allowed only when it is part of the list.
  # Everything besides {#allow?} and {#to_blacklist} is inherited from
  # {Oga::Blacklist}.
  #
  # @api private
  #
  class Whitelist < Blacklist
    ##
    # Checks whether the given name is part of the list of names.
    #
    # @param name The name to look up.
    # @return [TrueClass|FalseClass]
    #
    def allow?(name)
      names.include?(name)
    end

    ##
    # Builds a {Oga::Blacklist} out of the names of this whitelist.
    #
    # @return [Oga::Blacklist]
    #
    def to_blacklist
      Blacklist.new(names)
    end
  end # Whitelist
end # Oga
@@ -0,0 +1,136 @@
module Oga
  module XML
    ##
    # Class for storing information about a single XML attribute.
    #
    class Attribute
      # The name of the attribute.
      # @return [String]
      attr_accessor :name

      # The name (prefix) of the namespace, used to look up the namespace.
      # @return [String]
      attr_accessor :namespace_name

      # The element this attribute belongs to, nil for detached attributes.
      # @return [Oga::XML::Element]
      attr_accessor :element

      ##
      # The default namespace available to all attributes. This namespace can
      # not be modified.
      #
      # @return [Oga::XML::Namespace]
      #
      DEFAULT_NAMESPACE = Namespace.new(
        :name => 'xml',
        :uri  => XML::DEFAULT_NAMESPACE.uri
      ).freeze

      ##
      # @param [Hash] options
      #
      # @option options [String] :name
      # @option options [String] :namespace_name
      # @option options [String] :value
      # @option options [Oga::XML::Element] :element
      #
      def initialize(options = {})
        @name    = options[:name]
        @value   = options[:value]
        @element = options[:element]

        @namespace_name = options[:namespace_name]
      end

      ##
      # Returns the {Oga::XML::Namespace} instance for the current namespace
      # name. The result is memoized once a namespace has been found.
      #
      # Attributes that are not attached to an element can only resolve the
      # default ("xml") namespace; for any other name nil is returned instead
      # of raising a NoMethodError on the missing element.
      #
      # @return [Oga::XML::Namespace|NilClass]
      #
      def namespace
        return @namespace if @namespace

        if namespace_name == DEFAULT_NAMESPACE.name
          @namespace = DEFAULT_NAMESPACE
        elsif element
          @namespace = element.available_namespaces[namespace_name]
        end

        @namespace
      end

      ##
      # Sets the raw value. The value is marked as not yet decoded, so the
      # next call to {#value} runs it through the entity decoder.
      #
      # @param [String] value
      #
      def value=(value)
        @value   = value
        @decoded = false
      end

      ##
      # Returns the value of the attribute or nil if no explicit value was set.
      #
      # Entities are decoded lazily: the first read after the value was set
      # decodes it and stores the decoded result.
      #
      # @return [String|NilClass]
      #
      def value
        if !@decoded && @value
          @value   = EntityDecoder.try_decode(@value, html?)
          @decoded = true
        end

        @value
      end

      ##
      # Returns the value as a String, an empty String if there is no value.
      #
      # @return [String]
      #
      def text
        value.to_s
      end

      alias_method :to_s, :text

      ##
      # Serializes the attribute as `name="value"`, prefixing the name with
      # the namespace name if there is one. A missing value results in an
      # empty pair of quotes.
      #
      # @return [String]
      #
      def to_xml
        if namespace_name
          full_name = "#{namespace_name}:#{name}"
        else
          full_name = name
        end

        enc_value = value ? Entities.encode_attribute(value) : nil

        %Q(#{full_name}="#{enc_value}")
      end

      ##
      # Returns a String listing the name, namespace and value, leaving out
      # those that are not set.
      #
      # @return [String]
      #
      def inspect
        segments = []

        [:name, :namespace, :value].each do |attr|
          # Named attr_value so that it doesn't shadow the #value method.
          attr_value = send(attr)

          segments << "#{attr}: #{attr_value.inspect}" if attr_value
        end

        "Attribute(#{segments.join(' ')})"
      end

      private

      ##
      # @return [TrueClass|FalseClass] true if there is an element and that
      #  element reports itself as HTML.
      #
      def html?
        !!@element && @element.html?
      end
    end # Attribute
  end # XML
end # Oga
@@ -0,0 +1,17 @@
module Oga
  module XML
    ##
    # Node class for CDATA tags, built on top of {Oga::XML::CharacterNode}.
    #
    class Cdata < CharacterNode
      ##
      # Converts the node back to XML by wrapping the text in a CDATA section.
      # The text is inserted as-is.
      #
      # @return [String]
      #
      def to_xml
        "<![CDATA[#{text}]]>"
      end
    end # Cdata
  end # XML
end # Oga
@@ -0,0 +1,37 @@
module Oga
  module XML
    ##
    # Shared parent class of the nodes that consist of a piece of text, such
    # as Text and Comment nodes.
    #
    class CharacterNode < Node
      # The text of the node.
      # @return [String]
      attr_accessor :text

      ##
      # @param [Hash] options Passed on to the parent class unchanged.
      #
      # @option options [String] :text The text of the node.
      #
      def initialize(options = {})
        # A bare `super` forwards the arguments of this method as they are.
        super

        @text = options[:text]
      end

      ##
      # Returns the text as a String, an empty String if no text was set.
      #
      # @return [String]
      #
      def to_xml
        text.to_s
      end

      ##
      # Returns the class name without its namespaces followed by the
      # inspected text, for example `Text("hello")` for a class called
      # `Oga::XML::Text`.
      #
      # @return [String]
      #
      def inspect
        short_name = self.class.to_s.split('::').last

        "#{short_name}(#{text.inspect})"
      end
    end # CharacterNode
  end # XML
end # Oga