oga 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,158 @@
1
module Oga
  ##
  # Thread-safe, size-bounded cache using a Hash as the underlying storage
  # engine. Whenever the size of the cache exceeds the given limit the oldest
  # keys are removed (based on insert order). Reading a key does not change its
  # position; only (re)assigning a key moves it to the end of the list.
  #
  # This class uses its own list of keys (as returned by {LRU#keys}) instead of
  # relying on `Hash#keys` as the latter allocates a new Array upon every call.
  #
  # This class doesn't use MonitorMixin due to the extra overhead it adds
  # compared to using a Mutex directly.
  #
  # Example usage:
  #
  #     cache = LRU.new(3)
  #
  #     cache[:a] = 10
  #     cache[:b] = 20
  #     cache[:c] = 30
  #     cache[:d] = 40
  #
  #     cache.keys # => [:b, :c, :d]
  #
  class LRU
    ##
    # @param [Integer] maximum The maximum number of keys to keep.
    #
    def initialize(maximum = 1024)
      @maximum = maximum
      @cache   = {}
      @keys    = []
      @mutex   = Mutex.new

      # The thread currently holding @mutex, or nil when nobody holds it. This
      # must start out as nil: seeding it with the constructing thread would
      # let that thread skip the lock entirely.
      @owner = nil
    end

    ##
    # Changes the size limit and immediately evicts the oldest keys if the
    # cache is larger than the new limit.
    #
    # @param [Integer] value
    #
    def maximum=(value)
      synchronize do
        @maximum = value

        resize
      end
    end

    ##
    # @return [Integer]
    #
    def maximum
      return synchronize { @maximum }
    end

    ##
    # Returns the value of the key, or nil if the key is not cached.
    #
    # @param [Mixed] key
    # @return [Mixed]
    #
    def [](key)
      return synchronize { @cache[key] }
    end

    ##
    # Sets the key and its value. Old keys are discarded if the cache size
    # exceeds the limit. Assigning an existing key moves it to the end of the
    # key list, making it the most recently inserted key.
    #
    # @param [Mixed] key
    # @param [Mixed] value
    #
    def []=(key, value)
      synchronize do
        # The Hash lookup is O(1), so the O(n) scan of the key list is only
        # paid when an existing key is re-inserted, not for every new key.
        @keys.delete(key) if @cache.key?(key)

        @cache[key] = value
        @keys << key

        resize
      end
    end

    ##
    # Returns the value of the key if it exists, otherwise yields the supplied
    # block and stores its return value as the value of the key.
    #
    # @param [Mixed] key
    # @return [Mixed]
    #
    def get_or_set(key)
      synchronize do
        # Check for the presence of the key instead of relying on `||=`,
        # otherwise cached nil/false values would be recomputed on every call.
        if @cache.key?(key)
          @cache[key]
        else
          self[key] = yield
        end
      end
    end

    ##
    # Returns the list of keys, oldest first. This is the internal Array (no
    # copy is made), callers should treat it as read-only.
    #
    # @return [Array]
    #
    def keys
      return synchronize { @keys }
    end

    ##
    # @param [Mixed] key
    # @return [TrueClass|FalseClass]
    #
    def key?(key)
      return synchronize { @cache.key?(key) }
    end

    ##
    # Removes all keys from the cache.
    #
    def clear
      synchronize do
        @keys.clear
        @cache.clear
      end
    end

    ##
    # @return [Integer]
    #
    def size
      return synchronize { @cache.size }
    end

    alias_method :length, :size

    private

    ##
    # Yields the supplied block while holding the mutex. Nested calls made by
    # the thread that already holds the mutex yield directly, as locking a
    # Mutex twice from the same thread raises a ThreadError. This method is
    # heavily based on `MonitorMixin#mon_enter`.
    #
    def synchronize
      # @owner is only ever set to a thread while that thread holds the mutex,
      # so this comparison can only succeed for the lock holder itself.
      return yield if @owner == Thread.current

      @mutex.synchronize do
        @owner = Thread.current

        begin
          yield
        ensure
          # Give up ownership before the mutex is released (also when the block
          # raises), otherwise this thread would bypass the lock from now on.
          @owner = nil
        end
      end
    end

    ##
    # Removes old keys until the size of the hash no longer exceeds the maximum
    # size. Must only be called from within a {#synchronize} block.
    #
    def resize
      overflow = @cache.size - @maximum

      return unless overflow > 0

      @keys.shift(overflow).each { |key| @cache.delete(key) }
    end
  end # LRU
end # Oga
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '0.2.3'
2
+ VERSION = '0.3.0'
3
3
  end # Oga
@@ -29,7 +29,7 @@ module Oga
29
29
  #
30
30
  DEFAULT_NAMESPACE = Namespace.new(
31
31
  :name => 'xml',
32
- :uri => 'http://www.w3.org/XML/1998/namespace'
32
+ :uri => XML::DEFAULT_NAMESPACE.uri
33
33
  ).freeze
34
34
 
35
35
  ##
@@ -0,0 +1,13 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # The default XML namespace.
5
+ #
6
+ # @return [Oga::XML::Namespace]
7
+ #
8
+ DEFAULT_NAMESPACE = Namespace.new(
9
+ :name => 'xmlns',
10
+ :uri => 'http://www.w3.org/XML/1998/namespace'
11
+ ).freeze
12
+ end # XML
13
+ end # Oga
@@ -23,7 +23,9 @@ module Oga
23
23
  class Element < Node
24
24
  include Querying
25
25
 
26
- attr_accessor :name, :namespace_name, :attributes, :namespaces
26
+ attr_accessor :name, :namespace_name, :attributes
27
+
28
+ attr_writer :namespaces
27
29
 
28
30
  ##
29
31
  # The attribute prefix/namespace used for registering element namespaces.
@@ -166,6 +168,26 @@ module Oga
166
168
  return @namespace
167
169
  end
168
170
 
171
+ ##
172
+ # Returns the namespaces registered on this element, or an empty Hash in
173
+ # case of an HTML element.
174
+ #
175
+ # @return [Hash]
176
+ #
177
+ def namespaces
178
+ return html? ? {} : @namespaces
179
+ end
180
+
181
+ ##
182
+ # Returns true if the current element resides in the default XML
183
+ # namespace.
184
+ #
185
+ # @return [TrueClass|FalseClass]
186
+ #
187
+ def default_namespace?
188
+ return namespace == DEFAULT_NAMESPACE || namespace.nil?
189
+ end
190
+
169
191
  ##
170
192
  # Returns the text of all child nodes joined together.
171
193
  #
@@ -284,6 +306,8 @@ module Oga
284
306
  # @return [Hash]
285
307
  #
286
308
  def available_namespaces
309
+ return {} if html? # HTML(5) completely ignores namespaces
310
+
287
311
  merged = namespaces.dup
288
312
  node = parent
289
313
 
@@ -1,5 +1,9 @@
1
1
  module Oga
2
2
  module XML
3
+ ##
4
+ # Module for encoding/decoding XML and HTML entities. The mapping of HTML
5
+ # entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
6
+ #
3
7
  module Entities
4
8
  ##
5
9
  # Hash containing XML entities and the corresponding characters.
@@ -11,15 +15,10 @@ module Oga
11
15
  #
12
16
  DECODE_MAPPING = {
13
17
  '&lt;' => '<',
14
- '&#60;' => '<',
15
18
  '&gt;' => '>',
16
- '&#62;' => '>',
17
19
  '&apos;' => "'",
18
- '&#39;' => "'",
19
20
  '&quot;' => '"',
20
- '&#34;' => '"',
21
21
  '&amp;' => '&',
22
- '&#38;' => '&',
23
22
  }
24
23
 
25
24
  ##
@@ -35,16 +34,45 @@ module Oga
35
34
  '<' => '&lt;',
36
35
  }
37
36
 
37
+ ##
38
+ # @return [String]
39
+ #
40
+ AMPERSAND = '&'.freeze
41
+
42
+ ##
43
+ # Regexp for matching XML/HTML entities such as "&nbsp;".
44
+ #
45
+ # @return [Regexp]
46
+ #
47
+ REGULAR_ENTITY = /&[a-zA-Z]+;/
48
+
49
+ ##
50
+ # Regexp for matching XML/HTML entities such as "&#38;".
51
+ #
52
+ # @return [Regexp]
53
+ #
54
+ CODEPOINT_ENTITY = /&#(x)?([a-zA-Z0-9]+);/
55
+
56
+ ##
57
+ # @return [Regexp]
58
+ #
59
+ ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
60
+
38
61
  ##
39
62
  # Decodes XML entities.
40
63
  #
41
64
  # @param [String] input
65
+ # @param [Hash] mapping
42
66
  # @return [String]
43
67
  #
44
- def self.decode(input)
45
- if input.include?('&')
46
- DECODE_MAPPING.each do |find, replace|
47
- input = input.gsub(find, replace)
68
+ def self.decode(input, mapping = DECODE_MAPPING)
69
+ return input unless input.include?(AMPERSAND)
70
+
71
+ input = input.gsub(REGULAR_ENTITY, mapping)
72
+
73
+ if input.include?(AMPERSAND)
74
+ input = input.gsub(CODEPOINT_ENTITY) do |match|
75
+ [$1 ? Integer($2, 16) : Integer($2)].pack('U')
48
76
  end
49
77
  end
50
78
 
@@ -55,14 +83,11 @@ module Oga
55
83
  # Encodes special characters as XML entities.
56
84
  #
57
85
  # @param [String] input
86
+ # @param [Hash] mapping
58
87
  # @return [String]
59
88
  #
60
- def self.encode(input)
61
- ENCODE_MAPPING.each do |from, to|
62
- input = input.gsub(from, to) if input.include?(from)
63
- end
64
-
65
- return input
89
+ def self.encode(input, mapping = ENCODE_MAPPING)
90
+ return input.gsub(ENCODE_REGEXP, mapping)
66
91
  end
67
92
  end # Entities
68
93
  end # XML
@@ -217,7 +217,7 @@ module Oga
217
217
  # @param [String] value The data between the quotes.
218
218
  #
219
219
  def on_string_body(value)
220
- add_token(:T_STRING_BODY, Entities.decode(value))
220
+ add_token(:T_STRING_BODY, value)
221
221
  end
222
222
 
223
223
  ##
@@ -373,7 +373,7 @@ module Oga
373
373
  def on_text(value)
374
374
  return if value.empty?
375
375
 
376
- add_token(:T_TEXT, Entities.decode(value))
376
+ add_token(:T_TEXT, value)
377
377
  end
378
378
 
379
379
  ##
@@ -37,6 +37,14 @@ module Oga
37
37
  def inspect
38
38
  return "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
39
39
  end
40
+
41
+ ##
42
+ # @param [Oga::XML::Namespace] other
43
+ # @return [TrueClass|FalseClass]
44
+ #
45
+ def ==(other)
46
+ return other.is_a?(self.class) && name == other.name && uri == other.uri
47
+ end
40
48
  end # Namespace
41
49
  end # XML
42
50
  end # Oga
@@ -163,6 +163,22 @@ module Oga
163
163
 
164
164
  node_set.insert(index, other)
165
165
  end
166
+
167
+ ##
168
+ # @return [TrueClass|FalseClass]
169
+ #
170
+ def html?
171
+ root = root_node
172
+
173
+ return root.is_a?(Document) && root.html?
174
+ end
175
+
176
+ ##
177
+ # @return [TrueClass|FalseClass]
178
+ #
179
+ def xml?
180
+ return !html?
181
+ end
166
182
  end # Element
167
183
  end # XML
168
184
  end # Oga
@@ -44,10 +44,10 @@ module Oga
44
44
  # @param [Oga::XML::NodeSet] owner The owner of the set.
45
45
  #
46
46
  def initialize(nodes = [], owner = nil)
47
- @nodes = nodes.uniq
47
+ @nodes = nodes
48
48
  @owner = owner
49
49
 
50
- @nodes.each { |node| take_ownership(node) }
50
+ @nodes.each { |node| take_ownership(node) } if owner
51
51
  end
52
52
 
53
53
  ##
@@ -1,13 +1,170 @@
1
- #
2
- # DO NOT MODIFY!!!!
3
- # This file is automatically generated by Racc 1.4.12
4
- # from Racc grammer file "".
5
- #
1
+ # This file is automatically generated by ruby-ll. Manually changing this file
2
+ # is not recommended as any changes will be lost the next time this parser is
3
+ # re-generated.
4
+ require 'll/setup'
6
5
 
7
- require 'racc/parser.rb'
8
6
  module Oga
9
- module XML
10
- class Parser < Racc::Parser
7
+ module XML
8
+ ##
9
+ # DOM parser for both XML and HTML.
10
+ #
11
+ # This parser does not produce a dedicated AST, instead it emits XML nodes
12
+ # directly. Basic usage of this parser is as follows:
13
+ #
14
+ # parser = Oga::XML::Parser.new('<foo></foo>')
15
+ # document = parser.parse
16
+ #
17
+ # To enable HTML parsing you'd use the following instead:
18
+ #
19
+ # parser = Oga::XML::Parser.new('<foo></foo>', :html => true)
20
+ # document = parser.parse
21
+ #
22
+ # In both cases you can use either a String or an IO as the parser input. IO
23
+ # instances will result in lower memory overhead, especially when parsing large
24
+ # files.
25
+ #
26
+ class Parser < LL::Driver
27
+ CONFIG = LL::DriverConfig.new
28
+
29
+ CONFIG.terminals = [
30
+ :$EOF, # 0
31
+ :T_TEXT, # 1
32
+ :T_STRING_SQUOTE, # 2
33
+ :T_STRING_DQUOTE, # 3
34
+ :T_STRING_BODY, # 4
35
+ :T_DOCTYPE_START, # 5
36
+ :T_DOCTYPE_END, # 6
37
+ :T_DOCTYPE_TYPE, # 7
38
+ :T_DOCTYPE_NAME, # 8
39
+ :T_DOCTYPE_INLINE, # 9
40
+ :T_CDATA, # 10
41
+ :T_COMMENT, # 11
42
+ :T_ELEM_START, # 12
43
+ :T_ELEM_NAME, # 13
44
+ :T_ELEM_NS, # 14
45
+ :T_ELEM_END, # 15
46
+ :T_ATTR, # 16
47
+ :T_ATTR_NS, # 17
48
+ :T_XML_DECL_START, # 18
49
+ :T_XML_DECL_END, # 19
50
+ :T_PROC_INS_START, # 20
51
+ :T_PROC_INS_NAME, # 21
52
+ :T_PROC_INS_END, # 22
53
+ ].freeze
54
+
55
+ CONFIG.rules = [
56
+ [3, 0, 0, 1], # 0
57
+ [3, 1, 4, 19, 6, 0], # 1
58
+ [3, 2, 0, 3], # 2
59
+ [3, 3, 0, 7], # 3
60
+ [3, 4, 0, 8], # 4
61
+ [3, 5, 0, 9], # 5
62
+ [3, 6, 0, 16], # 6
63
+ [3, 7, 0, 12], # 7
64
+ [3, 8, 0, 15], # 8
65
+ [3, 9, 0, 4, 1, 8, 1, 5], # 9
66
+ [3, 10, 1, 6], # 10
67
+ [3, 11, 0, 6, 1, 7], # 11
68
+ [3, 12, 1, 6, 0, 5], # 12
69
+ [3, 13, 5, 20, 6, 0], # 13
70
+ [3, 14, 1, 6, 8, 21, 0, 17], # 14
71
+ [3, 15, 1, 6], # 15
72
+ [3, 16, 1, 10], # 16
73
+ [3, 17, 1, 11], # 17
74
+ [3, 18, 1, 22, 8, 22, 1, 21, 1, 20], # 18
75
+ [3, 19, 1, 13], # 19
76
+ [3, 20, 1, 13, 1, 14], # 20
77
+ [3, 21, 0, 13, 0, 10, 1, 12], # 21
78
+ [3, 22, 1, 15, 0, 1, 0, 11], # 22
79
+ [3, 23, 4, 23, 6, 0], # 23
80
+ [3, 24, 8, 24, 1, 16, 1, 17], # 24
81
+ [3, 25, 8, 25, 1, 16], # 25
82
+ [3, 26, 1, 19, 0, 13, 1, 18], # 26
83
+ [3, 27, 1, 1], # 27
84
+ [3, 28, 1, 3, 0, 18, 1, 3], # 28
85
+ [3, 29, 1, 2, 0, 18, 1, 2], # 29
86
+ [3, 30, 4, 26, 6, 0], # 30
87
+ [3, 31, 0, 2], # 31
88
+ [3, 32, 1, 9], # 32
89
+ [3, 33, 0, 17], # 33
90
+ [3, 34, 1, 1], # 34
91
+ [3, 35, 0, 14], # 35
92
+ [3, 36, 0, 17], # 36
93
+ [3, 37, 0, 17], # 37
94
+ [3, 38, 1, 4], # 38
95
+ ].freeze
96
+
97
+ CONFIG.table = [
98
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], # 0
99
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], # 1
100
+ [-1, 6, -1, -1, -1, 2, -1, -1, -1, -1, 3, 4, 7, -1, -1, -1, -1, -1, 8, -1, 5, -1, -1], # 2
101
+ [-1, -1, -1, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 3
102
+ [12, 12, 12, 12, 12, 12, 10, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12], # 4
103
+ [13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13], # 5
104
+ [-1, -1, 14, 14, -1, -1, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 6
105
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 7
106
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 8
107
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, -1, -1], # 9
108
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, -1, -1, -1, -1, -1, -1, -1, -1], # 10
109
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 11
110
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 12
111
+ [23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23], # 13
112
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 25, 24, -1, -1, -1, -1, -1], # 14
113
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, -1], # 15
114
+ [-1, 27, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 16
115
+ [-1, -1, 29, 28, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 17
116
+ [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30], # 18
117
+ [-1, 31, -1, -1, -1, 31, -1, -1, -1, -1, 31, 31, 31, -1, -1, -1, -1, -1, 31, -1, 31, -1, -1], # 19
118
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, 32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 20
119
+ [-1, -1, 33, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 21
120
+ [-1, 34, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 22
121
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 35, 35, -1, -1, -1, -1, -1], # 23
122
+ [-1, -1, 36, 36, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 24
123
+ [-1, -1, 37, 37, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 25
124
+ [-1, -1, -1, -1, 38, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 26
125
+ ].freeze
126
+
127
+ CONFIG.actions = [
128
+ [:_rule_0, 1], # 0
129
+ [:_rule_1, 1], # 1
130
+ [:_rule_2, 1], # 2
131
+ [:_rule_3, 1], # 3
132
+ [:_rule_4, 1], # 4
133
+ [:_rule_5, 1], # 5
134
+ [:_rule_6, 1], # 6
135
+ [:_rule_7, 1], # 7
136
+ [:_rule_8, 1], # 8
137
+ [:_rule_9, 3], # 9
138
+ [:_rule_10, 1], # 10
139
+ [:_rule_11, 2], # 11
140
+ [:_rule_12, 2], # 12
141
+ [:_rule_13, 1], # 13
142
+ [:_rule_14, 3], # 14
143
+ [:_rule_15, 1], # 15
144
+ [:_rule_16, 1], # 16
145
+ [:_rule_17, 1], # 17
146
+ [:_rule_18, 4], # 18
147
+ [:_rule_19, 1], # 19
148
+ [:_rule_20, 2], # 20
149
+ [:_rule_21, 3], # 21
150
+ [:_rule_22, 3], # 22
151
+ [:_rule_23, 1], # 23
152
+ [:_rule_24, 3], # 24
153
+ [:_rule_25, 2], # 25
154
+ [:_rule_26, 3], # 26
155
+ [:_rule_27, 1], # 27
156
+ [:_rule_28, 3], # 28
157
+ [:_rule_29, 3], # 29
158
+ [:_rule_30, 1], # 30
159
+ [:_rule_31, 1], # 31
160
+ [:_rule_32, 1], # 32
161
+ [:_rule_33, 1], # 33
162
+ [:_rule_34, 1], # 34
163
+ [:_rule_35, 1], # 35
164
+ [:_rule_36, 1], # 36
165
+ [:_rule_37, 1], # 37
166
+ [:_rule_38, 1], # 38
167
+ ].freeze
11
168
 
12
169
  ##
13
170
  # Hash mapping token types and dedicated error labels.
@@ -15,27 +172,27 @@ module Oga
15
172
  # @return [Hash]
16
173
  #
17
174
  TOKEN_ERROR_MAPPING = {
18
- 'T_STRING' => 'string',
19
- 'T_TEXT' => 'text',
20
- 'T_DOCTYPE_START' => 'doctype start',
21
- 'T_DOCTYPE_END' => 'doctype closing tag',
22
- 'T_DOCTYPE_TYPE' => 'doctype type',
23
- 'T_DOCTYPE_NAME' => 'doctype name',
24
- 'T_DOCTYPE_INLINE' => 'inline doctype rules',
25
- 'T_CDATA' => 'CDATA',
26
- 'T_COMMENT' => 'comment',
27
- 'T_ELEM_START' => 'element start',
28
- 'T_ELEM_NAME' => 'element name',
29
- 'T_ELEM_NS' => 'element namespace',
30
- 'T_ELEM_END' => 'element closing tag',
31
- 'T_ATTR' => 'attribute',
32
- 'T_ATTR_NS' => 'attribute namespace',
33
- 'T_XML_DECL_START' => 'XML declaration start',
34
- 'T_XML_DECL_END' => 'XML declaration end',
35
- 'T_PROC_INS_START' => 'processing-instruction start',
36
- 'T_PROC_INS_NAME' => 'processing-instruction name',
37
- 'T_PROC_INS_END' => 'processing-instruction closing tag',
38
- '$end' => 'end of input'
175
+ :T_STRING => 'string',
176
+ :T_TEXT => 'text',
177
+ :T_DOCTYPE_START => 'doctype start',
178
+ :T_DOCTYPE_END => 'doctype closing tag',
179
+ :T_DOCTYPE_TYPE => 'doctype type',
180
+ :T_DOCTYPE_NAME => 'doctype name',
181
+ :T_DOCTYPE_INLINE => 'inline doctype rules',
182
+ :T_CDATA => 'CDATA',
183
+ :T_COMMENT => 'comment',
184
+ :T_ELEM_START => 'element start',
185
+ :T_ELEM_NAME => 'element name',
186
+ :T_ELEM_NS => 'element namespace',
187
+ :T_ELEM_END => 'element closing tag',
188
+ :T_ATTR => 'attribute',
189
+ :T_ATTR_NS => 'attribute namespace',
190
+ :T_XML_DECL_START => 'XML declaration start',
191
+ :T_XML_DECL_END => 'XML declaration end',
192
+ :T_PROC_INS_START => 'processing-instruction start',
193
+ :T_PROC_INS_NAME => 'processing-instruction name',
194
+ :T_PROC_INS_END => 'processing-instruction closing tag',
195
+ -1 => 'end of input'
39
196
  }
40
197
 
41
198
  ##
@@ -64,44 +221,49 @@ module Oga
64
221
  #
65
222
  # @yieldparam [Array]
66
223
  #
67
- def yield_next_token
224
+ def each_token
68
225
  @lexer.advance do |type, value, line|
69
226
  @line = line if line
70
227
 
71
228
  yield [type, value]
72
229
  end
73
230
 
74
- yield [false, false]
231
+ yield [-1, -1]
75
232
  end
76
233
 
77
234
  ##
78
- # @param [Fixnum] type The type of token the error occured on.
79
- # @param [String] value The value of the token.
80
- # @param [Array] stack The current stack of parsed nodes.
81
- # @raise [Racc::ParseError]
235
+ # @param [Fixnum] stack_type
236
+ # @param [Fixnum] stack_value
237
+ # @param [Symbol] token_type
238
+ # @param [String] token_value
82
239
  #
83
- def on_error(type, value, stack)
84
- name = token_to_str(type)
85
- name = TOKEN_ERROR_MAPPING[name] || name
240
+ def parser_error(stack_type, stack_value, token_type, token_value)
241
+ case id_to_type(stack_type)
242
+ when :rule
243
+ message = "Unexpected #{token_type} for rule #{stack_value}"
244
+ when :terminal
245
+ expected = id_to_terminal(stack_value)
246
+ expected = TOKEN_ERROR_MAPPING[expected] || expected
247
+ got = TOKEN_ERROR_MAPPING[token_type] || token_type
248
+ message = "Unexpected #{got}, expected #{expected} instead"
249
+ when :eof
250
+ message = 'Unexpected end of input'
251
+ end
252
+
253
+ message += " on line #{@line}"
86
254
 
87
- raise Racc::ParseError, "Unexpected #{name} on line #{@line}"
255
+ raise LL::ParserError, message
88
256
  end
89
257
 
90
258
  ##
91
- # Parses the input and returns the corresponding AST.
92
- #
93
- # @example
94
- # parser = Oga::Parser.new('<foo>bar</foo>')
95
- # ast = parser.parse
96
- #
97
- # @return [Oga::AST::Node]
259
+ # @see [LL::Driver#parse]
98
260
  #
99
261
  def parse
100
- ast = yyparse(self, :yield_next_token)
262
+ retval = super
101
263
 
102
264
  reset
103
265
 
104
- return ast
266
+ return retval
105
267
  end
106
268
 
107
269
  ##
@@ -109,9 +271,7 @@ module Oga
109
271
  # @return [Oga::XML::Document]
110
272
  #
111
273
  def on_document(children = [])
112
- document = Document.new(
113
- :type => @lexer.html ? :html : :xml
114
- )
274
+ document = Document.new(:type => @lexer.html ? :html : :xml)
115
275
 
116
276
  children.each do |child|
117
277
  if child.is_a?(Doctype)
@@ -217,404 +377,203 @@ module Oga
217
377
  return element
218
378
  end
219
379
 
220
- # vim: set ft=racc:
221
- ##### State transition tables begin ###
222
-
223
- racc_action_table = [
224
- 41, 20, 48, 47, 54, 12, 52, 55, 25, 26,
225
- 13, 14, 16, 20, 21, 48, 47, 12, 19, 62,
226
- 15, 40, 13, 14, 16, 20, 60, 23, 59, 12,
227
- 19, 66, 15, 65, 13, 14, 16, 36, 37, 24,
228
- 39, 35, 19, 42, 15, 31, 32, 31, 32, 31,
229
- 32, 48, 47, 57, 59, 64, 65, 49, 50, 51,
230
- 56, 67 ]
231
-
232
- racc_action_check = [
233
- 24, 0, 37, 37, 38, 0, 37, 38, 16, 16,
234
- 0, 0, 0, 3, 1, 53, 53, 3, 0, 53,
235
- 0, 24, 3, 3, 3, 18, 48, 12, 48, 18,
236
- 3, 61, 3, 61, 18, 18, 18, 23, 23, 15,
237
- 23, 21, 18, 26, 18, 17, 17, 19, 19, 28,
238
- 28, 30, 30, 47, 47, 58, 58, 32, 33, 34,
239
- 41, 63 ]
240
-
241
- racc_action_pointer = [
242
- -1, 14, nil, 11, nil, nil, nil, nil, nil, nil,
243
- nil, nil, 18, nil, nil, 17, -6, 28, 23, 30,
244
- nil, 41, nil, 30, -2, nil, 29, nil, 32, nil,
245
- 48, nil, 40, 42, 39, nil, nil, -1, -3, nil,
246
- nil, 37, nil, nil, nil, nil, nil, 49, 23, nil,
247
- nil, nil, nil, 12, nil, nil, nil, nil, 51, nil,
248
- nil, 28, nil, 54, nil, nil, nil, nil ]
249
-
250
- racc_action_default = [
251
- -3, -46, -1, -2, -5, -6, -7, -8, -9, -10,
252
- -11, -12, -46, -20, -21, -46, -46, -29, -3, -29,
253
- -37, -46, -4, -46, -46, -24, -46, -26, -28, -31,
254
- -32, -34, -46, -46, -46, 68, -13, -46, -46, -18,
255
- -22, -46, -25, -30, -33, -38, -39, -46, -46, -35,
256
- -27, -36, -14, -46, -17, -19, -23, -40, -46, -44,
257
- -42, -46, -15, -46, -41, -45, -43, -16 ]
258
-
259
- racc_goto_table = [
260
- 44, 2, 27, 1, 34, 58, 61, 53, 22, 38,
261
- 43, nil, nil, nil, nil, nil, nil, nil, nil, 33,
262
- nil, nil, nil, 63 ]
263
-
264
- racc_goto_check = [
265
- 12, 2, 16, 1, 16, 22, 22, 12, 4, 13,
266
- 18, nil, nil, nil, nil, nil, nil, nil, nil, 2,
267
- nil, nil, nil, 12 ]
268
-
269
- racc_goto_pointer = [
270
- nil, 3, 1, nil, 5, nil, nil, nil, nil, nil,
271
- nil, nil, -30, -14, nil, nil, -15, nil, -18, nil,
272
- nil, nil, -42 ]
273
-
274
- racc_goto_default = [
275
- nil, nil, nil, 3, 4, 5, 6, 7, 8, 9,
276
- 10, 11, nil, nil, 17, 18, nil, 28, 29, 30,
277
- 45, 46, nil ]
278
-
279
- racc_reduce_table = [
280
- 0, 0, :racc_error,
281
- 1, 25, :_reduce_1,
282
- 1, 26, :_reduce_2,
283
- 0, 26, :_reduce_3,
284
- 2, 27, :_reduce_4,
285
- 1, 27, :_reduce_5,
286
- 1, 28, :_reduce_none,
287
- 1, 28, :_reduce_none,
288
- 1, 28, :_reduce_none,
289
- 1, 28, :_reduce_none,
290
- 1, 28, :_reduce_none,
291
- 1, 28, :_reduce_none,
292
- 1, 28, :_reduce_none,
293
- 3, 29, :_reduce_13,
294
- 4, 29, :_reduce_14,
295
- 5, 29, :_reduce_15,
296
- 6, 29, :_reduce_16,
297
- 4, 29, :_reduce_17,
298
- 1, 37, :_reduce_18,
299
- 2, 37, :_reduce_19,
300
- 1, 30, :_reduce_20,
301
- 1, 31, :_reduce_21,
302
- 3, 35, :_reduce_22,
303
- 4, 35, :_reduce_23,
304
- 2, 38, :_reduce_24,
305
- 3, 38, :_reduce_25,
306
- 2, 39, :_reduce_26,
307
- 3, 32, :_reduce_27,
308
- 1, 40, :_reduce_28,
309
- 0, 40, :_reduce_29,
310
- 2, 41, :_reduce_30,
311
- 1, 41, :_reduce_31,
312
- 1, 42, :_reduce_32,
313
- 2, 42, :_reduce_33,
314
- 1, 43, :_reduce_34,
315
- 2, 43, :_reduce_35,
316
- 3, 34, :_reduce_36,
317
- 1, 33, :_reduce_37,
318
- 1, 36, :_reduce_none,
319
- 1, 36, :_reduce_none,
320
- 2, 44, :_reduce_40,
321
- 3, 44, :_reduce_41,
322
- 2, 45, :_reduce_42,
323
- 3, 45, :_reduce_43,
324
- 1, 46, :_reduce_44,
325
- 2, 46, :_reduce_45 ]
326
-
327
- racc_reduce_n = 46
328
-
329
- racc_shift_n = 68
330
-
331
- racc_token_table = {
332
- false => 0,
333
- :error => 1,
334
- :T_TEXT => 2,
335
- :T_STRING_SQUOTE => 3,
336
- :T_STRING_DQUOTE => 4,
337
- :T_STRING_BODY => 5,
338
- :T_DOCTYPE_START => 6,
339
- :T_DOCTYPE_END => 7,
340
- :T_DOCTYPE_TYPE => 8,
341
- :T_DOCTYPE_NAME => 9,
342
- :T_DOCTYPE_INLINE => 10,
343
- :T_CDATA => 11,
344
- :T_COMMENT => 12,
345
- :T_ELEM_START => 13,
346
- :T_ELEM_NAME => 14,
347
- :T_ELEM_NS => 15,
348
- :T_ELEM_END => 16,
349
- :T_ATTR => 17,
350
- :T_ATTR_NS => 18,
351
- :T_XML_DECL_START => 19,
352
- :T_XML_DECL_END => 20,
353
- :T_PROC_INS_START => 21,
354
- :T_PROC_INS_NAME => 22,
355
- :T_PROC_INS_END => 23 }
356
-
357
- racc_nt_base = 24
358
-
359
- racc_use_result_var = false
360
-
361
- Racc_arg = [
362
- racc_action_table,
363
- racc_action_check,
364
- racc_action_default,
365
- racc_action_pointer,
366
- racc_goto_table,
367
- racc_goto_check,
368
- racc_goto_default,
369
- racc_goto_pointer,
370
- racc_nt_base,
371
- racc_reduce_table,
372
- racc_token_table,
373
- racc_shift_n,
374
- racc_reduce_n,
375
- racc_use_result_var ]
376
-
377
- Racc_token_to_s_table = [
378
- "$end",
379
- "error",
380
- "T_TEXT",
381
- "T_STRING_SQUOTE",
382
- "T_STRING_DQUOTE",
383
- "T_STRING_BODY",
384
- "T_DOCTYPE_START",
385
- "T_DOCTYPE_END",
386
- "T_DOCTYPE_TYPE",
387
- "T_DOCTYPE_NAME",
388
- "T_DOCTYPE_INLINE",
389
- "T_CDATA",
390
- "T_COMMENT",
391
- "T_ELEM_START",
392
- "T_ELEM_NAME",
393
- "T_ELEM_NS",
394
- "T_ELEM_END",
395
- "T_ATTR",
396
- "T_ATTR_NS",
397
- "T_XML_DECL_START",
398
- "T_XML_DECL_END",
399
- "T_PROC_INS_START",
400
- "T_PROC_INS_NAME",
401
- "T_PROC_INS_END",
402
- "$start",
403
- "document",
404
- "expressions",
405
- "expressions_",
406
- "expression",
407
- "doctype",
408
- "cdata",
409
- "comment",
410
- "element",
411
- "text",
412
- "xmldecl",
413
- "proc_ins",
414
- "string",
415
- "doctype_inline",
416
- "element_open",
417
- "element_start",
418
- "attributes",
419
- "attributes_",
420
- "attribute",
421
- "attribute_name",
422
- "string_dquote",
423
- "string_squote",
424
- "string_body" ]
425
-
426
- Racc_debug_parser = false
427
-
428
- ##### State transition tables end #####
429
-
430
- # reduce 0 omitted
431
-
432
- def _reduce_1(val, _values)
433
- on_document(val[0])
434
- end
435
-
436
- def _reduce_2(val, _values)
437
- val[0]
438
- end
439
-
440
- def _reduce_3(val, _values)
441
- []
442
- end
443
-
444
- def _reduce_4(val, _values)
445
- val[0] << val[1]
446
- end
380
+ ##
381
+ # @param [String] name
382
+ # @param [String] ns_name
383
+ # @param [String] value
384
+ # @return [Oga::XML::Attribute]
385
+ #
386
+ def on_attribute(name, ns_name = nil, value = nil)
387
+ return Attribute.new(
388
+ :namespace_name => ns_name,
389
+ :name => name,
390
+ :value => value
391
+ )
392
+ end
447
393
 
448
- def _reduce_5(val, _values)
449
- val
450
- end
394
+ ##
395
+ # @param [Array] attrs
396
+ #
397
+ def on_attributes(attrs)
398
+ return attrs
399
+ end
451
400
 
452
- # reduce 6 omitted
401
+ def _rule_0(val)
402
+ on_document(val[0])
403
+ end
453
404
 
454
- # reduce 7 omitted
405
+ def _rule_1(val)
406
+ val[0]
407
+ end
455
408
 
456
- # reduce 8 omitted
409
+ def _rule_2(val)
410
+ val[0]
411
+ end
457
412
 
458
- # reduce 9 omitted
413
+ def _rule_3(val)
414
+ val[0]
415
+ end
459
416
 
460
- # reduce 10 omitted
417
+ def _rule_4(val)
418
+ val[0]
419
+ end
461
420
 
462
- # reduce 11 omitted
421
+ def _rule_5(val)
422
+ val[0]
423
+ end
463
424
 
464
- # reduce 12 omitted
425
+ def _rule_6(val)
426
+ val[0]
427
+ end
465
428
 
466
- def _reduce_13(val, _values)
467
- on_doctype(:name => val[1])
468
-
469
- end
429
+ def _rule_7(val)
430
+ val[0]
431
+ end
470
432
 
471
- def _reduce_14(val, _values)
472
- on_doctype(:name => val[1], :type => val[2])
473
-
474
- end
433
+ def _rule_8(val)
434
+ val[0]
435
+ end
475
436
 
476
- def _reduce_15(val, _values)
477
- on_doctype(:name => val[1], :type => val[2], :public_id => val[3])
478
-
479
- end
437
+ def _rule_9(val)
438
+
439
+ name = val[1]
440
+ follow = val[2]
441
+
442
+ on_doctype(
443
+ :name => name,
444
+ :type => follow[0],
445
+ :public_id => follow[1],
446
+ :system_id => follow[2],
447
+ :inline_rules => follow[3]
448
+ )
449
+
450
+ end
480
451
 
481
- def _reduce_16(val, _values)
482
- on_doctype(
483
- :name => val[1],
484
- :type => val[2],
485
- :public_id => val[3],
486
- :system_id => val[4]
487
- )
488
-
489
- end
452
+ def _rule_10(val)
453
+ []
454
+ end
490
455
 
491
- def _reduce_17(val, _values)
492
- on_doctype(:name => val[1], :inline_rules => val[2])
493
-
494
- end
456
+ def _rule_11(val)
457
+ [val[0], *val[1]]
458
+ end
495
459
 
496
- def _reduce_18(val, _values)
497
- val[0]
498
- end
460
+ def _rule_12(val)
461
+ [nil, nil, nil, val[0]]
462
+ end
499
463
 
500
- def _reduce_19(val, _values)
501
- val[0] + val[1]
502
- end
464
+ def _rule_13(val)
465
+ val[0].inject(:+)
466
+ end
503
467
 
504
- def _reduce_20(val, _values)
505
- on_cdata(val[0])
506
- end
468
+ def _rule_14(val)
469
+ [val[0], val[1]]
470
+ end
507
471
 
508
- def _reduce_21(val, _values)
509
- on_comment(val[0])
510
- end
472
+ def _rule_15(val)
473
+ nil
474
+ end
511
475
 
512
- def _reduce_22(val, _values)
513
- on_proc_ins(val[1])
514
-
515
- end
476
+ def _rule_16(val)
477
+ on_cdata(val[0])
478
+ end
516
479
 
517
- def _reduce_23(val, _values)
518
- on_proc_ins(val[1], val[2])
519
-
520
- end
480
+ def _rule_17(val)
481
+ on_comment(val[0])
482
+ end
521
483
 
522
- def _reduce_24(val, _values)
523
- [nil, val[1]]
524
- end
484
+ def _rule_18(val)
485
+
486
+ on_proc_ins(val[1], val[2])
487
+
488
+ end
525
489
 
526
- def _reduce_25(val, _values)
527
- [val[1], val[2]]
528
- end
490
+ def _rule_19(val)
491
+ [nil, val[0]]
492
+ end
529
493
 
530
- def _reduce_26(val, _values)
531
- on_element(val[0][0], val[0][1], val[1])
532
- end
494
+ def _rule_20(val)
495
+ val
496
+ end
533
497
 
534
- def _reduce_27(val, _values)
535
- if val[0]
536
- on_element_children(val[0], val[1])
537
- end
498
+ def _rule_21(val)
499
+
500
+ on_element(val[1][0], val[1][1], val[2])
501
+
502
+ end
538
503
 
539
- after_element(val[0])
540
-
541
- end
504
+ def _rule_22(val)
505
+
506
+ if val[0]
507
+ on_element_children(val[0], val[1])
508
+ end
542
509
 
543
- def _reduce_28(val, _values)
544
- val[0]
545
- end
510
+ after_element(val[0])
511
+
512
+ end
546
513
 
547
- def _reduce_29(val, _values)
548
- []
549
- end
514
+ def _rule_23(val)
515
+ on_attributes(val[0])
516
+ end
550
517
 
551
- def _reduce_30(val, _values)
552
- val[0] << val[1]
553
- end
518
+ def _rule_24(val)
519
+ on_attribute(val[1], val[0], val[2])
520
+ end
554
521
 
555
- def _reduce_31(val, _values)
556
- val
557
- end
522
+ def _rule_25(val)
523
+ on_attribute(val[0], nil, val[1])
524
+ end
558
525
 
559
- def _reduce_32(val, _values)
560
- val[0]
561
- end
526
+ def _rule_26(val)
527
+ on_xml_decl(val[1])
528
+ end
562
529
 
563
- def _reduce_33(val, _values)
564
- val[0].value = val[1]
565
- val[0]
566
-
567
- end
530
+ def _rule_27(val)
531
+ on_text(val[0])
532
+ end
568
533
 
569
- def _reduce_34(val, _values)
570
- Attribute.new(:name => val[0])
571
- end
534
+ def _rule_28(val)
535
+ val[1]
536
+ end
572
537
 
573
- def _reduce_35(val, _values)
574
- Attribute.new(:namespace_name => val[0], :name => val[1])
575
-
576
- end
538
+ def _rule_29(val)
539
+ val[1]
540
+ end
577
541
 
578
- def _reduce_36(val, _values)
579
- on_xml_decl(val[1])
580
- end
542
+ def _rule_30(val)
543
+ val[0].inject(:+) || ''
544
+ end
581
545
 
582
- def _reduce_37(val, _values)
583
- on_text(val[0])
584
- end
546
+ def _rule_31(val)
547
+ val[0]
548
+ end
585
549
 
586
- # reduce 38 omitted
550
+ def _rule_32(val)
551
+ val[0]
552
+ end
587
553
 
588
- # reduce 39 omitted
554
+ def _rule_33(val)
555
+ val[0]
556
+ end
589
557
 
590
- def _reduce_40(val, _values)
591
- ''
592
- end
558
+ def _rule_34(val)
559
+ val[0]
560
+ end
593
561
 
594
- def _reduce_41(val, _values)
595
- val[1]
596
- end
562
+ def _rule_35(val)
563
+ val[0]
564
+ end
597
565
 
598
- def _reduce_42(val, _values)
599
- ''
600
- end
566
+ def _rule_36(val)
567
+ val[0]
568
+ end
601
569
 
602
- def _reduce_43(val, _values)
603
- val[1]
604
- end
570
+ def _rule_37(val)
571
+ val[0]
572
+ end
605
573
 
606
- def _reduce_44(val, _values)
607
- val[0]
574
+ def _rule_38(val)
575
+ val[0]
576
+ end
608
577
  end
609
-
610
- def _reduce_45(val, _values)
611
- val[0] + val[1]
612
578
  end
613
-
614
- def _reduce_none(val, _values)
615
- val[0]
616
579
  end
617
-
618
- end # class Parser
619
- end # module XML
620
- end # module Oga