oga 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,158 @@
1
+ module Oga
2
+ ##
3
+ # Thread-safe LRU cache using a Hash as the underlying storage engine.
4
+ # Whenever the size of the cache exceeds the given limit the oldest keys are
5
+ # removed (based on insert order).
6
+ #
7
+ # This class uses its own list of keys (as returned by {LRU#keys}) instead of
8
+ # relying on `Hash#keys` as the latter allocates a new Array upon every call.
9
+ #
10
+ # This class doesn't use MonitorMixin due to the extra overhead it adds
11
+ # compared to using a Mutex directly.
12
+ #
13
+ # Example usage:
14
+ #
15
+ # cache = LRU.new(3)
16
+ #
17
+ # cache[:a] = 10
18
+ # cache[:b] = 20
19
+ # cache[:c] = 30
20
+ # cache[:d] = 40
21
+ #
22
+ # cache.keys # => [:b, :c, :d]
23
+ #
24
+ class LRU
25
+ ##
26
+ # @param [Fixnum] maximum The maximum number of keys to keep.
27
+ #
28
+ def initialize(maximum = 1024)
29
+ @maximum = maximum
30
+ @cache = {}
31
+ @keys = []
32
+ @mutex = Mutex.new
33
+ @owner = Thread.current
34
+ end
35
+
36
+ ##
37
+ # @param [Fixnum] value The new limit; excess keys are evicted immediately.
38
+ #
39
+ def maximum=(value)
40
+ synchronize do
41
+ @maximum = value
42
+
43
+ resize
44
+ end
45
+ end
46
+
47
+ ##
48
+ # @return [Fixnum] The maximum number of keys the cache may hold.
49
+ #
50
+ def maximum
51
+ return synchronize { @maximum }
52
+ end
53
+
54
+ ##
55
+ # Returns the value of the key without changing its eviction order.
56
+ #
57
+ # @param [Mixed] key
58
+ # @return [Mixed] The stored value, or nil when the key is not cached.
59
+ #
60
+ def [](key)
61
+ return synchronize { @cache[key] }
62
+ end
63
+
64
+ ##
65
+ # Sets the key and its value, making the key the newest one. Old keys are
66
+ # discarded if the LRU size exceeds the limit.
67
+ #
68
+ # @param [Mixed] key
69
+ # @param [Mixed] value
70
+ #
71
+ def []=(key, value)
72
+ synchronize do
73
+ @cache[key] = value
74
+ # Array#delete is a no-op for absent keys, no include? scan is needed.
75
+ @keys.delete(key)
76
+
77
+ @keys << key
78
+
79
+ resize
80
+ end
81
+ end
82
+
83
+ ##
84
+ # Returns the value of a key if it is truthy, otherwise yields the supplied
85
+ # block and stores its return value as the value of the key.
86
+ #
87
+ # @param [Mixed] key
88
+ # @return [Mixed]
89
+ #
90
+ def get_or_set(key)
91
+ return synchronize { self[key] ||= yield }
92
+ end
93
+
94
+ ##
95
+ # @return [Array] The internal list of keys, oldest first (not a copy).
96
+ #
97
+ def keys
98
+ return synchronize { @keys }
99
+ end
100
+
101
+ ##
102
+ # @param [Mixed] key
103
+ # @return [TrueClass|FalseClass] true when the key is present in the cache.
104
+ #
105
+ def key?(key)
106
+ return synchronize { @cache.key?(key) }
107
+ end
108
+
109
+ ##
110
+ # Removes all keys and their values from the cache.
111
+ #
112
+ def clear
113
+ synchronize do
114
+ @keys.clear
115
+ @cache.clear
116
+ end
117
+ end
118
+
119
+ ##
120
+ # @return [Fixnum] The number of keys currently stored in the cache.
121
+ #
122
+ def size
123
+ return synchronize { @cache.size }
124
+ end
125
+
126
+ alias_method :length, :size
127
+
128
+ private
129
+
130
+ ##
131
+ # Yields the supplied block while holding the mutex. Calls nested inside an
132
+ # already synchronized block (e.g. `resize` calling `size`) yield directly so
133
+ # the non re-entrant Mutex is not locked twice by the same thread.
134
+ #
135
+ def synchronize
136
+ # Mutex#owned? (Ruby 2.0+) is only true while this thread holds the lock,
137
+ # whereas a remembered owner thread stays set after the lock is released
138
+ # and would let that thread skip locking on its next call.
139
+ if @mutex.owned?
140
+ yield
141
+ else
142
+ @mutex.synchronize { yield }
143
+ end
144
+ end
145
+
146
+ ##
147
+ # Removes the oldest keys (the front of the key list) until the size of the
148
+ # hash no longer exceeds the maximum size.
149
+ #
150
+ def resize
151
+ return unless size > @maximum
152
+
153
+ to_remove = @keys.shift(size - @maximum)
154
+
155
+ to_remove.each { |key| @cache.delete(key) }
156
+ end
157
+ end # LRU
158
+ end # Oga
@@ -1,3 +1,3 @@
1
1
  module Oga
2
- VERSION = '0.2.3'
2
+ VERSION = '0.3.0'
3
3
  end # Oga
@@ -29,7 +29,7 @@ module Oga
29
29
  #
30
30
  DEFAULT_NAMESPACE = Namespace.new(
31
31
  :name => 'xml',
32
- :uri => 'http://www.w3.org/XML/1998/namespace'
32
+ :uri => XML::DEFAULT_NAMESPACE.uri
33
33
  ).freeze
34
34
 
35
35
  ##
@@ -0,0 +1,13 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # The default XML namespace.
5
+ #
6
+ # @return [Oga::XML::Namespace]
7
+ #
8
+ DEFAULT_NAMESPACE = Namespace.new(
9
+ :name => 'xmlns',
10
+ :uri => 'http://www.w3.org/XML/1998/namespace'
11
+ ).freeze
12
+ end # XML
13
+ end # Oga
@@ -23,7 +23,9 @@ module Oga
23
23
  class Element < Node
24
24
  include Querying
25
25
 
26
- attr_accessor :name, :namespace_name, :attributes, :namespaces
26
+ attr_accessor :name, :namespace_name, :attributes
27
+
28
+ attr_writer :namespaces
27
29
 
28
30
  ##
29
31
  # The attribute prefix/namespace used for registering element namespaces.
@@ -166,6 +168,26 @@ module Oga
166
168
  return @namespace
167
169
  end
168
170
 
171
+ ##
172
+ # Returns the namespaces registered on this element, or an empty Hash in
173
+ # case of an HTML element.
174
+ #
175
+ # @return [Hash]
176
+ #
177
+ def namespaces
178
+ return html? ? {} : @namespaces
179
+ end
180
+
181
+ ##
182
+ # Returns true if the current element resides in the default XML
183
+ # namespace.
184
+ #
185
+ # @return [TrueClass|FalseClass]
186
+ #
187
+ def default_namespace?
188
+ return namespace == DEFAULT_NAMESPACE || namespace.nil?
189
+ end
190
+
169
191
  ##
170
192
  # Returns the text of all child nodes joined together.
171
193
  #
@@ -284,6 +306,8 @@ module Oga
284
306
  # @return [Hash]
285
307
  #
286
308
  def available_namespaces
309
+ return {} if html? # HTML(5) completely ignores namespaces
310
+
287
311
  merged = namespaces.dup
288
312
  node = parent
289
313
 
@@ -1,5 +1,9 @@
1
1
  module Oga
2
2
  module XML
3
+ ##
4
+ # Module for encoding/decoding XML and HTML entities. The mapping of HTML
5
+ # entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
6
+ #
3
7
  module Entities
4
8
  ##
5
9
  # Hash containing XML entities and the corresponding characters.
@@ -11,15 +15,10 @@ module Oga
11
15
  #
12
16
  DECODE_MAPPING = {
13
17
  '&lt;' => '<',
14
- '&#60;' => '<',
15
18
  '&gt;' => '>',
16
- '&#62;' => '>',
17
19
  '&apos;' => "'",
18
- '&#39;' => "'",
19
20
  '&quot;' => '"',
20
- '&#34;' => '"',
21
21
  '&amp;' => '&',
22
- '&#38;' => '&',
23
22
  }
24
23
 
25
24
  ##
@@ -35,16 +34,45 @@ module Oga
35
34
  '<' => '&lt;',
36
35
  }
37
36
 
37
+ ##
38
+ # @return [String]
39
+ #
40
+ AMPERSAND = '&'.freeze
41
+
42
+ ##
43
+ # Regexp for matching XML/HTML entities such as "&nbsp;".
44
+ #
45
+ # @return [Regexp]
46
+ #
47
+ REGULAR_ENTITY = /&[a-zA-Z]+;/
48
+
49
+ ##
50
+ # Regexp for matching XML/HTML entities such as "&#38;".
51
+ #
52
+ # @return [Regexp]
53
+ #
54
+ CODEPOINT_ENTITY = /&#(x)?([a-zA-Z0-9]+);/
55
+
56
+ ##
57
+ # @return [Regexp]
58
+ #
59
+ ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
60
+
38
61
  ##
39
62
  # Decodes XML entities.
40
63
  #
41
64
  # @param [String] input
65
+ # @param [Hash] mapping
42
66
  # @return [String]
43
67
  #
44
- def self.decode(input)
45
- if input.include?('&')
46
- DECODE_MAPPING.each do |find, replace|
47
- input = input.gsub(find, replace)
68
+ def self.decode(input, mapping = DECODE_MAPPING)
69
+ return input unless input.include?(AMPERSAND)
70
+
71
+ input = input.gsub(REGULAR_ENTITY, mapping)
72
+
73
+ if input.include?(AMPERSAND)
74
+ input = input.gsub(CODEPOINT_ENTITY) do |match|
75
+ [$1 ? Integer($2, 16) : Integer($2)].pack('U')
48
76
  end
49
77
  end
50
78
 
@@ -55,14 +83,11 @@ module Oga
55
83
  # Encodes special characters as XML entities.
56
84
  #
57
85
  # @param [String] input
86
+ # @param [Hash] mapping
58
87
  # @return [String]
59
88
  #
60
- def self.encode(input)
61
- ENCODE_MAPPING.each do |from, to|
62
- input = input.gsub(from, to) if input.include?(from)
63
- end
64
-
65
- return input
89
+ def self.encode(input, mapping = ENCODE_MAPPING)
90
+ return input.gsub(ENCODE_REGEXP, mapping)
66
91
  end
67
92
  end # Entities
68
93
  end # XML
@@ -217,7 +217,7 @@ module Oga
217
217
  # @param [String] value The data between the quotes.
218
218
  #
219
219
  def on_string_body(value)
220
- add_token(:T_STRING_BODY, Entities.decode(value))
220
+ add_token(:T_STRING_BODY, value)
221
221
  end
222
222
 
223
223
  ##
@@ -373,7 +373,7 @@ module Oga
373
373
  def on_text(value)
374
374
  return if value.empty?
375
375
 
376
- add_token(:T_TEXT, Entities.decode(value))
376
+ add_token(:T_TEXT, value)
377
377
  end
378
378
 
379
379
  ##
@@ -37,6 +37,14 @@ module Oga
37
37
  def inspect
38
38
  return "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
39
39
  end
40
+
41
+ ##
42
+ # @param [Oga::XML::Namespace] other
43
+ # @return [TrueClass|FalseClass] true when other is of this class and has an
44
+ #
45
+ def ==(other)
46
+ return other.is_a?(self.class) && name == other.name && uri == other.uri
47
+ end
40
48
  end # Namespace
41
49
  end # XML
42
50
  end # Oga
@@ -163,6 +163,22 @@ module Oga
163
163
 
164
164
  node_set.insert(index, other)
165
165
  end
166
+
167
+ ##
168
+ # @return [TrueClass|FalseClass] true when the root node is an HTML Document.
169
+ #
170
+ def html?
171
+ root = root_node
172
+
173
+ return root.is_a?(Document) && root.html?
174
+ end
175
+
176
+ ##
177
+ # @return [TrueClass|FalseClass] The inverse of html?.
178
+ #
179
+ def xml?
180
+ return !html?
181
+ end
166
182
  end # Element
167
183
  end # XML
168
184
  end # Oga
@@ -44,10 +44,10 @@ module Oga
44
44
  # @param [Oga::XML::NodeSet] owner The owner of the set.
45
45
  #
46
46
  def initialize(nodes = [], owner = nil)
47
- @nodes = nodes.uniq
47
+ @nodes = nodes
48
48
  @owner = owner
49
49
 
50
- @nodes.each { |node| take_ownership(node) }
50
+ @nodes.each { |node| take_ownership(node) } if owner
51
51
  end
52
52
 
53
53
  ##
@@ -1,13 +1,170 @@
1
- #
2
- # DO NOT MODIFY!!!!
3
- # This file is automatically generated by Racc 1.4.12
4
- # from Racc grammer file "".
5
- #
1
+ # This file is automatically generated by ruby-ll. Manually changing this file
2
+ # is not recommended as any changes will be lost the next time this parser is
3
+ # re-generated.
4
+ require 'll/setup'
6
5
 
7
- require 'racc/parser.rb'
8
6
  module Oga
9
- module XML
10
- class Parser < Racc::Parser
7
+ module XML
8
+ ##
9
+ # DOM parser for both XML and HTML.
10
+ #
11
+ # This parser does not produce a dedicated AST, instead it emits XML nodes
12
+ # directly. Basic usage of this parser is as follows:
13
+ #
14
+ # parser = Oga::XML::Parser.new('<foo></foo>')
15
+ # document = parser.parse
16
+ #
17
+ # To enable HTML parsing you'd use the following instead:
18
+ #
19
+ # parser = Oga::XML::Parser.new('<foo></foo>', :html => true)
20
+ # document = parser.parse
21
+ #
22
+ # In both cases you can use either a String or an IO as the parser input. IO
23
+ # instances will result in lower memory overhead, especially when parsing large
24
+ # files.
25
+ #
26
+ class Parser < LL::Driver
27
+ CONFIG = LL::DriverConfig.new
28
+
29
+ CONFIG.terminals = [
30
+ :$EOF, # 0
31
+ :T_TEXT, # 1
32
+ :T_STRING_SQUOTE, # 2
33
+ :T_STRING_DQUOTE, # 3
34
+ :T_STRING_BODY, # 4
35
+ :T_DOCTYPE_START, # 5
36
+ :T_DOCTYPE_END, # 6
37
+ :T_DOCTYPE_TYPE, # 7
38
+ :T_DOCTYPE_NAME, # 8
39
+ :T_DOCTYPE_INLINE, # 9
40
+ :T_CDATA, # 10
41
+ :T_COMMENT, # 11
42
+ :T_ELEM_START, # 12
43
+ :T_ELEM_NAME, # 13
44
+ :T_ELEM_NS, # 14
45
+ :T_ELEM_END, # 15
46
+ :T_ATTR, # 16
47
+ :T_ATTR_NS, # 17
48
+ :T_XML_DECL_START, # 18
49
+ :T_XML_DECL_END, # 19
50
+ :T_PROC_INS_START, # 20
51
+ :T_PROC_INS_NAME, # 21
52
+ :T_PROC_INS_END, # 22
53
+ ].freeze
54
+
55
+ CONFIG.rules = [
56
+ [3, 0, 0, 1], # 0
57
+ [3, 1, 4, 19, 6, 0], # 1
58
+ [3, 2, 0, 3], # 2
59
+ [3, 3, 0, 7], # 3
60
+ [3, 4, 0, 8], # 4
61
+ [3, 5, 0, 9], # 5
62
+ [3, 6, 0, 16], # 6
63
+ [3, 7, 0, 12], # 7
64
+ [3, 8, 0, 15], # 8
65
+ [3, 9, 0, 4, 1, 8, 1, 5], # 9
66
+ [3, 10, 1, 6], # 10
67
+ [3, 11, 0, 6, 1, 7], # 11
68
+ [3, 12, 1, 6, 0, 5], # 12
69
+ [3, 13, 5, 20, 6, 0], # 13
70
+ [3, 14, 1, 6, 8, 21, 0, 17], # 14
71
+ [3, 15, 1, 6], # 15
72
+ [3, 16, 1, 10], # 16
73
+ [3, 17, 1, 11], # 17
74
+ [3, 18, 1, 22, 8, 22, 1, 21, 1, 20], # 18
75
+ [3, 19, 1, 13], # 19
76
+ [3, 20, 1, 13, 1, 14], # 20
77
+ [3, 21, 0, 13, 0, 10, 1, 12], # 21
78
+ [3, 22, 1, 15, 0, 1, 0, 11], # 22
79
+ [3, 23, 4, 23, 6, 0], # 23
80
+ [3, 24, 8, 24, 1, 16, 1, 17], # 24
81
+ [3, 25, 8, 25, 1, 16], # 25
82
+ [3, 26, 1, 19, 0, 13, 1, 18], # 26
83
+ [3, 27, 1, 1], # 27
84
+ [3, 28, 1, 3, 0, 18, 1, 3], # 28
85
+ [3, 29, 1, 2, 0, 18, 1, 2], # 29
86
+ [3, 30, 4, 26, 6, 0], # 30
87
+ [3, 31, 0, 2], # 31
88
+ [3, 32, 1, 9], # 32
89
+ [3, 33, 0, 17], # 33
90
+ [3, 34, 1, 1], # 34
91
+ [3, 35, 0, 14], # 35
92
+ [3, 36, 0, 17], # 36
93
+ [3, 37, 0, 17], # 37
94
+ [3, 38, 1, 4], # 38
95
+ ].freeze
96
+
97
+ CONFIG.table = [
98
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], # 0
99
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], # 1
100
+ [-1, 6, -1, -1, -1, 2, -1, -1, -1, -1, 3, 4, 7, -1, -1, -1, -1, -1, 8, -1, 5, -1, -1], # 2
101
+ [-1, -1, -1, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 3
102
+ [12, 12, 12, 12, 12, 12, 10, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12], # 4
103
+ [13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13], # 5
104
+ [-1, -1, 14, 14, -1, -1, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 6
105
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 7
106
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 8
107
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, -1, -1], # 9
108
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 20, -1, -1, -1, -1, -1, -1, -1, -1], # 10
109
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 11
110
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 12
111
+ [23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23], # 13
112
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 25, 24, -1, -1, -1, -1, -1], # 14
113
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, -1], # 15
114
+ [-1, 27, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 16
115
+ [-1, -1, 29, 28, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 17
116
+ [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30], # 18
117
+ [-1, 31, -1, -1, -1, 31, -1, -1, -1, -1, 31, 31, 31, -1, -1, -1, -1, -1, 31, -1, 31, -1, -1], # 19
118
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, 32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 20
119
+ [-1, -1, 33, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 21
120
+ [-1, 34, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 22
121
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 35, 35, -1, -1, -1, -1, -1], # 23
122
+ [-1, -1, 36, 36, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 24
123
+ [-1, -1, 37, 37, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 25
124
+ [-1, -1, -1, -1, 38, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], # 26
125
+ ].freeze
126
+
127
+ CONFIG.actions = [
128
+ [:_rule_0, 1], # 0
129
+ [:_rule_1, 1], # 1
130
+ [:_rule_2, 1], # 2
131
+ [:_rule_3, 1], # 3
132
+ [:_rule_4, 1], # 4
133
+ [:_rule_5, 1], # 5
134
+ [:_rule_6, 1], # 6
135
+ [:_rule_7, 1], # 7
136
+ [:_rule_8, 1], # 8
137
+ [:_rule_9, 3], # 9
138
+ [:_rule_10, 1], # 10
139
+ [:_rule_11, 2], # 11
140
+ [:_rule_12, 2], # 12
141
+ [:_rule_13, 1], # 13
142
+ [:_rule_14, 3], # 14
143
+ [:_rule_15, 1], # 15
144
+ [:_rule_16, 1], # 16
145
+ [:_rule_17, 1], # 17
146
+ [:_rule_18, 4], # 18
147
+ [:_rule_19, 1], # 19
148
+ [:_rule_20, 2], # 20
149
+ [:_rule_21, 3], # 21
150
+ [:_rule_22, 3], # 22
151
+ [:_rule_23, 1], # 23
152
+ [:_rule_24, 3], # 24
153
+ [:_rule_25, 2], # 25
154
+ [:_rule_26, 3], # 26
155
+ [:_rule_27, 1], # 27
156
+ [:_rule_28, 3], # 28
157
+ [:_rule_29, 3], # 29
158
+ [:_rule_30, 1], # 30
159
+ [:_rule_31, 1], # 31
160
+ [:_rule_32, 1], # 32
161
+ [:_rule_33, 1], # 33
162
+ [:_rule_34, 1], # 34
163
+ [:_rule_35, 1], # 35
164
+ [:_rule_36, 1], # 36
165
+ [:_rule_37, 1], # 37
166
+ [:_rule_38, 1], # 38
167
+ ].freeze
11
168
 
12
169
  ##
13
170
  # Hash mapping token types and dedicated error labels.
@@ -15,27 +172,27 @@ module Oga
15
172
  # @return [Hash]
16
173
  #
17
174
  TOKEN_ERROR_MAPPING = {
18
- 'T_STRING' => 'string',
19
- 'T_TEXT' => 'text',
20
- 'T_DOCTYPE_START' => 'doctype start',
21
- 'T_DOCTYPE_END' => 'doctype closing tag',
22
- 'T_DOCTYPE_TYPE' => 'doctype type',
23
- 'T_DOCTYPE_NAME' => 'doctype name',
24
- 'T_DOCTYPE_INLINE' => 'inline doctype rules',
25
- 'T_CDATA' => 'CDATA',
26
- 'T_COMMENT' => 'comment',
27
- 'T_ELEM_START' => 'element start',
28
- 'T_ELEM_NAME' => 'element name',
29
- 'T_ELEM_NS' => 'element namespace',
30
- 'T_ELEM_END' => 'element closing tag',
31
- 'T_ATTR' => 'attribute',
32
- 'T_ATTR_NS' => 'attribute namespace',
33
- 'T_XML_DECL_START' => 'XML declaration start',
34
- 'T_XML_DECL_END' => 'XML declaration end',
35
- 'T_PROC_INS_START' => 'processing-instruction start',
36
- 'T_PROC_INS_NAME' => 'processing-instruction name',
37
- 'T_PROC_INS_END' => 'processing-instruction closing tag',
38
- '$end' => 'end of input'
175
+ :T_STRING => 'string',
176
+ :T_TEXT => 'text',
177
+ :T_DOCTYPE_START => 'doctype start',
178
+ :T_DOCTYPE_END => 'doctype closing tag',
179
+ :T_DOCTYPE_TYPE => 'doctype type',
180
+ :T_DOCTYPE_NAME => 'doctype name',
181
+ :T_DOCTYPE_INLINE => 'inline doctype rules',
182
+ :T_CDATA => 'CDATA',
183
+ :T_COMMENT => 'comment',
184
+ :T_ELEM_START => 'element start',
185
+ :T_ELEM_NAME => 'element name',
186
+ :T_ELEM_NS => 'element namespace',
187
+ :T_ELEM_END => 'element closing tag',
188
+ :T_ATTR => 'attribute',
189
+ :T_ATTR_NS => 'attribute namespace',
190
+ :T_XML_DECL_START => 'XML declaration start',
191
+ :T_XML_DECL_END => 'XML declaration end',
192
+ :T_PROC_INS_START => 'processing-instruction start',
193
+ :T_PROC_INS_NAME => 'processing-instruction name',
194
+ :T_PROC_INS_END => 'processing-instruction closing tag',
195
+ -1 => 'end of input'
39
196
  }
40
197
 
41
198
  ##
@@ -64,44 +221,49 @@ module Oga
64
221
  #
65
222
  # @yieldparam [Array]
66
223
  #
67
- def yield_next_token
224
+ def each_token
68
225
  @lexer.advance do |type, value, line|
69
226
  @line = line if line
70
227
 
71
228
  yield [type, value]
72
229
  end
73
230
 
74
- yield [false, false]
231
+ yield [-1, -1]
75
232
  end
76
233
 
77
234
  ##
78
- # @param [Fixnum] type The type of token the error occured on.
79
- # @param [String] value The value of the token.
80
- # @param [Array] stack The current stack of parsed nodes.
81
- # @raise [Racc::ParseError]
235
+ # @param [Fixnum] stack_type
236
+ # @param [Fixnum] stack_value
237
+ # @param [Symbol] token_type
238
+ # @param [String] token_value
82
239
  #
83
- def on_error(type, value, stack)
84
- name = token_to_str(type)
85
- name = TOKEN_ERROR_MAPPING[name] || name
240
+ def parser_error(stack_type, stack_value, token_type, token_value)
241
+ case id_to_type(stack_type)
242
+ when :rule
243
+ message = "Unexpected #{token_type} for rule #{stack_value}"
244
+ when :terminal
245
+ expected = id_to_terminal(stack_value)
246
+ expected = TOKEN_ERROR_MAPPING[expected] || expected
247
+ got = TOKEN_ERROR_MAPPING[token_type] || token_type
248
+ message = "Unexpected #{got}, expected #{expected} instead"
249
+ when :eof
250
+ message = 'Unexpected end of input'
251
+ end
252
+
253
+ message += " on line #{@line}"
86
254
 
87
- raise Racc::ParseError, "Unexpected #{name} on line #{@line}"
255
+ raise LL::ParserError, message
88
256
  end
89
257
 
90
258
  ##
91
- # Parses the input and returns the corresponding AST.
92
- #
93
- # @example
94
- # parser = Oga::Parser.new('<foo>bar</foo>')
95
- # ast = parser.parse
96
- #
97
- # @return [Oga::AST::Node]
259
+ # @see [LL::Driver#parse]
98
260
  #
99
261
  def parse
100
- ast = yyparse(self, :yield_next_token)
262
+ retval = super
101
263
 
102
264
  reset
103
265
 
104
- return ast
266
+ return retval
105
267
  end
106
268
 
107
269
  ##
@@ -109,9 +271,7 @@ module Oga
109
271
  # @return [Oga::XML::Document]
110
272
  #
111
273
  def on_document(children = [])
112
- document = Document.new(
113
- :type => @lexer.html ? :html : :xml
114
- )
274
+ document = Document.new(:type => @lexer.html ? :html : :xml)
115
275
 
116
276
  children.each do |child|
117
277
  if child.is_a?(Doctype)
@@ -217,404 +377,203 @@ module Oga
217
377
  return element
218
378
  end
219
379
 
220
- # vim: set ft=racc:
221
- ##### State transition tables begin ###
222
-
223
- racc_action_table = [
224
- 41, 20, 48, 47, 54, 12, 52, 55, 25, 26,
225
- 13, 14, 16, 20, 21, 48, 47, 12, 19, 62,
226
- 15, 40, 13, 14, 16, 20, 60, 23, 59, 12,
227
- 19, 66, 15, 65, 13, 14, 16, 36, 37, 24,
228
- 39, 35, 19, 42, 15, 31, 32, 31, 32, 31,
229
- 32, 48, 47, 57, 59, 64, 65, 49, 50, 51,
230
- 56, 67 ]
231
-
232
- racc_action_check = [
233
- 24, 0, 37, 37, 38, 0, 37, 38, 16, 16,
234
- 0, 0, 0, 3, 1, 53, 53, 3, 0, 53,
235
- 0, 24, 3, 3, 3, 18, 48, 12, 48, 18,
236
- 3, 61, 3, 61, 18, 18, 18, 23, 23, 15,
237
- 23, 21, 18, 26, 18, 17, 17, 19, 19, 28,
238
- 28, 30, 30, 47, 47, 58, 58, 32, 33, 34,
239
- 41, 63 ]
240
-
241
- racc_action_pointer = [
242
- -1, 14, nil, 11, nil, nil, nil, nil, nil, nil,
243
- nil, nil, 18, nil, nil, 17, -6, 28, 23, 30,
244
- nil, 41, nil, 30, -2, nil, 29, nil, 32, nil,
245
- 48, nil, 40, 42, 39, nil, nil, -1, -3, nil,
246
- nil, 37, nil, nil, nil, nil, nil, 49, 23, nil,
247
- nil, nil, nil, 12, nil, nil, nil, nil, 51, nil,
248
- nil, 28, nil, 54, nil, nil, nil, nil ]
249
-
250
- racc_action_default = [
251
- -3, -46, -1, -2, -5, -6, -7, -8, -9, -10,
252
- -11, -12, -46, -20, -21, -46, -46, -29, -3, -29,
253
- -37, -46, -4, -46, -46, -24, -46, -26, -28, -31,
254
- -32, -34, -46, -46, -46, 68, -13, -46, -46, -18,
255
- -22, -46, -25, -30, -33, -38, -39, -46, -46, -35,
256
- -27, -36, -14, -46, -17, -19, -23, -40, -46, -44,
257
- -42, -46, -15, -46, -41, -45, -43, -16 ]
258
-
259
- racc_goto_table = [
260
- 44, 2, 27, 1, 34, 58, 61, 53, 22, 38,
261
- 43, nil, nil, nil, nil, nil, nil, nil, nil, 33,
262
- nil, nil, nil, 63 ]
263
-
264
- racc_goto_check = [
265
- 12, 2, 16, 1, 16, 22, 22, 12, 4, 13,
266
- 18, nil, nil, nil, nil, nil, nil, nil, nil, 2,
267
- nil, nil, nil, 12 ]
268
-
269
- racc_goto_pointer = [
270
- nil, 3, 1, nil, 5, nil, nil, nil, nil, nil,
271
- nil, nil, -30, -14, nil, nil, -15, nil, -18, nil,
272
- nil, nil, -42 ]
273
-
274
- racc_goto_default = [
275
- nil, nil, nil, 3, 4, 5, 6, 7, 8, 9,
276
- 10, 11, nil, nil, 17, 18, nil, 28, 29, 30,
277
- 45, 46, nil ]
278
-
279
- racc_reduce_table = [
280
- 0, 0, :racc_error,
281
- 1, 25, :_reduce_1,
282
- 1, 26, :_reduce_2,
283
- 0, 26, :_reduce_3,
284
- 2, 27, :_reduce_4,
285
- 1, 27, :_reduce_5,
286
- 1, 28, :_reduce_none,
287
- 1, 28, :_reduce_none,
288
- 1, 28, :_reduce_none,
289
- 1, 28, :_reduce_none,
290
- 1, 28, :_reduce_none,
291
- 1, 28, :_reduce_none,
292
- 1, 28, :_reduce_none,
293
- 3, 29, :_reduce_13,
294
- 4, 29, :_reduce_14,
295
- 5, 29, :_reduce_15,
296
- 6, 29, :_reduce_16,
297
- 4, 29, :_reduce_17,
298
- 1, 37, :_reduce_18,
299
- 2, 37, :_reduce_19,
300
- 1, 30, :_reduce_20,
301
- 1, 31, :_reduce_21,
302
- 3, 35, :_reduce_22,
303
- 4, 35, :_reduce_23,
304
- 2, 38, :_reduce_24,
305
- 3, 38, :_reduce_25,
306
- 2, 39, :_reduce_26,
307
- 3, 32, :_reduce_27,
308
- 1, 40, :_reduce_28,
309
- 0, 40, :_reduce_29,
310
- 2, 41, :_reduce_30,
311
- 1, 41, :_reduce_31,
312
- 1, 42, :_reduce_32,
313
- 2, 42, :_reduce_33,
314
- 1, 43, :_reduce_34,
315
- 2, 43, :_reduce_35,
316
- 3, 34, :_reduce_36,
317
- 1, 33, :_reduce_37,
318
- 1, 36, :_reduce_none,
319
- 1, 36, :_reduce_none,
320
- 2, 44, :_reduce_40,
321
- 3, 44, :_reduce_41,
322
- 2, 45, :_reduce_42,
323
- 3, 45, :_reduce_43,
324
- 1, 46, :_reduce_44,
325
- 2, 46, :_reduce_45 ]
326
-
327
- racc_reduce_n = 46
328
-
329
- racc_shift_n = 68
330
-
331
- racc_token_table = {
332
- false => 0,
333
- :error => 1,
334
- :T_TEXT => 2,
335
- :T_STRING_SQUOTE => 3,
336
- :T_STRING_DQUOTE => 4,
337
- :T_STRING_BODY => 5,
338
- :T_DOCTYPE_START => 6,
339
- :T_DOCTYPE_END => 7,
340
- :T_DOCTYPE_TYPE => 8,
341
- :T_DOCTYPE_NAME => 9,
342
- :T_DOCTYPE_INLINE => 10,
343
- :T_CDATA => 11,
344
- :T_COMMENT => 12,
345
- :T_ELEM_START => 13,
346
- :T_ELEM_NAME => 14,
347
- :T_ELEM_NS => 15,
348
- :T_ELEM_END => 16,
349
- :T_ATTR => 17,
350
- :T_ATTR_NS => 18,
351
- :T_XML_DECL_START => 19,
352
- :T_XML_DECL_END => 20,
353
- :T_PROC_INS_START => 21,
354
- :T_PROC_INS_NAME => 22,
355
- :T_PROC_INS_END => 23 }
356
-
357
- racc_nt_base = 24
358
-
359
- racc_use_result_var = false
360
-
361
- Racc_arg = [
362
- racc_action_table,
363
- racc_action_check,
364
- racc_action_default,
365
- racc_action_pointer,
366
- racc_goto_table,
367
- racc_goto_check,
368
- racc_goto_default,
369
- racc_goto_pointer,
370
- racc_nt_base,
371
- racc_reduce_table,
372
- racc_token_table,
373
- racc_shift_n,
374
- racc_reduce_n,
375
- racc_use_result_var ]
376
-
377
- Racc_token_to_s_table = [
378
- "$end",
379
- "error",
380
- "T_TEXT",
381
- "T_STRING_SQUOTE",
382
- "T_STRING_DQUOTE",
383
- "T_STRING_BODY",
384
- "T_DOCTYPE_START",
385
- "T_DOCTYPE_END",
386
- "T_DOCTYPE_TYPE",
387
- "T_DOCTYPE_NAME",
388
- "T_DOCTYPE_INLINE",
389
- "T_CDATA",
390
- "T_COMMENT",
391
- "T_ELEM_START",
392
- "T_ELEM_NAME",
393
- "T_ELEM_NS",
394
- "T_ELEM_END",
395
- "T_ATTR",
396
- "T_ATTR_NS",
397
- "T_XML_DECL_START",
398
- "T_XML_DECL_END",
399
- "T_PROC_INS_START",
400
- "T_PROC_INS_NAME",
401
- "T_PROC_INS_END",
402
- "$start",
403
- "document",
404
- "expressions",
405
- "expressions_",
406
- "expression",
407
- "doctype",
408
- "cdata",
409
- "comment",
410
- "element",
411
- "text",
412
- "xmldecl",
413
- "proc_ins",
414
- "string",
415
- "doctype_inline",
416
- "element_open",
417
- "element_start",
418
- "attributes",
419
- "attributes_",
420
- "attribute",
421
- "attribute_name",
422
- "string_dquote",
423
- "string_squote",
424
- "string_body" ]
425
-
426
- Racc_debug_parser = false
427
-
428
- ##### State transition tables end #####
429
-
430
- # reduce 0 omitted
431
-
432
- def _reduce_1(val, _values)
433
- on_document(val[0])
434
- end
435
-
436
- def _reduce_2(val, _values)
437
- val[0]
438
- end
439
-
440
- def _reduce_3(val, _values)
441
- []
442
- end
443
-
444
- def _reduce_4(val, _values)
445
- val[0] << val[1]
446
- end
380
+ ##
381
+ # @param [String] name
382
+ # @param [String] ns_name
383
+ # @param [String] value
384
+ # @return [Oga::XML::Attribute]
385
+ #
386
+ def on_attribute(name, ns_name = nil, value = nil)
387
+ return Attribute.new(
388
+ :namespace_name => ns_name,
389
+ :name => name,
390
+ :value => value
391
+ )
392
+ end
447
393
 
448
- def _reduce_5(val, _values)
449
- val
450
- end
394
+ ##
395
+ # @param [Array] attrs
396
+ #
397
+ def on_attributes(attrs)
398
+ return attrs
399
+ end
451
400
 
452
- # reduce 6 omitted
401
+ def _rule_0(val)
402
+ on_document(val[0])
403
+ end
453
404
 
454
- # reduce 7 omitted
405
+ def _rule_1(val)
406
+ val[0]
407
+ end
455
408
 
456
- # reduce 8 omitted
409
+ def _rule_2(val)
410
+ val[0]
411
+ end
457
412
 
458
- # reduce 9 omitted
413
+ def _rule_3(val)
414
+ val[0]
415
+ end
459
416
 
460
- # reduce 10 omitted
417
+ def _rule_4(val)
418
+ val[0]
419
+ end
461
420
 
462
- # reduce 11 omitted
421
+ def _rule_5(val)
422
+ val[0]
423
+ end
463
424
 
464
- # reduce 12 omitted
425
+ def _rule_6(val)
426
+ val[0]
427
+ end
465
428
 
466
- def _reduce_13(val, _values)
467
- on_doctype(:name => val[1])
468
-
469
- end
429
+ def _rule_7(val)
430
+ val[0]
431
+ end
470
432
 
471
- def _reduce_14(val, _values)
472
- on_doctype(:name => val[1], :type => val[2])
473
-
474
- end
433
+ def _rule_8(val)
434
+ val[0]
435
+ end
475
436
 
476
- def _reduce_15(val, _values)
477
- on_doctype(:name => val[1], :type => val[2], :public_id => val[3])
478
-
479
- end
437
+ def _rule_9(val)
438
+
439
+ name = val[1]
440
+ follow = val[2]
441
+
442
+ on_doctype(
443
+ :name => name,
444
+ :type => follow[0],
445
+ :public_id => follow[1],
446
+ :system_id => follow[2],
447
+ :inline_rules => follow[3]
448
+ )
449
+
450
+ end
480
451
 
481
- def _reduce_16(val, _values)
482
- on_doctype(
483
- :name => val[1],
484
- :type => val[2],
485
- :public_id => val[3],
486
- :system_id => val[4]
487
- )
488
-
489
- end
452
+ def _rule_10(val)
453
+ []
454
+ end
490
455
 
491
- def _reduce_17(val, _values)
492
- on_doctype(:name => val[1], :inline_rules => val[2])
493
-
494
- end
456
+ def _rule_11(val)
457
+ [val[0], *val[1]]
458
+ end
495
459
 
496
- def _reduce_18(val, _values)
497
- val[0]
498
- end
460
+ def _rule_12(val)
461
+ [nil, nil, nil, val[0]]
462
+ end
499
463
 
500
- def _reduce_19(val, _values)
501
- val[0] + val[1]
502
- end
464
+ def _rule_13(val)
465
+ val[0].inject(:+)
466
+ end
503
467
 
504
- def _reduce_20(val, _values)
505
- on_cdata(val[0])
506
- end
468
+ def _rule_14(val)
469
+ [val[0], val[1]]
470
+ end
507
471
 
508
- def _reduce_21(val, _values)
509
- on_comment(val[0])
510
- end
472
+ def _rule_15(val)
473
+ nil
474
+ end
511
475
 
512
- def _reduce_22(val, _values)
513
- on_proc_ins(val[1])
514
-
515
- end
476
+ def _rule_16(val)
477
+ on_cdata(val[0])
478
+ end
516
479
 
517
- def _reduce_23(val, _values)
518
- on_proc_ins(val[1], val[2])
519
-
520
- end
480
+ def _rule_17(val)
481
+ on_comment(val[0])
482
+ end
521
483
 
522
- def _reduce_24(val, _values)
523
- [nil, val[1]]
524
- end
484
+ def _rule_18(val)
485
+
486
+ on_proc_ins(val[1], val[2])
487
+
488
+ end
525
489
 
526
- def _reduce_25(val, _values)
527
- [val[1], val[2]]
528
- end
490
+ def _rule_19(val)
491
+ [nil, val[0]]
492
+ end
529
493
 
530
- def _reduce_26(val, _values)
531
- on_element(val[0][0], val[0][1], val[1])
532
- end
494
+ def _rule_20(val)
495
+ val
496
+ end
533
497
 
534
- def _reduce_27(val, _values)
535
- if val[0]
536
- on_element_children(val[0], val[1])
537
- end
498
+ def _rule_21(val)
499
+
500
+ on_element(val[1][0], val[1][1], val[2])
501
+
502
+ end
538
503
 
539
- after_element(val[0])
540
-
541
- end
504
+ def _rule_22(val)
505
+
506
+ if val[0]
507
+ on_element_children(val[0], val[1])
508
+ end
542
509
 
543
- def _reduce_28(val, _values)
544
- val[0]
545
- end
510
+ after_element(val[0])
511
+
512
+ end
546
513
 
547
- def _reduce_29(val, _values)
548
- []
549
- end
514
+ def _rule_23(val)
515
+ on_attributes(val[0])
516
+ end
550
517
 
551
- def _reduce_30(val, _values)
552
- val[0] << val[1]
553
- end
518
+ def _rule_24(val)
519
+ on_attribute(val[1], val[0], val[2])
520
+ end
554
521
 
555
- def _reduce_31(val, _values)
556
- val
557
- end
522
+ def _rule_25(val)
523
+ on_attribute(val[0], nil, val[1])
524
+ end
558
525
 
559
- def _reduce_32(val, _values)
560
- val[0]
561
- end
526
+ def _rule_26(val)
527
+ on_xml_decl(val[1])
528
+ end
562
529
 
563
- def _reduce_33(val, _values)
564
- val[0].value = val[1]
565
- val[0]
566
-
567
- end
530
+ def _rule_27(val)
531
+ on_text(val[0])
532
+ end
568
533
 
569
- def _reduce_34(val, _values)
570
- Attribute.new(:name => val[0])
571
- end
534
+ def _rule_28(val)
535
+ val[1]
536
+ end
572
537
 
573
- def _reduce_35(val, _values)
574
- Attribute.new(:namespace_name => val[0], :name => val[1])
575
-
576
- end
538
+ def _rule_29(val)
539
+ val[1]
540
+ end
577
541
 
578
- def _reduce_36(val, _values)
579
- on_xml_decl(val[1])
580
- end
542
+ def _rule_30(val)
543
+ val[0].inject(:+) || ''
544
+ end
581
545
 
582
- def _reduce_37(val, _values)
583
- on_text(val[0])
584
- end
546
+ def _rule_31(val)
547
+ val[0]
548
+ end
585
549
 
586
- # reduce 38 omitted
550
+ def _rule_32(val)
551
+ val[0]
552
+ end
587
553
 
588
- # reduce 39 omitted
554
+ def _rule_33(val)
555
+ val[0]
556
+ end
589
557
 
590
- def _reduce_40(val, _values)
591
- ''
592
- end
558
+ def _rule_34(val)
559
+ val[0]
560
+ end
593
561
 
594
- def _reduce_41(val, _values)
595
- val[1]
596
- end
562
+ def _rule_35(val)
563
+ val[0]
564
+ end
597
565
 
598
- def _reduce_42(val, _values)
599
- ''
600
- end
566
+ def _rule_36(val)
567
+ val[0]
568
+ end
601
569
 
602
- def _reduce_43(val, _values)
603
- val[1]
604
- end
570
+ def _rule_37(val)
571
+ val[0]
572
+ end
605
573
 
606
- def _reduce_44(val, _values)
607
- val[0]
574
+ def _rule_38(val)
575
+ val[0]
576
+ end
608
577
  end
609
-
610
- def _reduce_45(val, _values)
611
- val[0] + val[1]
612
578
  end
613
-
614
- def _reduce_none(val, _values)
615
- val[0]
616
579
  end
617
-
618
- end # class Parser
619
- end # module XML
620
- end # module Oga