gammo 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -23,3 +23,9 @@ task :generate do
23
23
  table = eval(Erubi::Engine.new(File.read('misc/table.erubi')).src, binding)
24
24
  File.write('lib/gammo/tags/table.rb', table)
25
25
  end
26
+
27
# Tasks that drive racc, the parser generator used for the XPath grammar.
namespace :racc do
  # Regenerates lib/gammo/xpath/parser.rb from the grammar file.
  # `sh` is used instead of backticks so that racc's output is echoed and a
  # non-zero exit status fails the task instead of being silently discarded.
  desc 'Generate lib/gammo/xpath/parser.rb from lib/gammo/xpath/parser.y'
  task :parser do
    sh 'bundle exec racc lib/gammo/xpath/parser.y -o lib/gammo/xpath/parser.rb'
  end
end
@@ -3,15 +3,24 @@ module Gammo
3
3
  class Attribute
4
4
  attr_accessor :key, :value, :namespace
5
5
 
6
+ # @!visibility private
7
+ attr_accessor :owner_element
8
+
6
9
  # Constructs an attribute with the key-value pair.
7
10
  # @param [String] key
8
11
  # @param [String] value
9
12
  # @param [String] namespace
13
+ # @param [Gammo::Element] owner_element
10
14
  # @return [Attribute]
11
- def initialize(key:, value:, namespace: nil)
12
- @key = key
13
- @value = value
14
- @namespace = namespace
15
+ def initialize(key:, value:, namespace: nil, owner_element: nil)
16
+ @key = key
17
+ @value = value
18
+ @namespace = namespace
19
+ @owner_element = owner_element
20
+ end
21
+
22
+ def to_s
23
+ "<Gammo::Attribute #{key}='#{value}'>"
15
24
  end
16
25
  end
17
26
  end
@@ -0,0 +1,95 @@
1
+ require 'delegate'
2
+
3
module Gammo
  # Ordered collection of attributes that behaves like an Array (every Array
  # method is delegated to the wrapped array) while also maintaining a
  # key => value Hash so that values can be looked up by attribute name.
  #
  # Note that `[]` and `[]=` are overridden to work on attribute names rather
  # than on array indexes.
  class Attributes < DelegateClass(Array)
    attr_accessor :owner_element

    # @param [Array<Gammo::Attribute>] array attributes to wrap
    # @param [Gammo::Node, nil] owner_element assigned to every given
    #   attribute as its owner (including when it is nil)
    def initialize(array, owner_element: nil)
      super(array)
      array.each { |attr| attr.owner_element = owner_element }
      @owner_element = owner_element
      @attributes_hash = attributes_to_hash(array)
    end

    # Appends an attribute.
    # @param [Gammo::Attribute] attr
    # @return [Gammo::Attributes] self, so that calls can be chained
    def <<(attr)
      super
      remember([attr])
      self
    end

    # Looks up an attribute value by name.
    # @param [String, Symbol] key
    # @return [String, nil]
    def [](key)
      @attributes_hash[key.to_s]
    end

    # Sets the value for the given attribute name. An attribute that already
    # exists under that name is updated in place; otherwise a new attribute
    # owned by `owner_element` is appended.
    # @param [String, Symbol] key
    # @param [String] value
    def []=(key, value)
      name = key.to_s
      existing = find { |attr| attr.key.to_s == name }
      if existing
        existing.value = value
        @attributes_hash[name] = value
      else
        self << Attribute.new(key: name, value: value, owner_element: owner_element)
      end
    end

    # Inserts the given attributes at the front.
    # @return [Gammo::Attributes] self
    def prepend(*attrs)
      super
      remember(attrs)
      self
    end
    alias_method :unshift, :prepend

    # Removes the first attribute, or the first `n` attributes.
    # @return [Gammo::Attribute, Array<Gammo::Attribute>, nil] what Array#shift returns
    def shift(n = nil)
      removed = n ? super : super()
      forget(n ? removed : [removed].compact)
      removed
    end

    # Removes the last attribute, or the last `n` attributes.
    # @return [Gammo::Attribute, Array<Gammo::Attribute>, nil] what Array#pop returns
    def pop(n = nil)
      removed = n ? super : super()
      forget(n ? removed : [removed].compact)
      removed
    end

    # Adds the given attributes at the end.
    # @return [Gammo::Attributes] self
    def append(*attrs)
      super
      remember(attrs)
      self
    end
    alias_method :push, :append

    # Deletes the attributes equal to `attr`.
    # @return [Gammo::Attribute, nil] what Array#delete returns
    def delete(attr)
      deleted = super
      forget([deleted]) if deleted
      deleted
    end

    # Deletes every attribute for which the block is truthy.
    # @return [Array, nil] what Array#reject! returns (nil if nothing changed)
    def reject!
      before = __getobj__.dup
      rejected = super
      forget(before - __getobj__)
      rejected
    end

    # Deletes every attribute for which the block is truthy.
    # @return [Gammo::Attributes] self
    def delete_if
      before = __getobj__.dup
      super
      forget(before - __getobj__)
      self
    end

    # Deletes the attribute at the given index.
    # @param [Integer] pos
    # @return [Gammo::Attribute, nil] the deleted attribute, or nil when the
    #   index is out of range
    def delete_at(pos)
      deleted = super
      # Array#delete_at returns a single element (or nil), never a list.
      forget([deleted]) if deleted
      deleted
    end

    # @return [Hash{String => String}] a copy of the name => value mapping
    def to_h
      @attributes_hash.dup
    end

    # @return [String] string form of the name => value mapping
    def to_s
      @attributes_hash.to_s
    end

    private

    # Builds the name => value mapping for the given attributes.
    def attributes_to_hash(attrs)
      attrs.each_with_object({}) { |attr, h| h[attr.key.to_s] = attr.value }
    end

    # Records the given attributes in the name => value mapping.
    def remember(attrs)
      attrs.each { |attr| @attributes_hash[attr.key.to_s] = attr.value }
    end

    # Drops the given attributes from the name => value mapping.
    def forget(attrs)
      attrs.each { |attr| @attributes_hash.delete(attr.key.to_s) }
    end
  end
end
@@ -1,18 +1,91 @@
1
+ require 'gammo/xpath'
2
+ require 'gammo/attributes'
3
+
1
4
  module Gammo
2
5
  # Class for representing Node.
3
6
  # https://html.spec.whatwg.org/multipage/parsing.html#tokenization
4
7
  class Node
8
+ # Raised if uncaught node is given for particular operations.
9
+ # @!visibility private
10
+ UncaughtTypeError = Class.new(ArgumentError)
11
+
12
+ # Raised if anything goes wrong with the hierarchy during node operations.
13
+ # @!visibility private
14
+ HierarchyRequestError = Class.new(ArgumentError)
15
+
16
+ # `parent` is the pointer for the parent node.
17
+ attr_accessor :parent
18
+
19
+ # `first_child` and `last_child` are pointers for the first and the last nodes.
20
+ attr_accessor :first_child, :last_child
21
+
22
+ # `previous_sibling` and `next_sibling` are pointers for the previous and next sibling nodes.
23
+ attr_accessor :previous_sibling, :next_sibling
24
+
25
+ # Properties required to represent node.
26
+ attr_accessor :tag, :data, :namespace
27
+
28
+ # Reader for attributes associated with this node.
29
+ attr_reader :attributes
30
+
5
31
  # Represents the error token.
6
32
  Error = Class.new(Node)
7
33
 
34
# Returns the text content of this node. A generic node carries no text, so
# the default is nil; Text overrides this to return its data.
# @return [NilClass]
def text_content
  nil
end
37
+
38
# Finds the first attribute whose key and namespace both equal the given ones.
# @param [String] key
# @param [String, NilClass] namespace
# @return [Gammo::Attribute, nil] the matching attribute, or nil if none
def get_attribute_node(key, namespace: nil)
  attributes.find do |attribute|
    attribute.key == key && attribute.namespace == namespace
  end
end
41
+
42
# Yields every descendant of this node (not the node itself) depth-first,
# visiting each child before its next sibling. The traversal is iterative,
# using an explicit stack, so deep trees do not grow the call stack.
# @yieldparam [Gammo::Node] node
# @return [Enumerator, nil] an Enumerator when no block is given
def each_descendant
  return to_enum(:each_descendant) unless block_given?

  pending = []
  pending << first_child if first_child
  until pending.empty?
    node = pending.pop
    yield node
    # The sibling is pushed first so that the child subtree is popped (and
    # therefore visited) before it.
    pending << node.next_sibling if node.next_sibling
    pending << node.first_child if node.first_child
  end
end
51
+
8
52
  # Represents the text token.
9
- Text = Class.new(Node)
53
# Text node: both its text content and its string form are the node's data.
class Text < Node
  alias text_content data
  alias to_s text_content
end
10
57
 
11
58
  # Represents the root document token.
12
- Document = Class.new(Node)
59
# Root document node. Mixing in XPath makes `xpath` available on documents.
class Document < Node
  include XPath
end
13
62
 
14
63
  # Represents the element token including start, end and self-closing token.
15
- Element = Class.new(Node)
64
# Element node, covering start, end and self-closing tokens.
class Element < Node
  # Concatenates the data of every descendant Text node.
  # TODO: The current innerText() implementation does not conform to WHATWG spec.
  # https://html.spec.whatwg.org/multipage/dom.html#the-innertext-idl-attribute
  # @return [String]
  def inner_text
    buffer = +''
    each_descendant do |node|
      buffer << node.data if node.instance_of?(Text)
    end
    buffer
  end

  # Renders the start tag: the tag followed by space-separated key=value pairs.
  # @return [String]
  def to_s
    rendered = attributes_to_string
    return "<#{tag}>" if rendered.empty?

    "<#{tag} #{rendered}>"
  end

  private

  # Joins the attributes as "key=value" pairs separated by single spaces.
  def attributes_to_string
    attributes.map { |attr| "#{attr.key}=#{attr.value}" }.join(' ')
  end
end
16
89
 
17
90
  # Represents the comment token like "<!-- foo -->".
18
91
  Comment = Class.new(Node)
@@ -30,37 +103,25 @@ module Gammo
30
103
  # template, td, th, and caption elements"
31
104
  DEFAULT_SCOPE_MARKER = Node::ScopeMarker.new
32
105
 
33
- # Raised if uncaught node is given for particular operations.
34
- # @!visibility private
35
- UncaughtTypeError = Class.new(ArgumentError)
36
-
37
- # Raised if anything goes wrong on hierarchy while node operations.
38
- # @!visibility private
39
- HierarchyRequestError = Class.new(ArgumentError)
40
-
41
- # `parent` is the pointer for the parent node.
42
- attr_accessor :parent
43
-
44
- # `first_child` and `last_child` are pointers for the first and the last nodes.
45
- attr_accessor :first_child, :last_child
46
-
47
- # `previous_sibling` and `next_sibling` are pointers for the previous and next sibling nodes.
48
- attr_accessor :previous_sibling, :next_sibling
49
-
50
- # Properties required to represent node.
51
- attr_accessor :tag, :data, :namespace, :attributes
52
-
53
106
  # Constructs a node which represents HTML element node.
54
107
  # @param [String] tag
55
108
  # @param [String] data
56
109
  # @param [String, NilClass] namespace
57
- # @param [Hash(String => String)] attributes
110
+ # @param [Gammo::Attributes] attributes
58
111
  # @return [Gammo::Node]
59
- def initialize(tag: nil, data: nil, namespace: nil, attributes: [])
112
+ def initialize(tag: nil, data: nil, namespace: nil, attributes: Attributes.new([]))
60
113
  @tag = tag
61
114
  @data = data
62
115
  @namespace = namespace
63
- @attributes = attributes
116
+ @attributes = Attributes.new(attributes, owner_element: self)
117
+ end
118
+
119
# Replaces this node's attributes with a copy of the given collection.
# Only the collection is duplicated: the attribute objects themselves are
# shared with `attrs`, and each of them gets this node as its owner element.
# @param [Gammo::Attributes] attrs
def attributes=(attrs)
  @attributes = attrs.dup.tap do |copy|
    copy.each { |attribute| attribute.owner_element = self }
  end
end
65
126
 
66
127
  # Inserts a node before a reference node as a child of a specified parent node.
@@ -142,6 +203,39 @@ module Gammo
142
203
  }
143
204
  end
144
205
 
206
# Collects every visited node for which the given block returns a truthy value.
# The walk starts at this node (which is itself tested) and follows both
# first_child and next_sibling links, so the following siblings of this node
# and their subtrees are visited as well.
# @yieldparam [Gammo::Node] node
# @return [Array<Gammo::Node>] matching nodes, child subtrees before siblings
def select(&block)
  matched = []
  pending = [self]
  while (current = pending.pop)
    matched << current if block.call(current)
    sibling = current.next_sibling
    child = current.first_child
    pending << sibling if sibling
    pending << child if child
  end
  matched
end
218
+
219
# Lists the direct children of this node by following the next_sibling
# links starting from first_child.
# @return [Array<Gammo::Node>] children in sibling order; empty if none
def children
  collected = []
  node = first_child
  until node.nil?
    collected.push(node)
    node = node.next_sibling
  end
  collected
end
228
+
229
# Walks up the parent chain and returns the first node that is a document
# (this node itself if it is one).
# @return [Gammo::Node::Document, nil] nil when the node is not attached to
#   a document, i.e. the parent chain ends without reaching one
def owner_document
  node = self
  # Stop at the root instead of calling document? on nil.
  node = node.parent until node.nil? || node.document?
  node
end
234
+
235
# Tells whether this node is a document node. The check is for exactly the
# Document class, so instances of its subclasses do not count.
# @return [Boolean]
def document?
  instance_of?(Document)
end
238
+
145
239
  private
146
240
 
147
241
  # @!visibility private
@@ -319,7 +319,9 @@ module Gammo
319
319
 
320
320
  # @!visibility private
321
321
  def add_element
322
- add_child(Node::Element.new(tag: token.tag, data: token.data, attributes: token.attributes))
322
+ elm = Node::Element.new(tag: token.tag, data: token.data)
323
+ elm.attributes = Attributes.new(token.attributes, owner_element: elm)
324
+ add_child(elm)
323
325
  end
324
326
 
325
327
  # @!visibility private
@@ -1,3 +1,3 @@
1
1
  module Gammo
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -0,0 +1,74 @@
1
+ require 'gammo/xpath/parser'
2
+ require 'gammo/xpath/context'
3
+
4
module Gammo
  # Mixin that adds XPath evaluation (`xpath`) to the including class.
  module XPath
    # Result types.
    # Features related to snapshots and ordered node iteration are not
    # supported.
    # TODO: Support official result types.
    # - ORDERED_NODE_ITERATOR_TYPE
    # - UNORDERED_NODE_SNAPSHOT_TYPE
    # - ORDERED_NODE_SNAPSHOT_TYPE
    ANY_TYPE = 0
    NUMBER_TYPE = 1
    STRING_TYPE = 2
    BOOLEAN_TYPE = 3
    UNORDERED_NODE_ITERATOR_TYPE = 4
    ANY_UNORDERED_NODE_TYPE = 8
    FIRST_ORDERED_NODE_TYPE = 9

    # Class for traversing DOM tree built by Gammo::Parser by a given expression.
    # @!visibility private
    class Traverser
      # Constructs an instance of Gammo::XPath::Traverser.
      # @param [String] expr
      # @param [Integer] result_type one of the *_TYPE constants above
      # @!visibility private
      def initialize(expr:, result_type:)
        @expr = expr
        @result_type = result_type
      end

      # Parses the expression, evaluates it against the given context and
      # converts the outcome according to the result type.
      # @param [Gammo::XPath::Context] context
      # @return [String, Integer, TrueClass, FalseClass, Gammo::XPath::NodeSet]
      # @raise [TypeError] if a node result type is requested but the
      #   evaluated value is not a node set
      # @raise [ArgumentError] if the result type is not supported
      # @!visibility private
      def evaluate(context)
        convert_value context, Parser.new(@expr).parse.evaluate(context)
      end

      private

      # Converts the evaluated value into the representation requested by
      # the result type.
      # @!visibility private
      def convert_value(context, value)
        case @result_type
        when ANY_TYPE then value.value
        when NUMBER_TYPE then value.to_number
        when STRING_TYPE then value.to_s
        when BOOLEAN_TYPE then value.to_bool
        when UNORDERED_NODE_ITERATOR_TYPE
          node_set_from(context, value)
        when ANY_UNORDERED_NODE_TYPE, FIRST_ORDERED_NODE_TYPE
          # Both single-node result types are answered with the first node.
          node_set_from(context, value).first
        else
          fail ArgumentError, "unsupported result type: #{@result_type.inspect}"
        end
      end

      # Returns the value as a node set, rejecting values of other kinds.
      # @!visibility private
      def node_set_from(context, value)
        fail TypeError, 'the result is not a node set' unless value.node_set?
        value.to_node_set(context)
      end
    end

    # Traverses DOM tree by a given expression, and returns a result according
    # to the result type.
    # @param [String] expr
    # @param [Hash{Symbol => String, Symbol, Integer, TrueClass, FalseClass, #call}] variables
    # @param [Integer] result_type
    # @param [Gammo::Node] context_node defaults to the receiver
    # @return [String, Integer, TrueClass, FalseClass, Gammo::XPath::NodeSet]
    def xpath(expr, variables: {}, result_type: UNORDERED_NODE_ITERATOR_TYPE, context_node: self)
      Traverser.new(
        expr: expr,
        result_type: result_type,
      ).evaluate(Context.new(node: context_node, variables: variables))
    end
  end
end
@@ -0,0 +1,231 @@
1
+ require 'gammo/xpath/node_set'
2
+ require 'gammo/xpath/ast/subclassify'
3
+ require 'gammo/tags'
4
+
5
module Gammo
  module XPath
    module AST
      # Class for representing Axes.
      # https://www.w3.org/TR/1999/REC-xpath-19991116/#axes
      #
      # Each subclass implements `strain(context, context_node, node_set)`,
      # which appends to `node_set` the nodes on its axis (relative to
      # `context_node`) that pass `node_test`. Subclasses announce their axis
      # name through `declare`, which comes from Subclassify.
      # @!visibility private
      class Axis
        attr_reader :node_test, :predicates

        extend Subclassify

        # @param [#match?] node_test test applied to every candidate node
        # @param [Array, Object] predicates one predicate or a list of them;
        #   each must respond to `evaluate(context)`
        def initialize(node_test:, predicates: [])
          @node_test = node_test
          @predicates = Array(predicates)
        end

        # Fills `node_set` with the nodes on this axis and then narrows it
        # down with each predicate in turn. `node_set` is modified in place.
        def evaluate_context_node_with_node_set(context, context_node, node_set)
          context.position = 0
          # Strain nodes from context node for each axis.
          strain(context, context_node, node_set)
          # After straining try to filter by given predicates.
          predicates.each do |predicate|
            new_nodes = Gammo::XPath::NodeSet.new
            node_set.each_with_index do |node, i|
              # Expose the candidate, the set size and the 1-based position
              # to the predicate through the context.
              context.node = node
              context.size = node_set.size
              context.position = i + 1
              new_nodes << node if predicate.evaluate(context)
            end
            node_set.replace(new_nodes)
          end
        end

        # ancestor axis: the parent chain of the context node. For an
        # attribute context node the owner element counts as an ancestor too.
        class Ancestor < Axis
          declare :ancestor

          def strain(context, context_node, node_set)
            if context_node.instance_of?(Gammo::Attribute)
              context_node = context_node.owner_element
              node_set << context_node if node_test.match?(context_node)
            end
            node = context_node
            while node = node.parent
              node_set << node if node_test.match?(node)
            end
          end
        end

        # ancestor-or-self axis: the context node followed by its ancestors.
        class AncestorOrSelf < Axis
          declare :ancestor_or_self

          def strain(context, context_node, node_set)
            node_set << context_node if node_test.match?(context_node)
            if context_node.instance_of?(Gammo::Attribute)
              context_node = context_node.owner_element
              node_set << context_node if node_test.match?(context_node)
            end
            node = context_node
            while node = node.parent
              node_set << node if node_test.match?(node)
            end
          end
        end

        # attribute axis: the attributes of the context node.
        class Attribute < Axis
          declare :attribute

          def strain(context, context_node, node_set)
            # An attribute has no attributes of its own, so the axis is empty.
            return if context_node.instance_of?(Gammo::Attribute)

            if node_test.instance_of?(NodeTest::Name) && node_test.local != ?*
              # A concrete name can match at most one attribute: look it up
              # directly instead of scanning all of them.
              attribute =
                if node_test.namespace
                  # TODO: Test this properly.
                  context_node.get_attribute_node(node_test.local, namespace: node_test.namespace)
                else
                  context_node.get_attribute_node(node_test.local)
                end
              if attribute && attribute.namespace != 'http://www.w3.org/XML/1998/namespace'
                node_set << attribute if node_test.match?(attribute)
              end
              return
            end

            node_set.concat(context_node.attributes.select { |attribute|
              node_test.match?(attribute)
            })
          end
        end

        # child axis: the direct children of the context node.
        class Child < Axis
          declare :child

          def strain(context, context_node, node_set)
            return if context_node.instance_of?(Gammo::Attribute)
            node = context_node.first_child
            while node
              node_set << node if node_test.match?(node)
              node = node.next_sibling
            end
          end
        end

        # descendant axis: everything yielded by the node's each_descendant.
        class Descendant < Axis
          declare :descendant

          def strain(context, context_node, node_set)
            return if context_node.instance_of?(Gammo::Attribute)
            context_node.each_descendant do |node|
              node_set << node if node_test.match?(node)
            end
          end
        end

        # descendant-or-self axis: the context node followed by its descendants.
        class DescendantOrSelf < Axis
          declare :descendant_or_self

          def strain(context, context_node, node_set)
            node_set << context_node if node_test.match?(context_node)
            return if context_node.instance_of?(Gammo::Attribute)
            context_node.each_descendant do |node|
              node_set << node if node_test.match?(node)
            end
          end
        end

        # following axis: the nodes after the context node in document order,
        # excluding its descendants. An attribute context node is replaced by
        # its owner element before the walk.
        class Following < Axis
          declare :following

          def strain(context, context_node, node_set)
            context_node = context_node.owner_element if context_node.instance_of?(Gammo::Attribute)
            # For the context node and then each of its ancestors, take the
            # following siblings together with their subtrees. Climbing via
            # `parent` guarantees termination at the root.
            node = context_node
            while node
              if (sibling = node.next_sibling)
                node_set << sibling if node_test.match?(sibling)
                # Yields the sibling's subtree and every later sibling with
                # its subtree, but not the sibling itself (handled above).
                each_following(sibling) do |following|
                  node_set << following if node_test.match?(following)
                end
              end
              node = node.parent
            end
          end

          # Yields every node reachable from `context_node` through
          # first_child / next_sibling links, except `context_node` itself,
          # visiting children before siblings.
          def each_following(context_node)
            stack = [context_node]
            until stack.empty?
              node = stack.pop
              yield node unless node == context_node
              stack << node.next_sibling if node.next_sibling
              stack << node.first_child if node.first_child
            end
          end
        end

        # following-sibling axis: the siblings after the context node.
        class FollowingSibling < Axis
          declare :following_sibling

          def strain(context, context_node, node_set)
            return if context_node.instance_of?(Gammo::Attribute)
            node = context_node
            while node = node.next_sibling
              node_set << node if node_test.match?(node)
            end
          end
        end

        # namespace axis.
        class Namespace < Axis
          declare :namespace

          def strain(context, context_node, node_set)
            # Not implemented
          end
        end

        # parent axis: the parent of the context node, or the owner element
        # when the context node is an attribute.
        class Parent < Axis
          declare :parent

          def strain(context, context_node, node_set)
            parent = context_node.instance_of?(Gammo::Attribute) ?
              context_node.owner_element : context_node.parent
            # A root node (or an unowned attribute) has no parent to test.
            node_set << parent if parent && node_test.match?(parent)
          end
        end

        # preceding axis.
        # NOTE(review): only the preceding siblings of the context node and of
        # its ancestors are visited; descendants of those siblings are not,
        # although the XPath 1.0 definition of this axis includes them.
        # Confirm whether that is intentional.
        class Preceding < Axis
          declare :preceding

          def strain(context, context_node, node_set)
            context_node = context_node.owner_element if context_node.instance_of?(Gammo::Attribute)
            each_preceding(context_node) do |node|
              node_set << node if node_test.match?(node)
            end
          end

          private

          # Yields the preceding siblings (nearest first) of `context_node`
          # and then of each ancestor that itself has a parent.
          def each_preceding(context_node)
            node = context_node
            while parent = node.parent
              while node = node.previous_sibling
                yield node
              end
              node = parent
            end
          end
        end

        # preceding-sibling axis: the siblings before the context node,
        # nearest first.
        class PrecedingSibling < Axis
          declare :preceding_sibling

          def strain(context, context_node, node_set)
            return if context_node.instance_of?(Gammo::Attribute)
            node = context_node
            while node = node.previous_sibling
              node_set << node if node_test.match?(node)
            end
          end
        end

        # self axis: just the context node.
        class Self < Axis
          declare :self

          def strain(context, context_node, node_set)
            node_set << context_node if node_test.match?(context_node)
          end
        end
      end
    end
  end
end