gammo 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -23,3 +23,9 @@ task :generate do
23
23
  table = eval(Erubi::Engine.new(File.read('misc/table.erubi')).src, binding)
24
24
  File.write('lib/gammo/tags/table.rb', table)
25
25
  end
26
+
27
namespace :racc do
  # Regenerates the XPath parser from its racc grammar.
  # `sh` echoes the command, lets racc's output through and fails the task
  # when racc exits non-zero; the previous backtick call discarded the output
  # and ignored the exit status.
  desc 'Generate lib/gammo/xpath/parser.rb from lib/gammo/xpath/parser.y'
  task :parser do
    sh 'bundle', 'exec', 'racc', 'lib/gammo/xpath/parser.y', '-o', 'lib/gammo/xpath/parser.rb'
  end
end
@@ -3,15 +3,24 @@ module Gammo
3
3
  class Attribute
4
4
  attr_accessor :key, :value, :namespace
5
5
 
6
+ # @!visibility private
7
+ attr_accessor :owner_element
8
+
6
9
  # Constructs an attribute with the key-value pair.
7
10
  # @param [String] key
8
11
  # @param [String] value
9
12
  # @param [String] namespace
13
+ # @param [Gammo::Element] owner_element
10
14
  # @return [Attribute]
11
- def initialize(key:, value:, namespace: nil)
12
- @key = key
13
- @value = value
14
- @namespace = namespace
15
+ def initialize(key:, value:, namespace: nil, owner_element: nil)
16
+ @key = key
17
+ @value = value
18
+ @namespace = namespace
19
+ @owner_element = owner_element
20
+ end
21
+
22
+ def to_s
23
+ "<Gammo::Attribute #{key}='#{value}'>"
15
24
  end
16
25
  end
17
26
  end
@@ -0,0 +1,95 @@
1
+ require 'delegate'
2
+
3
module Gammo
  # Ordered collection of attributes that behaves like an Array (through
  # delegation) and additionally maintains a key => value lookup table.
  #
  # Note that `[]` and `[]=` are keyed by attribute name, not by index.
  class Attributes < DelegateClass(Array)
    attr_accessor :owner_element

    # @param [Array<Gammo::Attribute>] array attributes to wrap; each one gets
    #   its `owner_element` set to the given owner.
    # @param [Object, nil] owner_element element owning the attributes.
    def initialize(array, owner_element: nil)
      super(array)
      array.each { |attr| attr.owner_element = owner_element }
      @owner_element = owner_element
      @attributes_hash = attributes_to_hash(array)
    end

    # Appends an attribute.
    # @return [Attributes] self, so that calls can be chained like Array#<<.
    def <<(attr)
      super
      remember([attr])
      self
    end

    # Looks up an attribute value by key.
    # @param [#to_s] key
    # @return [Object, nil] the value registered for the key, if any.
    def [](key)
      @attributes_hash[key.to_s]
    end

    # Appends a new attribute built from the key-value pair.
    def []=(key, value)
      self << Attribute.new(key: key.to_s, value: value, owner_element: owner_element)
    end

    # Inserts attributes at the front of the collection.
    # @return [Object] whatever the delegated Array method returns.
    def prepend(*attrs)
      prepended = super
      remember(attrs)
      prepended
    end
    alias_method :unshift, :prepend

    # Removes the first attribute, or the first n attributes.
    # @return [Object] the removed attribute(s), as Array#shift does.
    def shift(n = nil)
      before = to_a.dup
      ret = n ? super : super()
      forget(before - to_a)
      ret
    end

    # Removes the last attribute, or the last n attributes.
    # @return [Object] the removed attribute(s), as Array#pop does.
    def pop(n = nil)
      before = to_a.dup
      ret = n ? super : super()
      forget(before - to_a)
      ret
    end

    # Appends attributes at the end of the collection.
    # @return [Attributes] self, so that calls can be chained like Array#push.
    def append(*attrs)
      super
      remember(attrs)
      self
    end
    alias_method :push, :append

    # Deletes the given attribute.
    # @return [Object, nil] the deleted attribute, or nil when it was absent.
    def delete(attr)
      deleted = super
      forget([deleted]) if deleted
      deleted
    end

    # Deletes every attribute for which the block is true.
    # @return [Object, nil] whatever the delegated Array#reject! returns.
    def reject!
      before = to_a.dup
      rejected = super
      forget(before - to_a)
      rejected
    end

    # Deletes every attribute for which the block is true.
    # @return [Attributes] self
    def delete_if
      before = to_a.dup
      super
      forget(before - to_a)
      self
    end

    # Deletes the attribute at the given position.
    # Array#delete_at returns the single removed element (or nil when the
    # position is out of range), not a list, so it must not be iterated.
    # @param [Integer] pos
    # @return [Object, nil] the removed attribute, or nil.
    def delete_at(pos)
      deleted = super
      forget([deleted]) if deleted
      deleted
    end

    # @return [Hash] a copy of the key => value lookup table.
    def to_h
      @attributes_hash.dup
    end

    # @return [String] string form of the key => value lookup table.
    def to_s
      @attributes_hash.to_s
    end

    private

    # Builds the key => value lookup table for the given attributes.
    def attributes_to_hash(attrs)
      attrs.each_with_object({}) { |attr, h| h[attr.key.to_s] = attr.value }
    end

    # Registers the given attributes in the lookup table.
    def remember(attrs)
      attrs.each { |attr| @attributes_hash[attr.key.to_s] = attr.value }
    end

    # Drops the keys of the given attributes from the lookup table.
    def forget(attrs)
      attrs.each { |attr| @attributes_hash.delete(attr.key.to_s) }
    end
  end
end
@@ -1,18 +1,91 @@
1
+ require 'gammo/xpath'
2
+ require 'gammo/attributes'
3
+
1
4
  module Gammo
2
5
  # Class for representing Node.
3
6
  # https://html.spec.whatwg.org/multipage/parsing.html#tokenization
4
7
  class Node
8
+ # Raised if uncaught node is given for particular operations.
9
+ # @!visibility private
10
+ UncaughtTypeError = Class.new(ArgumentError)
11
+
12
+ # Raised if anything goes wrong on hierarchy while node operations.
13
+ # @!visibility private
14
+ HierarchyRequestError = Class.new(ArgumentError)
15
+
16
+ # `parent` is the pointer for the parent node.
17
+ attr_accessor :parent
18
+
19
+ # `first_child` and `last_child` are pointers for the first and the last nodes.
20
+ attr_accessor :first_child, :last_child
21
+
22
+ # `previous_sibling` and `next_sibling` are pointers for the previous and next sibling nodes.
23
+ attr_accessor :previous_sibling, :next_sibling
24
+
25
+ # Properties required to represent node.
26
+ attr_accessor :tag, :data, :namespace
27
+
28
+ # Reader for attributes associated with this node.
29
+ attr_reader :attributes
30
+
5
31
  # Represents the error token.
6
32
  Error = Class.new(Node)
7
33
 
34
+ def text_content
35
+ nil
36
+ end
37
+
38
# Finds the first attribute of this node whose key and namespace both equal
# the given ones.
# @param [String] key
# @param [String, nil] namespace
# @return [Object, nil] the matching attribute, or nil when none matches.
def get_attribute_node(key, namespace: nil)
  attributes.find do |candidate|
    candidate.key == key && candidate.namespace == namespace
  end
end
41
+
42
# Yields every descendant of this node, children before their following
# siblings. The node itself and its own siblings are not visited.
def each_descendant
  pending = []
  pending << first_child if first_child
  until pending.empty?
    current = pending.pop
    yield current
    # Push the sibling first so that the child subtree is popped before it.
    follower = current.next_sibling
    pending << follower if follower
    eldest = current.first_child
    pending << eldest if eldest
  end
end
51
+
8
52
  # Represents the text token.
9
- Text = Class.new(Node)
53
+ class Text < Node
54
+ alias_method :text_content, :data
55
+ alias_method :to_s, :text_content
56
+ end
10
57
 
11
58
  # Represents the root document token.
12
- Document = Class.new(Node)
59
+ class Document < Node
60
+ include XPath
61
+ end
13
62
 
14
63
  # Represents the element token including start, end and self-closing token.
15
- Element = Class.new(Node)
64
class Element < Node

  # Concatenates the data of every descendant that is exactly a Text node.
  # TODO: The current innerText() implementation does not conform to WHATWG spec.
  # https://html.spec.whatwg.org/multipage/dom.html#the-innertext-idl-attribute
  # @return [String]
  def inner_text
    buffer = ''
    each_descendant do |descendant|
      next unless descendant.instance_of?(Text)
      buffer << descendant.data
    end
    buffer
  end

  # Renders the start tag with its attributes as space-separated `key=value`
  # pairs (values are emitted as-is, without quoting).
  # @return [String]
  def to_s
    rendered = attributes_to_string
    rendered.empty? ? "<#{tag}>" : "<#{tag} #{rendered}>"
  end

  private

  # Joins all attributes into a single `key=value key=value` string.
  def attributes_to_string
    attributes.map { |attr| "#{attr.key}=#{attr.value}" }.join(' ')
  end
end
16
89
 
17
90
  # Represents the comment token like "<!-- foo -->".
18
91
  Comment = Class.new(Node)
@@ -30,37 +103,25 @@ module Gammo
30
103
  # template, td, th, and caption elements"
31
104
  DEFAULT_SCOPE_MARKER = Node::ScopeMarker.new
32
105
 
33
- # Raised if uncaught node is given for particular operations.
34
- # @!visibility private
35
- UncaughtTypeError = Class.new(ArgumentError)
36
-
37
- # Raised if anything goes wrong on hierarchy while node operations.
38
- # @!visibility private
39
- HierarchyRequestError = Class.new(ArgumentError)
40
-
41
- # `parent` is the pointer for the parent node.
42
- attr_accessor :parent
43
-
44
- # `first_child` and `last_child` are pointers for the first and the last nodes.
45
- attr_accessor :first_child, :last_child
46
-
47
- # `previous_sibling` and `next_sibling` are pointers for the previous and next sibling nodes.
48
- attr_accessor :previous_sibling, :next_sibling
49
-
50
- # Properties required to represent node.
51
- attr_accessor :tag, :data, :namespace, :attributes
52
-
53
106
  # Constructs a node which represents HTML element node.
54
107
  # @param [String] tag
55
108
  # @param [String] data
56
109
  # @param [String, NilClass] namespace
57
- # @param [Hash(String => String)] attributes
110
+ # @param [Gammo::Attributes] attributes
58
111
  # @return [Gammo::Node]
59
- def initialize(tag: nil, data: nil, namespace: nil, attributes: [])
112
+ def initialize(tag: nil, data: nil, namespace: nil, attributes: Attributes.new([]))
60
113
  @tag = tag
61
114
  @data = data
62
115
  @namespace = namespace
63
- @attributes = attributes
116
+ @attributes = Attributes.new(attributes, owner_element: self)
117
+ end
118
+
119
+ # Sets attributes in self.
120
+ # @param [Gammo::Attributes] attrs
121
def attributes=(attrs)
  # Build a fresh collection over a copy of the given attributes instead of
  # duplicating the given collection: the new collection gets its own lookup
  # state, is owned by this node, and a plain Array (or nil) is accepted too.
  # Every attribute's owner_element is pointed at this node by Attributes.new.
  @attributes = Attributes.new(attrs.to_a.dup, owner_element: self)
end
65
126
 
66
127
  # Inserts a node before a reference node as a child of a specified parent node.
@@ -142,6 +203,39 @@ module Gammo
142
203
  }
143
204
  end
144
205
 
206
+ # Selects all nodes for which the given block evaluates to true.
207
def select(&block)
  # The walk starts at this node and follows both first_child and
  # next_sibling links of every visited node, the receiver included.
  matched = []
  pending = [self]
  until pending.empty?
    current = pending.pop
    matched << current if block.call(current)
    # Sibling goes in first so that the child subtree is visited before it.
    sibling = current.next_sibling
    pending << sibling if sibling
    child = current.first_child
    pending << child if child
  end
  matched
end
218
+
219
# Collects the direct children of this node by following the next_sibling
# chain that starts at first_child.
# @return [Array] children in sibling order; empty when there are none.
def children
  collected = []
  cursor = first_child
  loop do
    break unless cursor
    collected << cursor
    cursor = cursor.next_sibling
  end
  collected
end
228
+
229
# Walks up the parent chain until a document node is reached.
# @return [Object, nil] the document this node belongs to, or nil when the
#   node is detached, i.e. the chain ends without reaching a document
#   (previously this case raised NoMethodError on nil).
def owner_document
  node = self
  node = node.parent while node && !node.document?
  node
end
234
+
235
# Tells whether this node is exactly a Document (subclasses do not count).
# @return [TrueClass, FalseClass]
def document?
  self.class.equal?(Document)
end
238
+
145
239
  private
146
240
 
147
241
  # @!visibility private
@@ -319,7 +319,9 @@ module Gammo
319
319
 
320
320
  # @!visibility private
321
321
  def add_element
322
- add_child(Node::Element.new(tag: token.tag, data: token.data, attributes: token.attributes))
322
+ elm = Node::Element.new(tag: token.tag, data: token.data)
323
+ elm.attributes = Attributes.new(token.attributes, owner_element: elm)
324
+ add_child(elm)
323
325
  end
324
326
 
325
327
  # @!visibility private
@@ -1,3 +1,3 @@
1
1
  module Gammo
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -0,0 +1,74 @@
1
+ require 'gammo/xpath/parser'
2
+ require 'gammo/xpath/context'
3
+
4
module Gammo
  module XPath
    # Result types
    # But features related to snapshot and ordered node are not supported.
    # TODO: Support official result types.
    #   - ORDERED_NODE_ITERATOR_TYPE
    #   - UNORDERED_NODE_SNAPSHOT_TYPE
    #   - ORDERED_NODE_SNAPSHOT_TYPE
    ANY_TYPE                     = 0
    NUMBER_TYPE                  = 1
    STRING_TYPE                  = 2
    BOOLEAN_TYPE                 = 3
    UNORDERED_NODE_ITERATOR_TYPE = 4
    ANY_UNORDERED_NODE_TYPE      = 8
    FIRST_ORDERED_NODE_TYPE      = 9

    # Class for traversing DOM tree built by Gammo::Parser by a given expression.
    # @!visibility private
    class Traverser
      # Constructs an instance of Gammo::XPath::Traverser.
      # @param [String] expr
      # @param [Integer] result_type
      # @!visibility private
      def initialize(expr:, result_type:)
        @expr = expr
        @result_type = result_type
      end

      # Evaluates a given expression and returns value according to the
      # result type.
      # @param [Gammo::XPath::Context] context
      # @return [String, Integer, TrueClass, FalseClass, Gammo::XPath::NodeSet]
      # @raise [TypeError] if a node result type is requested but the
      #   expression did not evaluate to a node set.
      # @raise [ArgumentError] if the result type is not one of the supported
      #   constants.
      # @!visibility private
      def evaluate(context)
        convert_value context, Parser.new(@expr).parse.evaluate(context)
      end

      private

      # Converts the evaluated value into the representation requested by the
      # result type.
      # @!visibility private
      def convert_value(context, value)
        case @result_type
        when ANY_TYPE     then value.value
        when NUMBER_TYPE  then value.to_number
        when STRING_TYPE  then value.to_s
        when BOOLEAN_TYPE then value.to_bool
        when UNORDERED_NODE_ITERATOR_TYPE
          node_set_from(context, value)
        # The single-node types return the first node of the set. The second
        # constant used to be spelled FIRST_UNORDERED_NODE_TYPE, which is not
        # defined, so this branch raised NameError for result type 9.
        when ANY_UNORDERED_NODE_TYPE, FIRST_ORDERED_NODE_TYPE
          node_set_from(context, value).first
        else
          fail ArgumentError, "unsupported result type: #{@result_type.inspect}"
        end
      end

      # Returns the value as a node set, rejecting non node-set values.
      # @!visibility private
      def node_set_from(context, value)
        fail TypeError, 'the result is not a node set' unless value.node_set?
        value.to_node_set(context)
      end
    end

    # Traverses DOM tree by a given expression, and returns a result according
    # to the result type.
    # @param [String] expr
    # @param [Hash{Symbol => String, Symbol, Integer, TrueClass, FalseClass, #call}] variables
    # @param [Integer] result_type
    # @param [Gammo::Node] context_node
    # @return [String, Integer, TrueClass, FalseClass, Gammo::XPath::NodeSet]
    def xpath(expr, variables: {}, result_type: UNORDERED_NODE_ITERATOR_TYPE, context_node: self)
      Traverser.new(
        expr: expr,
        result_type: result_type,
      ).evaluate(Context.new(node: context_node, variables: variables))
    end
  end
end
@@ -0,0 +1,231 @@
1
+ require 'gammo/xpath/node_set'
2
+ require 'gammo/xpath/ast/subclassify'
3
+ require 'gammo/tags'
4
+
5
+ module Gammo
6
+ module XPath
7
+ module AST
8
+ # Class for representing Axes.
9
+ # https://www.w3.org/TR/1999/REC-xpath-19991116/#axes
10
+ # @!visibility private
11
+ class Axis
12
+ attr_reader :node_test, :predicates
13
+
14
+ extend Subclassify
15
+
16
+ def initialize(node_test:, predicates: [])
17
+ @node_test = node_test
18
+ @predicates = Array(predicates)
19
+ end
20
+
21
# Fills node_set with the nodes this axis selects from context_node (via
# #strain), then narrows it down with each predicate in turn. While a
# predicate is evaluated the context carries the candidate node, the size of
# the set being filtered and the candidate's 1-based position in it.
def evaluate_context_node_with_node_set(context, context_node, node_set)
  context.position = 0
  strain(context, context_node, node_set)
  predicates.each do |predicate|
    survivors = Gammo::XPath::NodeSet.new
    node_set.each_with_index do |candidate, index|
      context.node = candidate
      context.size = node_set.size
      context.position = index.succ
      survivors << candidate if predicate.evaluate(context)
    end
    node_set.replace(survivors)
  end
end
37
+
38
+ private
39
+
40
class Ancestor < Axis
  declare :ancestor

  # Adds every node on the parent chain of the context node that passes the
  # node test. For an attribute context node the chain starts at its owner
  # element, which is itself tested as well.
  def strain(context, context_node, node_set)
    start = context_node
    if start.instance_of?(Gammo::Attribute)
      start = start.owner_element
      node_set << start if node_test.match?(start)
    end
    ancestor = start.parent
    while ancestor
      node_set << ancestor if node_test.match?(ancestor)
      ancestor = ancestor.parent
    end
  end
end
54
+
55
class AncestorOrSelf < Axis
  declare :ancestor_or_self

  # Tests the context node itself, then every node on its parent chain. For
  # an attribute context node the chain continues with its owner element.
  def strain(context, context_node, node_set)
    node_set << context_node if node_test.match?(context_node)
    start = context_node
    if start.instance_of?(Gammo::Attribute)
      start = start.owner_element
      node_set << start if node_test.match?(start)
    end
    ancestor = start.parent
    while ancestor
      node_set << ancestor if node_test.match?(ancestor)
      ancestor = ancestor.parent
    end
  end
end
70
+
71
class Attribute < Axis
  declare :attribute

  # Adds the attributes of the context node that pass the node test.
  def strain(context, context_node, node_set)
    # An attribute node has no attributes of its own (and does not respond to
    # #get_attribute_node / #attributes), so the axis is empty for it. This
    # mirrors the guard used by the child and descendant axes.
    return if context_node.instance_of?(Gammo::Attribute)

    # Fast path: a concrete (non-wildcard) name test needs a single lookup.
    if node_test.instance_of?(NodeTest::Name) && node_test.local != ?*
      attribute =
        if !node_test.namespace
          context_node.get_attribute_node(node_test.local)
        else
          # TODO: Test this properly.
          context_node.get_attribute_node(node_test.local, namespace: node_test.namespace)
        end
      if attribute && attribute.namespace != 'http://www.w3.org/XML/1998/namespace'
        node_set << attribute if node_test.match?(attribute)
      end
      return
    end

    node_set.concat(context_node.attributes.select { |attribute|
      node_test.match?(attribute)
    })
  end
end
94
+
95
class Child < Axis
  declare :child

  # Adds the direct children of the context node that pass the node test.
  # Attribute context nodes have no children, so nothing is added for them.
  def strain(context, context_node, node_set)
    return if context_node.instance_of?(Gammo::Attribute)
    child = context_node.first_child
    loop do
      break unless child
      node_set << child if node_test.match?(child)
      child = child.next_sibling
    end
  end
end
107
+
108
+ class Descendant < Axis
109
+ declare :descendant
110
+
111
+ def strain(context, context_node, node_set)
112
+ return if context_node.instance_of?(Gammo::Attribute)
113
+ context_node.each_descendant do |node|
114
+ node_set << node if node_test.match?(node)
115
+ end
116
+ end
117
+ end
118
+
119
+ class DescendantOrSelf < Axis
120
+ declare :descendant_or_self
121
+
122
+ def strain(context, context_node, node_set)
123
+ node_set << context_node if node_test.match?(context_node)
124
+ return if context_node.instance_of?(Gammo::Attribute)
125
+ context_node.each_descendant do |node|
126
+ node_set << node if node_test.match?(node)
127
+ end
128
+ end
129
+ end
130
+
131
class Following < Axis
  declare :following

  # Adds every node that comes after the context node in document order,
  # excluding its descendants, when it passes the node test. For an attribute
  # context node the walk starts from its owner element.
  def strain(context, context_node, node_set)
    context_node = context_node.owner_element if context_node.instance_of?(Gammo::Attribute)
    each_following(context_node) do |node|
      node_set << node if node_test.match?(node)
    end
  end

  # Yields, in document order, the nodes following context_node: for the
  # node and then each of its ancestors, every following sibling together
  # with that sibling's whole subtree.
  #
  # The previous implementation looped on `context_node.next_sibling` without
  # ever advancing context_node (so it never terminated when the parent was
  # <html>), never yielded the next sibling itself and never reached nodes
  # that follow an ancestor.
  def each_following(context_node, &block)
    anchor = context_node
    while anchor
      sibling = anchor.next_sibling
      while sibling
        each_in_subtree(sibling, &block)
        sibling = sibling.next_sibling
      end
      anchor = anchor.parent
    end
  end

  private

  # Yields root and then all of its descendants in document order.
  def each_in_subtree(root)
    stack = [root]
    until stack.empty?
      node = stack.pop
      yield node
      # Never leave the subtree: root's own siblings are handled by the caller.
      stack << node.next_sibling if !node.equal?(root) && node.next_sibling
      stack << node.first_child if node.first_child
    end
  end
end
154
+
155
class FollowingSibling < Axis
  declare :following_sibling

  # Adds every later sibling of the context node that passes the node test.
  # Nothing is added for attribute context nodes.
  def strain(context, context_node, node_set)
    return if context_node.instance_of?(Gammo::Attribute)
    sibling = context_node.next_sibling
    while sibling
      node_set << sibling if node_test.match?(sibling)
      sibling = sibling.next_sibling
    end
  end
end
166
+
167
+ class Namespace < Axis
168
+ declare :namespace
169
+
170
+ def strain(context, context_node, node_set)
171
+ # Not implemented
172
+ end
173
+ end
174
+
175
class Parent < Axis
  declare :parent

  # Adds the parent of the context node when it passes the node test. The
  # parent of an attribute node is its owner element.
  def strain(context, context_node, node_set)
    parent =
      if context_node.instance_of?(Gammo::Attribute)
        context_node.owner_element
      else
        context_node.parent
      end
    # A root node (or an attribute without an owner) has no parent; do not
    # hand nil to the node test or put it into the node set.
    return unless parent
    node_set << parent if node_test.match?(parent)
  end
end
184
+
185
class Preceding < Axis
  declare :preceding

  # Adds every node that comes before the context node in document order,
  # excluding its ancestors, when it passes the node test. For an attribute
  # context node the walk starts from its owner element.
  def strain(context, context_node, node_set)
    context_node = context_node.owner_element if context_node.instance_of?(Gammo::Attribute)
    each_preceding(context_node) do |node|
      node_set << node if node_test.match?(node)
    end
  end

  private

  # Yields the preceding nodes in reverse document order (nearest first):
  # for the node and then each of its ancestors, every previous sibling
  # together with that sibling's whole subtree. Ancestors themselves are
  # never yielded.
  #
  # The previous implementation yielded the previous siblings only and so
  # missed all of their descendants.
  def each_preceding(context_node, &block)
    node = context_node
    while node.parent
      sibling = node.previous_sibling
      while sibling
        each_in_subtree_reversed(sibling, &block)
        sibling = sibling.previous_sibling
      end
      node = node.parent
    end
  end

  # Yields the subtree rooted at root in reverse document order, i.e. the
  # last descendant first and root itself last.
  def each_in_subtree_reversed(root, &block)
    ordered = []
    stack = [root]
    until stack.empty?
      node = stack.pop
      ordered << node
      # Never leave the subtree: root's own siblings are handled by the caller.
      stack << node.next_sibling if !node.equal?(root) && node.next_sibling
      stack << node.first_child if node.first_child
    end
    ordered.reverse_each(&block)
  end
end
208
+
209
class PrecedingSibling < Axis
  declare :preceding_sibling

  # Adds every earlier sibling of the context node that passes the node
  # test, nearest sibling first. Nothing is added for attribute context nodes.
  def strain(context, context_node, node_set)
    return if context_node.instance_of?(Gammo::Attribute)
    sibling = context_node.previous_sibling
    while sibling
      node_set << sibling if node_test.match?(sibling)
      sibling = sibling.previous_sibling
    end
  end
end
220
+
221
+ class Self < Axis
222
+ declare :self
223
+
224
+ def strain(context, context_node, node_set)
225
+ node_set << context_node if node_test.match?(context_node)
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
231
+ end