hexp 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/.travis.yml +2 -3
- data/.yardopts +1 -0
- data/Gemfile.devtools +17 -24
- data/Gemfile.lock +77 -82
- data/{LICENSE.md → LICENSE} +0 -0
- data/README.md +20 -10
- data/Rakefile +17 -1
- data/bench/node/rewrite_bench.rb +23 -0
- data/config/flay.yml +1 -1
- data/config/flog.yml +1 -1
- data/config/reek.yml +6 -0
- data/config/yardstick.yml +7 -2
- data/hexp.gemspec +6 -5
- data/lib/hexp.rb +26 -18
- data/lib/hexp/builder.rb +54 -35
- data/lib/hexp/css_selector.rb +124 -25
- data/lib/hexp/css_selector/parser.rb +45 -2
- data/lib/hexp/css_selector/sass_parser.rb +16 -0
- data/lib/hexp/dom.rb +2 -0
- data/lib/hexp/dsl.rb +20 -21
- data/lib/hexp/errors.rb +2 -2
- data/lib/hexp/list.rb +14 -11
- data/lib/hexp/node.rb +89 -38
- data/lib/hexp/node/attributes.rb +43 -26
- data/lib/hexp/node/children.rb +59 -4
- data/lib/hexp/node/css_selection.rb +113 -7
- data/lib/hexp/node/domize.rb +22 -13
- data/lib/hexp/node/normalize.rb +3 -9
- data/lib/hexp/node/pp.rb +13 -9
- data/lib/hexp/node/rewriter.rb +28 -3
- data/lib/hexp/node/{selector.rb → selection.rb} +48 -2
- data/lib/hexp/nokogiri/reader.rb +2 -2
- data/lib/hexp/text_node.rb +1 -1
- data/lib/hexp/version.rb +1 -1
- data/spec/unit/hexp/css_selector/universal_spec.rb +7 -0
- data/spec/unit/hexp/node/domize_spec.rb +12 -0
- data/spec/unit/hexp/node/{selector_spec.rb → selection_spec.rb} +9 -9
- data/spec/unit/hexp/parse_spec.rb +10 -0
- metadata +40 -44
- data/SPEC.md +0 -53
- data/notes +0 -34
data/lib/hexp/node/children.rb
CHANGED
@@ -5,16 +5,30 @@ module Hexp
|
|
5
5
|
module Children
|
6
6
|
# Is this node an empty node
|
7
7
|
#
|
8
|
-
#
|
9
|
-
#
|
8
|
+
# @example
|
9
|
+
# H[:p, class: 'foo'].empty? #=> true
|
10
|
+
# H[:p, [H[:span]]].empty? #=> false
|
10
11
|
#
|
11
|
-
# @return [
|
12
|
-
#
|
12
|
+
# @return [true,false]
|
13
|
+
# True if this node has no children
|
13
14
|
#
|
15
|
+
# @api public
|
14
16
|
def empty?
|
15
17
|
children.empty?
|
16
18
|
end
|
17
19
|
|
20
|
+
# Add a child node to the end of the list of children
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# H[:ul].add_child(H[:li, "chunky"]) #=> H[:ul, [H[:li, "chunky"]]]
|
24
|
+
#
|
25
|
+
# @param [Hexp::Node] child
|
26
|
+
# The child node to add
|
27
|
+
#
|
28
|
+
# @return [Hexp::Node]
|
29
|
+
# A new node that has the child added to its children
|
30
|
+
#
|
31
|
+
# @api public
|
18
32
|
def add_child(child)
|
19
33
|
H[
|
20
34
|
self.tag,
|
@@ -25,16 +39,57 @@ module Hexp
|
|
25
39
|
alias :add :add_child
|
26
40
|
alias :<< :add_child
|
27
41
|
|
42
|
+
# All the text in this node and its descendants
|
43
|
+
#
|
44
|
+
# Concatenates the contents of all text nodes.
|
45
|
+
#
|
46
|
+
# @example
|
47
|
+
# H[:div, [
|
48
|
+
# H[:span, "My name is"],
|
49
|
+
# " ",
|
50
|
+
# H[:strong, "@plexus"],
|
51
|
+
# "."
|
52
|
+
# ]
|
53
|
+
# ].text #=> "My name is @plexus."
|
54
|
+
#
|
55
|
+
# @return [String]
|
56
|
+
#
|
57
|
+
# @api public
|
28
58
|
def text
|
29
59
|
children.map do |node|
|
30
60
|
node.text? ? node : node.text
|
31
61
|
end.join
|
32
62
|
end
|
33
63
|
|
64
|
+
# Replace the children of this node with a new list of children
|
65
|
+
#
|
66
|
+
# @example
|
67
|
+
# H[:div, "Hello"].set_children([H[:span, "wicked!"], H[:br]])
|
68
|
+
# # => H[:div, [H[:span, "wicked!"], H[:br]]]
|
69
|
+
#
|
70
|
+
# @param [Array,Hexp::NodeList] new_children
|
71
|
+
#
|
72
|
+
# @return [Hexp::Node]
|
73
|
+
#
|
74
|
+
# @api public
|
34
75
|
def set_children(new_children)
|
35
76
|
H[tag, attributes, new_children]
|
36
77
|
end
|
37
78
|
|
79
|
+
# Perform an action on each child node, and replace the node with the result
|
80
|
+
#
|
81
|
+
# @example
|
82
|
+
# H[:div, [H[:span, "foo"]]].map_children do |node|
|
83
|
+
# node.add_class(node.text)
|
84
|
+
# end
|
85
|
+
# # => H[:div, [H[:span, {class: "foo"}, "foo"]]]
|
86
|
+
#
|
87
|
+
# @yieldparam [Hexp::Node]
|
88
|
+
# The child node
|
89
|
+
#
|
90
|
+
# @return [Hexp::Node]
|
91
|
+
#
|
92
|
+
# @api public
|
38
93
|
def map_children(&blk)
|
39
94
|
return to_enum(:map_children) unless block_given?
|
40
95
|
H[tag, attributes, children.map(&blk)]
|
@@ -2,15 +2,74 @@ module Hexp
|
|
2
2
|
class Node
|
3
3
|
# Select nodes using CSS selectors
|
4
4
|
#
|
5
|
-
|
5
|
+
# The main interface to this is {Hexp::Node#select}, although there's
|
6
|
+
# nothing stopping you from using this class directly.
|
7
|
+
#
|
8
|
+
# This class is +Enumerable+, and calling {#each} without a block will give
|
9
|
+
# you an +Enumerator+, so you have all Ruby's tasty list operations at your
|
10
|
+
# disposal.
|
11
|
+
#
|
12
|
+
# Only a subset of the
|
13
|
+
# {http://www.w3.org/TR/css3-selectors/ CSS 3 selector syntax}
|
14
|
+
# is supported. Parsing a selector that contains unsupported elements
|
15
|
+
# should raise an exception.
|
16
|
+
#
|
17
|
+
# * tag selector : +div+, +a+, +section+
|
18
|
+
# * class selector : +.big+, +.user_profile+
|
19
|
+
# * id selector : +#main_content+, +#sidebar+
|
20
|
+
# * attribute selectors : +[href]+, +[class~=foo]+, +[lang|=en]+
|
21
|
+
#
|
22
|
+
# Attribute selectors support
|
23
|
+
# {http://www.w3.org/TR/css3-selectors/#attribute-selectors all the operators of the CSS 3 spec}
|
24
|
+
#, so have a look there for more details.
|
25
|
+
#
|
26
|
+
# Of course you can combine all these.
|
27
|
+
#
|
28
|
+
# @example
|
29
|
+
# link = H[:a, {class: 'foo bar', lang: 'fr-be', href: 'http://example.com'}, "Hello, World"]
|
30
|
+
# node = H[:div, {class: 'wrap'}, link]
|
31
|
+
# node.select('div.wrap a.foo.bar[lang|=fr][href^=http]') do |a|
|
32
|
+
# p a.text
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
class CssSelection < Selection
|
36
|
+
include Enumerable
|
37
|
+
|
38
|
+
# Create a new CssSelection based on a root node and a selector
|
39
|
+
#
|
40
|
+
# The selector can be unparsed (a plain +String+), or parsed. This class
|
41
|
+
# works recursively by passing a subset of the parsed selector to a subset
|
42
|
+
# of the tree, hence why this matters.
|
43
|
+
#
|
44
|
+
# @param [Hexp::Node] node
|
45
|
+
# Root node of the tree
|
46
|
+
#
|
47
|
+
# @param [String,Hexp::CssSelector::CommaSequence] css_selector
|
48
|
+
# CSS selector
|
49
|
+
#
|
50
|
+
# @api public
|
6
51
|
def initialize(node, css_selector)
|
7
|
-
@node, @css_selector = node, css_selector
|
52
|
+
@node, @css_selector = node.to_hexp, css_selector.freeze
|
8
53
|
end
|
9
54
|
|
55
|
+
# Debugging representation
|
56
|
+
#
|
57
|
+
# @return [String]
|
58
|
+
#
|
59
|
+
# @api public
|
10
60
|
def inspect
|
11
61
|
"#<#{self.class} @node=#{@node.inspect} @css_selector=#{@css_selector.inspect} matches=#{node_matches?}>"
|
12
62
|
end
|
13
63
|
|
64
|
+
# Iterate over the nodes that match
|
65
|
+
#
|
66
|
+
# @yieldparam [Hexp::Node]
|
67
|
+
# Each matching node
|
68
|
+
#
|
69
|
+
# @return [Enumerator,CssSelection]
|
70
|
+
# Enumerator if no block is given, or self
|
71
|
+
#
|
72
|
+
# @api public
|
14
73
|
def each(&block)
|
15
74
|
return to_enum(:each) unless block_given?
|
16
75
|
|
@@ -18,8 +77,17 @@ module Hexp
|
|
18
77
|
next_selection_for(child).each(&block)
|
19
78
|
end
|
20
79
|
yield @node if node_matches?
|
80
|
+
self
|
21
81
|
end
|
22
82
|
|
83
|
+
# Replace / alter each node that matches
|
84
|
+
#
|
85
|
+
# @yieldparam [Hexp::Node]
|
86
|
+
# Each matching node
|
87
|
+
#
|
88
|
+
# @return [Hexp::Node]
|
89
|
+
#
|
90
|
+
# @api private
|
23
91
|
def rewrite(&block)
|
24
92
|
return @node if @node.text?
|
25
93
|
|
@@ -35,29 +103,67 @@ module Hexp
|
|
35
103
|
|
36
104
|
private
|
37
105
|
|
106
|
+
# The CSS selector, parsed to a comma sequence
|
107
|
+
#
|
108
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
109
|
+
#
|
110
|
+
# @api private
|
38
111
|
def comma_sequence
|
39
|
-
@comma_sequence ||=
|
112
|
+
@comma_sequence ||= parse_selector
|
40
113
|
end
|
41
114
|
|
42
|
-
|
43
|
-
|
115
|
+
# Parse the CSS selector, if it isn't in a parsed form already
|
116
|
+
#
|
117
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
118
|
+
#
|
119
|
+
# @api private
|
120
|
+
def parse_selector
|
121
|
+
return @css_selector if @css_selector.is_a? CssSelector::CommaSequence
|
44
122
|
CssSelector::Parser.call(@css_selector)
|
45
123
|
end
|
46
124
|
|
125
|
+
# Is the current node part of the selection
|
126
|
+
#
|
127
|
+
# @return [true,false]
|
128
|
+
#
|
129
|
+
# @api private
|
47
130
|
def node_matches?
|
48
131
|
comma_sequence.matches?(@node)
|
49
132
|
end
|
50
133
|
|
51
|
-
#
|
52
|
-
#
|
134
|
+
# Consume the matching part of the comma sequence, return the rest
|
135
|
+
#
|
136
|
+
# Returns a new comma sequence with the parts removed that have been
|
137
|
+
# consumed by matching against this node. If no part matches, returns nil.
|
138
|
+
#
|
139
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
140
|
+
#
|
141
|
+
# @api private
|
53
142
|
def next_comma_sequence
|
54
143
|
@next_comma_sequence ||= CssSelector::CommaSequence.new(consume_matching_heads)
|
55
144
|
end
|
56
145
|
|
146
|
+
# Recurse down into a child node, passing in the remaining part of the selector
|
147
|
+
#
|
148
|
+
# @param [Hexp::Node] child
|
149
|
+
# One of the children of the node in this selection object
|
150
|
+
#
|
151
|
+
# @return [Hexp::Node::CssSelection]
|
152
|
+
#
|
153
|
+
# @api private
|
57
154
|
def next_selection_for(child)
|
58
155
|
self.class.new(child, next_comma_sequence)
|
59
156
|
end
|
60
157
|
|
158
|
+
# For each sequence in the comma sequence, remove the head if it matches
|
159
|
+
#
|
160
|
+
# For example, if this node is a `H[:div]`, and the selector is
|
161
|
+
# `span.foo, div a[href]`, then the result of this method will be
|
162
|
+
# `span.foo, a[href]`. This can then be used to match any child nodes.
|
163
|
+
#
|
164
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
165
|
+
#
|
166
|
+
# @api private
|
61
167
|
def consume_matching_heads
|
62
168
|
comma_sequence.members.flat_map do |sequence|
|
63
169
|
if sequence.head_matches? @node
|
data/lib/hexp/node/domize.rb
CHANGED
@@ -9,16 +9,18 @@ module Hexp
|
|
9
9
|
# The resulting DOM Document
|
10
10
|
#
|
11
11
|
# @return [Nokogiri::HTML::Document]
|
12
|
-
# @api private
|
13
12
|
#
|
13
|
+
# @api private
|
14
14
|
attr_reader :dom
|
15
15
|
|
16
16
|
# Instantiate a Domizer
|
17
17
|
#
|
18
|
-
# @param
|
19
|
-
# @param options [Hash] :include_doctype defaults to true
|
20
|
-
# @api private
|
18
|
+
# @param [Hexp::Node] hexp
|
21
19
|
#
|
20
|
+
# @param [Hash] options
|
21
|
+
# @option options [true,false] :include_doctype Defaults to true
|
22
|
+
#
|
23
|
+
# @api private
|
22
24
|
def initialize(hexp, options = {})
|
23
25
|
@dom = Hexp::DOM
|
24
26
|
@raw = hexp
|
@@ -28,10 +30,14 @@ module Hexp
|
|
28
30
|
# Turn the hexp into a DOM
|
29
31
|
#
|
30
32
|
# @return [Nokogiri::HTML::Document]
|
31
|
-
# @api private
|
32
33
|
#
|
34
|
+
# @api private
|
33
35
|
def call
|
34
36
|
@doc = dom::Document.new
|
37
|
+
if @options[:html5]
|
38
|
+
@doc.children = dom::NodeSet.new(@doc, [])
|
39
|
+
@doc.create_internal_subset(nil, nil, nil)
|
40
|
+
end
|
35
41
|
@root = domize(@raw)
|
36
42
|
@doc << @root
|
37
43
|
|
@@ -46,10 +52,11 @@ module Hexp
|
|
46
52
|
|
47
53
|
# Turn a Hexp::Node into a Document
|
48
54
|
#
|
49
|
-
# @param
|
55
|
+
# @param [Hexp::Node] hexp
|
56
|
+
#
|
50
57
|
# @return [Nokogiri::HTML::Document]
|
51
|
-
# @api private
|
52
58
|
#
|
59
|
+
# @api private
|
53
60
|
def domize(hexp)
|
54
61
|
dom::Node.new(hexp.tag.to_s, @doc).tap do |node|
|
55
62
|
set_attributes(node, hexp.attributes)
|
@@ -59,11 +66,12 @@ module Hexp
|
|
59
66
|
|
60
67
|
# Set attributes on a DOM node
|
61
68
|
#
|
62
|
-
# @param
|
63
|
-
# @param
|
69
|
+
# @param [Nokogiri::XML::Element] node
|
70
|
+
# @param [Hash] attributes
|
71
|
+
#
|
64
72
|
# @return [void]
|
65
|
-
# @api private
|
66
73
|
#
|
74
|
+
# @api private
|
67
75
|
def set_attributes(node, attributes)
|
68
76
|
attributes.each do |key,value|
|
69
77
|
node[key] = value
|
@@ -72,11 +80,12 @@ module Hexp
|
|
72
80
|
|
73
81
|
# Set children on the DOM node
|
74
82
|
#
|
75
|
-
# @param
|
76
|
-
# @param
|
83
|
+
# @param [Nokogiri::XML::Element] node
|
84
|
+
# @param [Hexp::List] children
|
85
|
+
#
|
77
86
|
# @return [void]
|
78
|
-
# @api private
|
79
87
|
#
|
88
|
+
# @api private
|
80
89
|
def set_children(node, children)
|
81
90
|
children.each do |child|
|
82
91
|
if child.instance_of?(TextNode)
|
data/lib/hexp/node/normalize.rb
CHANGED
@@ -11,7 +11,6 @@ module Hexp
|
|
11
11
|
# Hexp::Node::Normalize.new([:p, {class:'foo'}])
|
12
12
|
#
|
13
13
|
# @api public
|
14
|
-
#
|
15
14
|
def initialize(node)
|
16
15
|
@raw = node
|
17
16
|
end
|
@@ -21,7 +20,6 @@ module Hexp
|
|
21
20
|
# @return [Array] strict hexp node
|
22
21
|
#
|
23
22
|
# @api private
|
24
|
-
#
|
25
23
|
def call
|
26
24
|
[@raw.first, normalized_attributes, normalized_children]
|
27
25
|
end
|
@@ -33,7 +31,6 @@ module Hexp
|
|
33
31
|
# @return [Hash] the attributes hash
|
34
32
|
#
|
35
33
|
# @api private
|
36
|
-
#
|
37
34
|
def attributes
|
38
35
|
attrs = @raw[1]
|
39
36
|
return attrs if attrs.instance_of?(Hash)
|
@@ -45,7 +42,6 @@ module Hexp
|
|
45
42
|
# @return [Hash]
|
46
43
|
#
|
47
44
|
# @api private
|
48
|
-
#
|
49
45
|
def normalized_attributes
|
50
46
|
Hash[*
|
51
47
|
attributes.flat_map do |key, value|
|
@@ -59,7 +55,6 @@ module Hexp
|
|
59
55
|
# @return [Array] the list of child hexps, non-strict
|
60
56
|
#
|
61
57
|
# @api private
|
62
|
-
#
|
63
58
|
def children
|
64
59
|
last = @raw.last
|
65
60
|
if last.respond_to? :to_ary
|
@@ -76,18 +71,17 @@ module Hexp
|
|
76
71
|
# @return [Array] list of normalized hexps
|
77
72
|
#
|
78
73
|
# @api private
|
79
|
-
#
|
80
74
|
def normalized_children
|
81
75
|
Hexp::List[*
|
82
76
|
children.map do |child|
|
83
77
|
case child
|
84
|
-
when Hexp::Node
|
78
|
+
when Hexp::Node, Hexp::TextNode
|
85
79
|
child
|
86
|
-
when String
|
80
|
+
when String
|
87
81
|
Hexp::TextNode.new(child)
|
88
82
|
when ->(ch) { ch.respond_to? :to_hexp }
|
89
83
|
response = child.to_hexp
|
90
|
-
raise FormatError, "to_hexp must return a Hexp::Node, got #{response.inspect}" unless response.instance_of?(Hexp::Node)
|
84
|
+
raise FormatError, "to_hexp must return a Hexp::Node, got #{response.inspect}" unless response.instance_of?(Hexp::Node) || response.instance_of?(Hexp::TextNode)
|
91
85
|
response
|
92
86
|
when Array
|
93
87
|
Hexp::Node[*child.map(&:freeze)]
|
data/lib/hexp/node/pp.rb
CHANGED
@@ -4,9 +4,10 @@ module Hexp
|
|
4
4
|
class PP
|
5
5
|
# Create a new pretty-printer
|
6
6
|
#
|
7
|
-
# @param
|
8
|
-
#
|
7
|
+
# @param [Hexp::Node] node
|
8
|
+
# The node to represent
|
9
9
|
#
|
10
|
+
# @api private
|
10
11
|
def initialize(node)
|
11
12
|
@node = node
|
12
13
|
end
|
@@ -14,8 +15,8 @@ module Hexp
|
|
14
15
|
# Perform the pretty-printing
|
15
16
|
#
|
16
17
|
# @return [String] The pp output
|
17
|
-
# @api private
|
18
18
|
#
|
19
|
+
# @api private
|
19
20
|
def call
|
20
21
|
[
|
21
22
|
@node.class.inspect_name,
|
@@ -27,8 +28,8 @@ module Hexp
|
|
27
28
|
# Format the node tag
|
28
29
|
#
|
29
30
|
# @return [String]
|
30
|
-
# @api private
|
31
31
|
#
|
32
|
+
# @api private
|
32
33
|
def pp_tag
|
33
34
|
"[#{@node.tag.inspect}"
|
34
35
|
end
|
@@ -36,8 +37,8 @@ module Hexp
|
|
36
37
|
# Format the node attributes
|
37
38
|
#
|
38
39
|
# @return [String]
|
39
|
-
# @api private
|
40
40
|
#
|
41
|
+
# @api private
|
41
42
|
def pp_attributes
|
42
43
|
attrs = @node.attributes
|
43
44
|
return '' if attrs.empty?
|
@@ -47,8 +48,8 @@ module Hexp
|
|
47
48
|
# Format the node children
|
48
49
|
#
|
49
50
|
# @return [String]
|
50
|
-
# @api private
|
51
51
|
#
|
52
|
+
# @api private
|
52
53
|
def pp_children
|
53
54
|
children = @node.children
|
54
55
|
return ']' if children.empty?
|
@@ -57,11 +58,14 @@ module Hexp
|
|
57
58
|
|
58
59
|
# Indent a multiline string with a number of spaces
|
59
60
|
#
|
60
|
-
# @param
|
61
|
-
#
|
61
|
+
# @param [String] string
|
62
|
+
# The string to indent
|
63
|
+
# @param [Integer] indent
|
64
|
+
# The number of spaces to use for indentation
|
65
|
+
#
|
62
66
|
# @return [String]
|
63
|
-
# @api private
|
64
67
|
#
|
68
|
+
# @api private
|
65
69
|
def self.indent(string, indent = 2)
|
66
70
|
string.lines.map {|line| " "*indent + line}.join
|
67
71
|
end
|