hexp 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/.travis.yml +2 -3
- data/.yardopts +1 -0
- data/Gemfile.devtools +17 -24
- data/Gemfile.lock +77 -82
- data/{LICENSE.md → LICENSE} +0 -0
- data/README.md +20 -10
- data/Rakefile +17 -1
- data/bench/node/rewrite_bench.rb +23 -0
- data/config/flay.yml +1 -1
- data/config/flog.yml +1 -1
- data/config/reek.yml +6 -0
- data/config/yardstick.yml +7 -2
- data/hexp.gemspec +6 -5
- data/lib/hexp.rb +26 -18
- data/lib/hexp/builder.rb +54 -35
- data/lib/hexp/css_selector.rb +124 -25
- data/lib/hexp/css_selector/parser.rb +45 -2
- data/lib/hexp/css_selector/sass_parser.rb +16 -0
- data/lib/hexp/dom.rb +2 -0
- data/lib/hexp/dsl.rb +20 -21
- data/lib/hexp/errors.rb +2 -2
- data/lib/hexp/list.rb +14 -11
- data/lib/hexp/node.rb +89 -38
- data/lib/hexp/node/attributes.rb +43 -26
- data/lib/hexp/node/children.rb +59 -4
- data/lib/hexp/node/css_selection.rb +113 -7
- data/lib/hexp/node/domize.rb +22 -13
- data/lib/hexp/node/normalize.rb +3 -9
- data/lib/hexp/node/pp.rb +13 -9
- data/lib/hexp/node/rewriter.rb +28 -3
- data/lib/hexp/node/{selector.rb → selection.rb} +48 -2
- data/lib/hexp/nokogiri/reader.rb +2 -2
- data/lib/hexp/text_node.rb +1 -1
- data/lib/hexp/version.rb +1 -1
- data/spec/unit/hexp/css_selector/universal_spec.rb +7 -0
- data/spec/unit/hexp/node/domize_spec.rb +12 -0
- data/spec/unit/hexp/node/{selector_spec.rb → selection_spec.rb} +9 -9
- data/spec/unit/hexp/parse_spec.rb +10 -0
- metadata +40 -44
- data/SPEC.md +0 -53
- data/notes +0 -34
data/lib/hexp/node/children.rb
CHANGED
@@ -5,16 +5,30 @@ module Hexp
|
|
5
5
|
module Children
|
6
6
|
# Is this node an empty node
|
7
7
|
#
|
8
|
-
#
|
9
|
-
#
|
8
|
+
# @example
|
9
|
+
# H[:p, class: 'foo'].empty? #=> true
|
10
|
+
# H[:p, [H[:span]]].empty? #=> false
|
10
11
|
#
|
11
|
-
# @return [
|
12
|
-
#
|
12
|
+
# @return [true,false]
|
13
|
+
# True if this node has no children
|
13
14
|
#
|
15
|
+
# @api public
|
14
16
|
def empty?
|
15
17
|
children.empty?
|
16
18
|
end
|
17
19
|
|
20
|
+
# Add a child node to the end of the list of children
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# H[:ul].add_child(H[:li, "chunky"]) #=> H[:ul, [H[:li, "chunky"]]]
|
24
|
+
#
|
25
|
+
# @param [Hexp::Node] child
|
26
|
+
# The child node to add
|
27
|
+
#
|
28
|
+
# @return [Hexp::Node]
|
29
|
+
# A new node that has the child added to its children
|
30
|
+
#
|
31
|
+
# @api public
|
18
32
|
def add_child(child)
|
19
33
|
H[
|
20
34
|
self.tag,
|
@@ -25,16 +39,57 @@ module Hexp
|
|
25
39
|
alias :add :add_child
|
26
40
|
alias :<< :add_child
|
27
41
|
|
42
|
+
# All the text in this node and its descendants
|
43
|
+
#
|
44
|
+
# Concatenates the contents of all text nodes.
|
45
|
+
#
|
46
|
+
# @example
|
47
|
+
# H[:div, [
|
48
|
+
# H[:span, "My name is"],
|
49
|
+
# " ",
|
50
|
+
# H[:strong, "@plexus"],
|
51
|
+
# "."
|
52
|
+
# ]
|
53
|
+
# ].text #=> "My name is @plexus."
|
54
|
+
#
|
55
|
+
# @return [String]
|
56
|
+
#
|
57
|
+
# @api public
|
28
58
|
def text
|
29
59
|
children.map do |node|
|
30
60
|
node.text? ? node : node.text
|
31
61
|
end.join
|
32
62
|
end
|
33
63
|
|
64
|
+
# Replace the children of this node with a new list of children
|
65
|
+
#
|
66
|
+
# @example
|
67
|
+
# H[:div, "Hello"].set_children([H[:span, "wicked!"], H[:br]])
|
68
|
+
# # => H[:div, [H[:span, "wicked!"], H[:br]]]
|
69
|
+
#
|
70
|
+
# @param [Array,Hexp::NodeList] new_children
|
71
|
+
#
|
72
|
+
# @return [Hexp::Node]
|
73
|
+
#
|
74
|
+
# @api public
|
34
75
|
def set_children(new_children)
|
35
76
|
H[tag, attributes, new_children]
|
36
77
|
end
|
37
78
|
|
79
|
+
# Perform an action on each child node, and replace the node with the result
|
80
|
+
#
|
81
|
+
# @example
|
82
|
+
# H[:div, [H[:span, "foo"]]].map_children do |node|
|
83
|
+
# node.add_class(node.text)
|
84
|
+
# end
|
85
|
+
# # => H[:div, [H[:span, {class: "foo"}, "foo"]]]
|
86
|
+
#
|
87
|
+
# @yieldparam [Hexp::Node]
|
88
|
+
# The child node
|
89
|
+
#
|
90
|
+
# @return [Hexp::Node]
|
91
|
+
#
|
92
|
+
# @api public
|
38
93
|
def map_children(&blk)
|
39
94
|
return to_enum(:map_children) unless block_given?
|
40
95
|
H[tag, attributes, children.map(&blk)]
|
@@ -2,15 +2,74 @@ module Hexp
|
|
2
2
|
class Node
|
3
3
|
# Select nodes using CSS selectors
|
4
4
|
#
|
5
|
-
|
5
|
+
# The main interface to this is {Hexp::Node#select}, although there's
|
6
|
+
# nothing stopping you from using this class directly.
|
7
|
+
#
|
8
|
+
# This class is +Enumerable+, and calling {#each} without a block will give
|
9
|
+
# you an +Enumerator+, so you have all Ruby's tasty list operations at your
|
10
|
+
# disposal.
|
11
|
+
#
|
12
|
+
# Only a subset of the
|
13
|
+
# {http://www.w3.org/TR/css3-selectors/ CSS 3 selector syntax}
|
14
|
+
# is supported. Parsing a selector that contains unsupported elements
|
15
|
+
# should raise an exception.
|
16
|
+
#
|
17
|
+
# * tag selector : +div+, +a+, +section+
|
18
|
+
# * class selector : +.big+, +.user_profile+
|
19
|
+
# * id selector : +#main_content+, +#sidebar+
|
20
|
+
# * attribute selectors : +[href]+, +[class~=foo]+, +[lang|=en]+
|
21
|
+
#
|
22
|
+
# Attribute selectors support
|
23
|
+
# {http://www.w3.org/TR/css3-selectors/#attribute-selectors all the operators of the CSS 3 spec},
|
24
|
+
# so have a look there for more details.
|
25
|
+
#
|
26
|
+
# Of course you can combine all these.
|
27
|
+
#
|
28
|
+
# @example
|
29
|
+
# link = H[:a, {class: 'foo bar', lang: 'fr-be', href: 'http://example.com'}, "Hello, World"]
|
30
|
+
# node = H[:div, {class: 'wrap'}, link]
|
31
|
+
# node.select('div.wrap a.foo.bar[lang|=fr][href^=http]') do |a|
|
32
|
+
# p a.text
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
class CssSelection < Selection
|
36
|
+
include Enumerable
|
37
|
+
|
38
|
+
# Create a new CssSelection based on a root node and a selector
|
39
|
+
#
|
40
|
+
# The selector can be unparsed (a plain +String+), or parsed. This class
|
41
|
+
# works recursively by passing a subset of the parsed selector to a subset
|
42
|
+
# of the tree, hence why this matters.
|
43
|
+
#
|
44
|
+
# @param [Hexp::Node] node
|
45
|
+
# Root node of the tree
|
46
|
+
#
|
47
|
+
# @param [String,Hexp::CssSelector::CommaSequence] css_selector
|
48
|
+
# CSS selector
|
49
|
+
#
|
50
|
+
# @api public
|
6
51
|
def initialize(node, css_selector)
|
7
|
-
@node, @css_selector = node, css_selector
|
52
|
+
@node, @css_selector = node.to_hexp, css_selector.freeze
|
8
53
|
end
|
9
54
|
|
55
|
+
# Debugging representation
|
56
|
+
#
|
57
|
+
# @return [String]
|
58
|
+
#
|
59
|
+
# @api public
|
10
60
|
def inspect
|
11
61
|
"#<#{self.class} @node=#{@node.inspect} @css_selector=#{@css_selector.inspect} matches=#{node_matches?}>"
|
12
62
|
end
|
13
63
|
|
64
|
+
# Iterate over the nodes that match
|
65
|
+
#
|
66
|
+
# @yieldparam [Hexp::Node]
|
67
|
+
# Each matching node
|
68
|
+
#
|
69
|
+
# @return [Enumerator,CssSelection]
|
70
|
+
# Enumerator if no block is given, or self
|
71
|
+
#
|
72
|
+
# @api public
|
14
73
|
def each(&block)
|
15
74
|
return to_enum(:each) unless block_given?
|
16
75
|
|
@@ -18,8 +77,17 @@ module Hexp
|
|
18
77
|
next_selection_for(child).each(&block)
|
19
78
|
end
|
20
79
|
yield @node if node_matches?
|
80
|
+
self
|
21
81
|
end
|
22
82
|
|
83
|
+
# Replace / alter each node that matches
|
84
|
+
#
|
85
|
+
# @yieldparam [Hexp::Node]
|
86
|
+
# Each matching node
|
87
|
+
#
|
88
|
+
# @return [Hexp::Node]
|
89
|
+
#
|
90
|
+
# @api private
|
23
91
|
def rewrite(&block)
|
24
92
|
return @node if @node.text?
|
25
93
|
|
@@ -35,29 +103,67 @@ module Hexp
|
|
35
103
|
|
36
104
|
private
|
37
105
|
|
106
|
+
# The CSS selector, parsed to a comma sequence
|
107
|
+
#
|
108
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
109
|
+
#
|
110
|
+
# @api private
|
38
111
|
def comma_sequence
|
39
|
-
@comma_sequence ||=
|
112
|
+
@comma_sequence ||= parse_selector
|
40
113
|
end
|
41
114
|
|
42
|
-
|
43
|
-
|
115
|
+
# Parse the CSS selector, if it isn't in a parsed form already
|
116
|
+
#
|
117
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
118
|
+
#
|
119
|
+
# @api private
|
120
|
+
def parse_selector
|
121
|
+
return @css_selector if @css_selector.is_a? CssSelector::CommaSequence
|
44
122
|
CssSelector::Parser.call(@css_selector)
|
45
123
|
end
|
46
124
|
|
125
|
+
# Is the current node part of the selection
|
126
|
+
#
|
127
|
+
# @return [true,false]
|
128
|
+
#
|
129
|
+
# @api private
|
47
130
|
def node_matches?
|
48
131
|
comma_sequence.matches?(@node)
|
49
132
|
end
|
50
133
|
|
51
|
-
#
|
52
|
-
#
|
134
|
+
# Consume the matching part of the comma sequence, return the rest
|
135
|
+
#
|
136
|
+
# Returns a new comma sequence with the parts removed that have been
|
137
|
+
# consumed by matching against this node. If no part matches, returns nil.
|
138
|
+
#
|
139
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
140
|
+
#
|
141
|
+
# @api private
|
53
142
|
def next_comma_sequence
|
54
143
|
@next_comma_sequence ||= CssSelector::CommaSequence.new(consume_matching_heads)
|
55
144
|
end
|
56
145
|
|
146
|
+
# Recurse down into a child, passing in the remaining part of the selector
|
147
|
+
#
|
148
|
+
# @param [Hexp::Node] child
|
149
|
+
# One of the children of the node in this selection object
|
150
|
+
#
|
151
|
+
# @return [Hexp::Node::CssSelection]
|
152
|
+
#
|
153
|
+
# @api private
|
57
154
|
def next_selection_for(child)
|
58
155
|
self.class.new(child, next_comma_sequence)
|
59
156
|
end
|
60
157
|
|
158
|
+
# For each sequence in the comma sequence, remove the head if it matches
|
159
|
+
#
|
160
|
+
# For example, if this node is a `H[:div]`, and the selector is
|
161
|
+
# `span.foo, div a[href]`, then the result of this method will be
|
162
|
+
# `span.foo, a[href]`. This can then be used to match any child nodes.
|
163
|
+
#
|
164
|
+
# @return [Hexp::CssSelector::CommaSequence]
|
165
|
+
#
|
166
|
+
# @api private
|
61
167
|
def consume_matching_heads
|
62
168
|
comma_sequence.members.flat_map do |sequence|
|
63
169
|
if sequence.head_matches? @node
|
data/lib/hexp/node/domize.rb
CHANGED
@@ -9,16 +9,18 @@ module Hexp
|
|
9
9
|
# The resulting DOM Document
|
10
10
|
#
|
11
11
|
# @return [Nokogiri::HTML::Document]
|
12
|
-
# @api private
|
13
12
|
#
|
13
|
+
# @api private
|
14
14
|
attr_reader :dom
|
15
15
|
|
16
16
|
# Instantiate a Domizer
|
17
17
|
#
|
18
|
-
# @param
|
19
|
-
# @param options [Hash] :include_doctype defaults to true
|
20
|
-
# @api private
|
18
|
+
# @param [Hexp::Node] hexp
|
21
19
|
#
|
20
|
+
# @param [Hash] options
|
21
|
+
# @option options [true,false] :include_doctype Defaults to true
|
22
|
+
#
|
23
|
+
# @api private
|
22
24
|
def initialize(hexp, options = {})
|
23
25
|
@dom = Hexp::DOM
|
24
26
|
@raw = hexp
|
@@ -28,10 +30,14 @@ module Hexp
|
|
28
30
|
# Turn the hexp into a DOM
|
29
31
|
#
|
30
32
|
# @return [Nokogiri::HTML::Document]
|
31
|
-
# @api private
|
32
33
|
#
|
34
|
+
# @api private
|
33
35
|
def call
|
34
36
|
@doc = dom::Document.new
|
37
|
+
if @options[:html5]
|
38
|
+
@doc.children = dom::NodeSet.new(@doc, [])
|
39
|
+
@doc.create_internal_subset(nil, nil, nil)
|
40
|
+
end
|
35
41
|
@root = domize(@raw)
|
36
42
|
@doc << @root
|
37
43
|
|
@@ -46,10 +52,11 @@ module Hexp
|
|
46
52
|
|
47
53
|
# Turn a Hexp::Node into a Document
|
48
54
|
#
|
49
|
-
# @param
|
55
|
+
# @param [Hexp::Node] hexp
|
56
|
+
#
|
50
57
|
# @return [Nokogiri::HTML::Document]
|
51
|
-
# @api private
|
52
58
|
#
|
59
|
+
# @api private
|
53
60
|
def domize(hexp)
|
54
61
|
dom::Node.new(hexp.tag.to_s, @doc).tap do |node|
|
55
62
|
set_attributes(node, hexp.attributes)
|
@@ -59,11 +66,12 @@ module Hexp
|
|
59
66
|
|
60
67
|
# Set attributes on a DOM node
|
61
68
|
#
|
62
|
-
# @param
|
63
|
-
# @param
|
69
|
+
# @param [Nokogiri::XML::Element] node
|
70
|
+
# @param [Hash] attributes
|
71
|
+
#
|
64
72
|
# @return [void]
|
65
|
-
# @api private
|
66
73
|
#
|
74
|
+
# @api private
|
67
75
|
def set_attributes(node, attributes)
|
68
76
|
attributes.each do |key,value|
|
69
77
|
node[key] = value
|
@@ -72,11 +80,12 @@ module Hexp
|
|
72
80
|
|
73
81
|
# Set children on the DOM node
|
74
82
|
#
|
75
|
-
# @param
|
76
|
-
# @param
|
83
|
+
# @param [Nokogiri::XML::Element] node
|
84
|
+
# @param [Hexp::List] children
|
85
|
+
#
|
77
86
|
# @return [void]
|
78
|
-
# @api private
|
79
87
|
#
|
88
|
+
# @api private
|
80
89
|
def set_children(node, children)
|
81
90
|
children.each do |child|
|
82
91
|
if child.instance_of?(TextNode)
|
data/lib/hexp/node/normalize.rb
CHANGED
@@ -11,7 +11,6 @@ module Hexp
|
|
11
11
|
# Hexp::Node::Normalize.new([:p, {class:'foo'}])
|
12
12
|
#
|
13
13
|
# @api public
|
14
|
-
#
|
15
14
|
def initialize(node)
|
16
15
|
@raw = node
|
17
16
|
end
|
@@ -21,7 +20,6 @@ module Hexp
|
|
21
20
|
# @return [Array] strict hexp node
|
22
21
|
#
|
23
22
|
# @api private
|
24
|
-
#
|
25
23
|
def call
|
26
24
|
[@raw.first, normalized_attributes, normalized_children]
|
27
25
|
end
|
@@ -33,7 +31,6 @@ module Hexp
|
|
33
31
|
# @return [Hash] the attributes hash
|
34
32
|
#
|
35
33
|
# @api private
|
36
|
-
#
|
37
34
|
def attributes
|
38
35
|
attrs = @raw[1]
|
39
36
|
return attrs if attrs.instance_of?(Hash)
|
@@ -45,7 +42,6 @@ module Hexp
|
|
45
42
|
# @return [Hash]
|
46
43
|
#
|
47
44
|
# @api private
|
48
|
-
#
|
49
45
|
def normalized_attributes
|
50
46
|
Hash[*
|
51
47
|
attributes.flat_map do |key, value|
|
@@ -59,7 +55,6 @@ module Hexp
|
|
59
55
|
# @return [Array] the list of child hexps, non-strict
|
60
56
|
#
|
61
57
|
# @api private
|
62
|
-
#
|
63
58
|
def children
|
64
59
|
last = @raw.last
|
65
60
|
if last.respond_to? :to_ary
|
@@ -76,18 +71,17 @@ module Hexp
|
|
76
71
|
# @return [Array] list of normalized hexps
|
77
72
|
#
|
78
73
|
# @api private
|
79
|
-
#
|
80
74
|
def normalized_children
|
81
75
|
Hexp::List[*
|
82
76
|
children.map do |child|
|
83
77
|
case child
|
84
|
-
when Hexp::Node
|
78
|
+
when Hexp::Node, Hexp::TextNode
|
85
79
|
child
|
86
|
-
when String
|
80
|
+
when String
|
87
81
|
Hexp::TextNode.new(child)
|
88
82
|
when ->(ch) { ch.respond_to? :to_hexp }
|
89
83
|
response = child.to_hexp
|
90
|
-
raise FormatError, "to_hexp must return a Hexp::Node, got #{response.inspect}" unless response.instance_of?(Hexp::Node)
|
84
|
+
raise FormatError, "to_hexp must return a Hexp::Node, got #{response.inspect}" unless response.instance_of?(Hexp::Node) || response.instance_of?(Hexp::TextNode)
|
91
85
|
response
|
92
86
|
when Array
|
93
87
|
Hexp::Node[*child.map(&:freeze)]
|
data/lib/hexp/node/pp.rb
CHANGED
@@ -4,9 +4,10 @@ module Hexp
|
|
4
4
|
class PP
|
5
5
|
# Create a new pretty-printer
|
6
6
|
#
|
7
|
-
# @param
|
8
|
-
#
|
7
|
+
# @param [Hexp::Node] node
|
8
|
+
# The node to represent
|
9
9
|
#
|
10
|
+
# @api private
|
10
11
|
def initialize(node)
|
11
12
|
@node = node
|
12
13
|
end
|
@@ -14,8 +15,8 @@ module Hexp
|
|
14
15
|
# Perform the pretty-printing
|
15
16
|
#
|
16
17
|
# @return [String] The pp output
|
17
|
-
# @api private
|
18
18
|
#
|
19
|
+
# @api private
|
19
20
|
def call
|
20
21
|
[
|
21
22
|
@node.class.inspect_name,
|
@@ -27,8 +28,8 @@ module Hexp
|
|
27
28
|
# Format the node tag
|
28
29
|
#
|
29
30
|
# @return [String]
|
30
|
-
# @api private
|
31
31
|
#
|
32
|
+
# @api private
|
32
33
|
def pp_tag
|
33
34
|
"[#{@node.tag.inspect}"
|
34
35
|
end
|
@@ -36,8 +37,8 @@ module Hexp
|
|
36
37
|
# Format the node attributes
|
37
38
|
#
|
38
39
|
# @return [String]
|
39
|
-
# @api private
|
40
40
|
#
|
41
|
+
# @api private
|
41
42
|
def pp_attributes
|
42
43
|
attrs = @node.attributes
|
43
44
|
return '' if attrs.empty?
|
@@ -47,8 +48,8 @@ module Hexp
|
|
47
48
|
# Format the node children
|
48
49
|
#
|
49
50
|
# @return [String]
|
50
|
-
# @api private
|
51
51
|
#
|
52
|
+
# @api private
|
52
53
|
def pp_children
|
53
54
|
children = @node.children
|
54
55
|
return ']' if children.empty?
|
@@ -57,11 +58,14 @@ module Hexp
|
|
57
58
|
|
58
59
|
# Indent a multiline string with a number of spaces
|
59
60
|
#
|
60
|
-
# @param
|
61
|
-
#
|
61
|
+
# @param [String] string
|
62
|
+
# The string to indent
|
63
|
+
# @param [Integer] indent
|
64
|
+
# The number of spaces to use for indentation
|
65
|
+
#
|
62
66
|
# @return [String]
|
63
|
-
# @api private
|
64
67
|
#
|
68
|
+
# @api private
|
65
69
|
def self.indent(string, indent = 2)
|
66
70
|
string.lines.map {|line| " "*indent + line}.join
|
67
71
|
end
|