hexp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,16 +5,30 @@ module Hexp
5
5
  module Children
6
6
  # Is this node an empty node
7
7
  #
8
- # H[:p, class: 'foo'].empty? #=> true
9
- # H[:p, [H[:span]].empty? #=> false
8
+ # @example
9
+ # H[:p, class: 'foo'].empty? #=> true
10
+ # H[:p, [H[:span]]].empty? #=> false
10
11
  #
11
- # @return [Boolean] true if this node has no children
12
- # @api public
12
+ # @return [true,false]
13
+ # True if this node has no children
13
14
  #
15
+ # @api public
14
16
  def empty?
15
17
  children.empty?
16
18
  end
17
19
 
20
+ # Add a child node to the end of the list of children
21
+ #
22
+ # @example
23
+ # H[:ul].add_child(H[:li, "chunky"]) #=> H[:ul, [H[:li, "chunky"]]]
24
+ #
25
+ # @param [Hexp::Node] child
26
+ # The child node to add
27
+ #
28
+ # @return [Hexp::Node]
29
+ # A new node that has the child added to its children
30
+ #
31
+ # @api public
18
32
  def add_child(child)
19
33
  H[
20
34
  self.tag,
@@ -25,16 +39,57 @@ module Hexp
25
39
  alias :add :add_child
26
40
  alias :<< :add_child
27
41
 
42
+ # All the text in this node and its descendants
43
+ #
44
+ # Concatenates the contents of all text nodes.
45
+ #
46
+ # @example
47
+ # H[:div, [
48
+ # H[:span, "My name is"],
49
+ # " ",
50
+ # H[:strong, "@plexus"],
51
+ # "."
52
+ # ]
53
+ # ].text #=> "My name is @plexus."
54
+ #
55
+ # @return [String]
56
+ #
57
+ # @api public
28
58
  def text
29
59
  children.map do |node|
30
60
  node.text? ? node : node.text
31
61
  end.join
32
62
  end
33
63
 
64
+ # Replace the children of this node with a new list of children
65
+ #
66
+ # @example
67
+ # H[:div, "Hello"].set_children([H[:span, "wicked!"], H[:br]])
68
+ # # => H[:div, [H[:span, "wicked!"], H[:br]]]
69
+ #
70
+ # @param [Array,Hexp::NodeList] new_children
71
+ #
72
+ # @return [Hexp::Node]
73
+ #
74
+ # @api public
34
75
  def set_children(new_children)
35
76
  H[tag, attributes, new_children]
36
77
  end
37
78
 
79
+ # Perform an action on each child node, and replace the node with the result
80
+ #
81
+ # @example
82
+ # H[:div, [H[:span, "foo"]]].map_children do |node|
83
+ # node.add_class(node.text)
84
+ # end
85
+ # # => H[:div, [H[:span, {class: "foo"}, "foo"]]]
86
+ #
87
+ # @yieldparam [Hexp::Node]
88
+ # The child node
89
+ #
90
+ # @return [Hexp::Node]
91
+ #
92
+ # @api public
38
93
  def map_children(&blk)
39
94
  return to_enum(:map_children) unless block_given?
40
95
  H[tag, attributes, children.map(&blk)]
@@ -2,15 +2,74 @@ module Hexp
2
2
  class Node
3
3
  # Select nodes using CSS selectors
4
4
  #
5
- class CssSelection < Selector
5
+ # The main interface to this is {Hexp::Node#select}, although there's
6
+ # nothing stopping you from using this class directly.
7
+ #
8
+ # This class is +Enumerable+, and calling {#each} without a block will give
9
+ # you an +Enumerator+, so you have all Ruby's tasty list operations at your
10
+ # disposal.
11
+ #
12
+ # Only a subset of the
13
+ # {http://www.w3.org/TR/css3-selectors/ CSS 3 selector syntax}
14
+ # is supported. Parsing a selector that contains unsupported elements
15
+ # should raise an exception.
16
+ #
17
+ # * tag selector : +div+, +a+, +section+
18
+ # * class selector : +.big+, +.user_profile+
19
+ # * id selector : +#main_content+, +#sidebar+
20
+ # * attribute selectors : +[href]+, +[class~=foo]+, +[lang|=en]+
21
+ #
22
+ # Attribute selectors support
23
+ # {http://www.w3.org/TR/css3-selectors/#attribute-selectors all the operators of the CSS 3 spec}
24
+ #, so have a look there for more details.
25
+ #
26
+ # Of course you can combine all these.
27
+ #
28
+ # @example
29
+ # link = H[:a, {class: 'foo bar', lang: 'fr-be', href: 'http://example.com'}, "Hello, World"]
30
+ # node = H[:div, {class: 'wrap'}, link]
31
+ # node.select('div.wrap a.foo.bar[lang|=fr][href^=http]') do |a|
32
+ # p a.text
33
+ # end
34
+ #
35
+ class CssSelection < Selection
36
+ include Enumerable
37
+
38
+ # Create a new CssSelection based on a root node and a selector
39
+ #
40
+ # The selector can be unparsed (a plain +String+), or parsed. This class
41
+ # works recursively by passing a subset of the parsed selector to a subset
42
+ # of the tree, hence why this matters.
43
+ #
44
+ # @param [Hexp::Node] node
45
+ # Root node of the tree
46
+ #
47
+ # @param [String,Hexp::CssSelector::CommaSequence] css_selector
48
+ # CSS selector
49
+ #
50
+ # @api public
6
51
  def initialize(node, css_selector)
7
- @node, @css_selector = node, css_selector
52
+ @node, @css_selector = node.to_hexp, css_selector.freeze
8
53
  end
9
54
 
55
+ # Debugging representation
56
+ #
57
+ # @return [String]
58
+ #
59
+ # @api public
10
60
  def inspect
11
61
  "#<#{self.class} @node=#{@node.inspect} @css_selector=#{@css_selector.inspect} matches=#{node_matches?}>"
12
62
  end
13
63
 
64
+ # Iterate over the nodes that match
65
+ #
66
+ # @yieldparam [Hexp::Node]
67
+ # Each matching node
68
+ #
69
+ # @return [Enumerator,CssSelection]
70
+ # Enumerator if no block is given, or self
71
+ #
72
+ # @api public
14
73
  def each(&block)
15
74
  return to_enum(:each) unless block_given?
16
75
 
@@ -18,8 +77,17 @@ module Hexp
18
77
  next_selection_for(child).each(&block)
19
78
  end
20
79
  yield @node if node_matches?
80
+ self
21
81
  end
22
82
 
83
+ # Replace / alter each node that matches
84
+ #
85
+ # @yieldparam [Hexp::Node]
86
+ # Each matching node
87
+ #
88
+ # @return [Hexp::Node]
89
+ #
90
+ # @api private
23
91
  def rewrite(&block)
24
92
  return @node if @node.text?
25
93
 
@@ -35,29 +103,67 @@ module Hexp
35
103
 
36
104
  private
37
105
 
106
+ # The CSS selector, parsed to a comma sequence
107
+ #
108
+ # @return [Hexp::CssSelector::CommaSequence]
109
+ #
110
+ # @api private
38
111
  def comma_sequence
39
- @comma_sequence ||= coerce_to_comma_sequence(@css_selector)
112
+ @comma_sequence ||= parse_selector
40
113
  end
41
114
 
42
- def coerce_to_comma_sequence(css_selector)
43
- return css_selector if css_selector.is_a? CssSelector::CommaSequence
115
+ # Parse the CSS selector, if it isn't in a parsed form already
116
+ #
117
+ # @return [Hexp::CssSelector::CommaSequence]
118
+ #
119
+ # @api private
120
+ def parse_selector
121
+ return @css_selector if @css_selector.is_a? CssSelector::CommaSequence
44
122
  CssSelector::Parser.call(@css_selector)
45
123
  end
46
124
 
125
+ # Is the current node part of the selection
126
+ #
127
+ # @return [true,false]
128
+ #
129
+ # @api private
47
130
  def node_matches?
48
131
  comma_sequence.matches?(@node)
49
132
  end
50
133
 
51
- # returns a new commasequence with the parts removed that have been consumed by matching
52
- # against this node. If no part matches, return nil
134
+ # Consume the matching part of the comma sequence, return the rest
135
+ #
136
+ # Returns a new comma sequence with the parts removed that have been
137
+ # consumed by matching against this node. If no part matches, returns nil.
138
+ #
139
+ # @return [Hexp::CssSelector::CommaSequence]
140
+ #
141
+ # @api private
53
142
  def next_comma_sequence
54
143
  @next_comma_sequence ||= CssSelector::CommaSequence.new(consume_matching_heads)
55
144
  end
56
145
 
146
+ # Recurse down into a child, passing in the remaining part of the selector
147
+ #
148
+ # @param [Hexp::Node] child
149
+ # One of the children of the node in this selection object
150
+ #
151
+ # @return [Hexp::Node::CssSelection]
152
+ #
153
+ # @api private
57
154
  def next_selection_for(child)
58
155
  self.class.new(child, next_comma_sequence)
59
156
  end
60
157
 
158
+ # For each sequence in the comma sequence, remove the head if it matches
159
+ #
160
+ # For example, if this node is a `H[:div]`, and the selector is
161
+ # `span.foo, div a[:href]`, then the result of this method will be
162
+ # `span.foo, a[:href]`. This can then be used to match any child nodes.
163
+ #
164
+ # @return [Hexp::CssSelector::CommaSequence]
165
+ #
166
+ # @api private
61
167
  def consume_matching_heads
62
168
  comma_sequence.members.flat_map do |sequence|
63
169
  if sequence.head_matches? @node
@@ -9,16 +9,18 @@ module Hexp
9
9
  # The resulting DOM Document
10
10
  #
11
11
  # @return [Nokogiri::HTML::Document]
12
- # @api private
13
12
  #
13
+ # @api private
14
14
  attr_reader :dom
15
15
 
16
16
  # Instantiate a Domizer
17
17
  #
18
- # @param hexp [Hexp::Node]
19
- # @param options [Hash] :include_doctype defaults to true
20
- # @api private
18
+ # @param [Hexp::Node] hexp
21
19
  #
20
+ # @param [Hash] options
21
+ # @option options [true,false] :include_doctype Defaults to true
22
+ #
23
+ # @api private
22
24
  def initialize(hexp, options = {})
23
25
  @dom = Hexp::DOM
24
26
  @raw = hexp
@@ -28,10 +30,14 @@ module Hexp
28
30
  # Turn the hexp into a DOM
29
31
  #
30
32
  # @return [Nokogiri::HTML::Document]
31
- # @api private
32
33
  #
34
+ # @api private
33
35
  def call
34
36
  @doc = dom::Document.new
37
+ if @options[:html5]
38
+ @doc.children = dom::NodeSet.new(@doc, [])
39
+ @doc.create_internal_subset(nil, nil, nil)
40
+ end
35
41
  @root = domize(@raw)
36
42
  @doc << @root
37
43
 
@@ -46,10 +52,11 @@ module Hexp
46
52
 
47
53
  # Turn a Hexp::Node into a Document
48
54
  #
49
- # @param hexp [Hexp::Node]
55
+ # @param [Hexp::Node] hexp
56
+ #
50
57
  # @return [Nokogiri::HTML::Document]
51
- # @api private
52
58
  #
59
+ # @api private
53
60
  def domize(hexp)
54
61
  dom::Node.new(hexp.tag.to_s, @doc).tap do |node|
55
62
  set_attributes(node, hexp.attributes)
@@ -59,11 +66,12 @@ module Hexp
59
66
 
60
67
  # Set attributes on a DOM node
61
68
  #
62
- # @param node [Nokogiri::XML::Element]
63
- # @param attributes [Hash]
69
+ # @param [Nokogiri::XML::Element] node
70
+ # @param [Hash] attributes
71
+ #
64
72
  # @return [void]
65
- # @api private
66
73
  #
74
+ # @api private
67
75
  def set_attributes(node, attributes)
68
76
  attributes.each do |key,value|
69
77
  node[key] = value
@@ -72,11 +80,12 @@ module Hexp
72
80
 
73
81
  # Set children on the DOM node
74
82
  #
75
- # @param node [Nokogiri::XML::Element]
76
- # @param children [Hexp::List]
83
+ # @param [Nokogiri::XML::Element] node
84
+ # @param [Hexp::List] children
85
+ #
77
86
  # @return [void]
78
- # @api private
79
87
  #
88
+ # @api private
80
89
  def set_children(node, children)
81
90
  children.each do |child|
82
91
  if child.instance_of?(TextNode)
@@ -11,7 +11,6 @@ module Hexp
11
11
  # Hexp::Node::Normalize.new([:p, {class:'foo'}])
12
12
  #
13
13
  # @api public
14
- #
15
14
  def initialize(node)
16
15
  @raw = node
17
16
  end
@@ -21,7 +20,6 @@ module Hexp
21
20
  # @return [Array] strict hexp node
22
21
  #
23
22
  # @api private
24
- #
25
23
  def call
26
24
  [@raw.first, normalized_attributes, normalized_children]
27
25
  end
@@ -33,7 +31,6 @@ module Hexp
33
31
  # @return [Hash] the attributes hash
34
32
  #
35
33
  # @api private
36
- #
37
34
  def attributes
38
35
  attrs = @raw[1]
39
36
  return attrs if attrs.instance_of?(Hash)
@@ -45,7 +42,6 @@ module Hexp
45
42
  # @return [Hash]
46
43
  #
47
44
  # @api private
48
- #
49
45
  def normalized_attributes
50
46
  Hash[*
51
47
  attributes.flat_map do |key, value|
@@ -59,7 +55,6 @@ module Hexp
59
55
  # @return [Array] the list of child hexps, non-strict
60
56
  #
61
57
  # @api private
62
- #
63
58
  def children
64
59
  last = @raw.last
65
60
  if last.respond_to? :to_ary
@@ -76,18 +71,17 @@ module Hexp
76
71
  # @return [Array] list of normalized hexps
77
72
  #
78
73
  # @api private
79
- #
80
74
  def normalized_children
81
75
  Hexp::List[*
82
76
  children.map do |child|
83
77
  case child
84
- when Hexp::Node
78
+ when Hexp::Node, Hexp::TextNode
85
79
  child
86
- when String, TextNode
80
+ when String
87
81
  Hexp::TextNode.new(child)
88
82
  when ->(ch) { ch.respond_to? :to_hexp }
89
83
  response = child.to_hexp
90
- raise FormatError, "to_hexp must return a Hexp::Node, got #{response.inspect}" unless response.instance_of?(Hexp::Node)
84
+ raise FormatError, "to_hexp must return a Hexp::Node, got #{response.inspect}" unless response.instance_of?(Hexp::Node) || response.instance_of?(Hexp::TextNode)
91
85
  response
92
86
  when Array
93
87
  Hexp::Node[*child.map(&:freeze)]
data/lib/hexp/node/pp.rb CHANGED
@@ -4,9 +4,10 @@ module Hexp
4
4
  class PP
5
5
  # Create a new pretty-printer
6
6
  #
7
- # @param node [Hexp::Node] The node to represent
8
- # @api private
7
+ # @param [Hexp::Node] node
8
+ # The node to represent
9
9
  #
10
+ # @api private
10
11
  def initialize(node)
11
12
  @node = node
12
13
  end
@@ -14,8 +15,8 @@ module Hexp
14
15
  # Perform the pretty-printing
15
16
  #
16
17
  # @return [String] The pp output
17
- # @api private
18
18
  #
19
+ # @api private
19
20
  def call
20
21
  [
21
22
  @node.class.inspect_name,
@@ -27,8 +28,8 @@ module Hexp
27
28
  # Format the node tag
28
29
  #
29
30
  # @return [String]
30
- # @api private
31
31
  #
32
+ # @api private
32
33
  def pp_tag
33
34
  "[#{@node.tag.inspect}"
34
35
  end
@@ -36,8 +37,8 @@ module Hexp
36
37
  # Format the node attributes
37
38
  #
38
39
  # @return [String]
39
- # @api private
40
40
  #
41
+ # @api private
41
42
  def pp_attributes
42
43
  attrs = @node.attributes
43
44
  return '' if attrs.empty?
@@ -47,8 +48,8 @@ module Hexp
47
48
  # Format the node children
48
49
  #
49
50
  # @return [String]
50
- # @api private
51
51
  #
52
+ # @api private
52
53
  def pp_children
53
54
  children = @node.children
54
55
  return ']' if children.empty?
@@ -57,11 +58,14 @@ module Hexp
57
58
 
58
59
  # Indent a multiline string with a number of spaces
59
60
  #
60
- # @param string [String] The string to indent
61
- # @param indent [Integer] The number of spaces to use for indentation
61
+ # @param [String] string
62
+ # The string to indent
63
+ # @param [Integer] indent
64
+ # The number of spaces to use for indentation
65
+ #
62
66
  # @return [String]
63
- # @api private
64
67
  #
68
+ # @api private
65
69
  def self.indent(string, indent = 2)
66
70
  string.lines.map {|line| " "*indent + line}.join
67
71
  end