micromicro 2.0.1 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,38 +4,47 @@ module MicroMicro
4
4
  class Item
5
5
  include Collectible
6
6
 
7
- # Extract items from a context.
8
- #
9
- # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
10
- # @return [Array<MicroMicro::Item>]
11
- def self.from_context(context)
12
- node_set_from(context).map { |node| new(node) }
13
- end
7
+ class ItemNodeSearch
8
+ attr_reader :node_set
14
9
 
15
- # Extract item nodes from a context.
16
- #
17
- # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
18
- # @param node_set [Nokogiri::XML::NodeSet]
19
- # @return [Nokogiri::XML::NodeSet]
20
- # rubocop:disable Metrics
21
- def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
22
- context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
23
-
24
- if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
25
- if Helpers.item_node?(context)
26
- node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
27
- else
28
- node_set_from(context.element_children, node_set)
10
+ def initialize(document)
11
+ @node_set = Nokogiri::XML::NodeSet.new(document, [])
12
+ end
13
+
14
+ # rubocop:disable Metrics
15
+ def search(context)
16
+ context.each { |node| search(node) } if context.is_a?(Nokogiri::XML::NodeSet)
17
+
18
+ if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
19
+ if Helpers.item_node?(context)
20
+ node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
21
+ else
22
+ search(context.element_children)
23
+ end
29
24
  end
25
+
26
+ node_set
30
27
  end
28
+ # rubocop:enable Metrics
29
+ end
30
+
31
+ private_constant :ItemNodeSearch
31
32
 
32
- node_set
33
+ # Extract {MicroMicro::Item}s from a context.
34
+ #
35
+ # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
36
+ # @return [Array<MicroMicro::Item>]
37
+ def self.from_context(context)
38
+ ItemNodeSearch
39
+ .new(context.document)
40
+ .search(context)
41
+ .map { |node| new(node) }
33
42
  end
34
- # rubocop:enable Metrics
35
43
 
36
44
  # Parse a node for microformats2-encoded data.
37
45
  #
38
46
  # @param node [Nokogiri::XML::Element]
47
+ # @return [MicroMicro::Item]
39
48
  def initialize(node)
40
49
  @node = node
41
50
 
@@ -44,24 +53,40 @@ module MicroMicro
44
53
  properties << implied_url if implied_url?
45
54
  end
46
55
 
47
- # A collection of child items parsed from the node.
56
+ # A collection of child {MicroMicro::Item}s parsed from the node.
48
57
  #
49
58
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
59
+ # microformats.org: microformats2 parsing specification § Parse an element for class microformats
50
60
  #
51
61
  # @return [MicroMicro::Collections::ItemsCollection]
52
62
  def children
53
63
  @children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
54
64
  end
55
65
 
56
- # The value of the node's `id` attribute, if present.
66
+ # Does this {MicroMicro::Item} contain any child {MicroMicro::Item}s?
67
+ #
68
+ # @return [Boolean]
69
+ def children?
70
+ children.any?
71
+ end
72
+
73
+ # The value of the node's +id+ attribute, if present.
57
74
  #
58
75
  # @return [String, nil]
59
76
  def id
60
77
  @id ||= node['id']&.strip
61
78
  end
62
79
 
63
- # :nocov:
80
+ # Does this {MicroMicro::Item} have an +id+ attribute value?
81
+ #
82
+ # @return [Boolean]
83
+ def id?
84
+ id.present?
85
+ end
86
+
64
87
  # @return [String]
88
+ #
89
+ # :nocov:
65
90
  def inspect
66
91
  "#<#{self.class}:#{format('%#0x', object_id)} " \
67
92
  "types: #{types.inspect}, " \
@@ -70,23 +95,23 @@ module MicroMicro
70
95
  end
71
96
  # :nocov:
72
97
 
73
- # A collection of plain text properties parsed from the node.
74
- #
75
- # @return [MicroMicro::Collections::PropertiesCollection]
76
- def plain_text_properties
77
- @plain_text_properties ||= properties.plain_text_properties
78
- end
79
-
80
- # A collection of properties parsed from the node.
98
+ # A collection of {MicroMicro::Property}s parsed from the node.
81
99
  #
82
100
  # @return [MicroMicro::Collections::PropertiesCollection]
83
101
  def properties
84
102
  @properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
85
103
  end
86
104
 
87
- # Return the parsed item as a Hash.
105
+ # Return the parsed {MicroMicro::Item} as a Hash.
88
106
  #
89
107
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
108
+ # microformats.org: microformats2 parsing specification § Parse an element for class microformats
109
+ #
110
+ # @see MicroMicro::Item#children
111
+ # @see MicroMicro::Item#id
112
+ # @see MicroMicro::Item#properties
113
+ # @see MicroMicro::Item#types
114
+ # @see MicroMicro::Collections::PropertiesCollection#to_h
90
115
  #
91
116
  # @return [Hash]
92
117
  def to_h
@@ -95,28 +120,22 @@ module MicroMicro
95
120
  properties: properties.to_h
96
121
  }
97
122
 
98
- hash[:id] = id if id.present?
99
- hash[:children] = children.to_a if children.any?
123
+ hash[:id] = id if id?
124
+ hash[:children] = children.to_a if children?
100
125
 
101
126
  hash
102
127
  end
103
128
 
104
- # An array of root class names parsed from the node's `class` attribute.
129
+ # An Array of root class names parsed from the node's +class+ attribute.
105
130
  #
106
131
  # @return [Array<String>]
107
132
  def types
108
133
  @types ||= Helpers.root_class_names_from(node)
109
134
  end
110
135
 
111
- # A collection of url properties parsed from the node.
112
- #
113
- # @return [MicroMicro::Collections::PropertiesCollection]
114
- def url_properties
115
- @url_properties ||= properties.url_properties
116
- end
117
-
118
136
  private
119
137
 
138
+ # @return [Nokogiri::XML::Element]
120
139
  attr_reader :node
121
140
 
122
141
  # @return [MicroMicro::ImpliedProperty]
@@ -173,7 +192,7 @@ module MicroMicro
173
192
 
174
193
  # @return [Boolean]
175
194
  def nested_items?
176
- @nested_items ||= properties.find(&:item_node?) || children.any?
195
+ @nested_items ||= properties.find(&:item_node?) || children?
177
196
  end
178
197
  end
179
198
  end
@@ -10,7 +10,9 @@ module MicroMicro
10
10
  end
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
13
+ # microformats.org: microformats2 parsing specification § Parsing a +p-+ property
13
14
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
15
+ # microformats.org: microformats2 parsing specification § Parsing an +e-+ property
14
16
  #
15
17
  # @return [String]
16
18
  def value
@@ -30,6 +30,7 @@ module MicroMicro
30
30
  # Value Class Pattern date and time parsing specification.
31
31
  #
32
32
  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
33
+ # microformats.org: Value Class Pattern § Date and time parsing
33
34
  #
34
35
  # @param string [String, #to_s]
35
36
  def initialize(string)
@@ -89,17 +90,14 @@ module MicroMicro
89
90
 
90
91
  # @return [String, nil]
91
92
  def value
92
- @value ||=
93
- if normalized_date || normalized_time || normalized_timezone
94
- "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
95
- end
93
+ @value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip.presence
96
94
  end
97
95
 
98
96
  # @return [Hash{Symbol => String, nil}]
99
97
  def values
100
98
  @values ||=
101
99
  if string.match?(DATE_TIME_TIMEZONE_REGEXP)
102
- string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
100
+ string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.transform_keys(&:to_sym)
103
101
  else
104
102
  {}
105
103
  end
@@ -10,6 +10,7 @@ module MicroMicro
10
10
  }.freeze
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
13
+ # microformats.org: microformats2 parsing specification § Parsing a +dt-+ property
13
14
  #
14
15
  # @return [String]
15
16
  def value
@@ -19,6 +20,7 @@ module MicroMicro
19
20
  private
20
21
 
21
22
  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
23
+ # microformats.org: Value Class Pattern § microformats2 parsers implied date
22
24
  #
23
25
  # @return [MicroMicro::Parsers::DateTimeParser, nil]
24
26
  def adopted_date_time_parser
@@ -39,6 +41,7 @@ module MicroMicro
39
41
  end
40
42
 
41
43
  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
44
+ # microformats.org: Value Class Pattern § microformats2 parsers implied date
42
45
  #
43
46
  # @return [Boolean]
44
47
  def imply_date?
@@ -4,6 +4,7 @@ module MicroMicro
4
4
  module Parsers
5
5
  class EmbeddedMarkupPropertyParser < BasePropertyParser
6
6
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
7
+ # microformats.org: microformats2 parsing specification § Parsing an +e-+ property
7
8
  #
8
9
  # @return [Hash{Symbol => String}]
9
10
  def value
@@ -0,0 +1,68 @@
1
# frozen_string_literal: true

module MicroMicro
  module Parsers
    # Builds the structured representation of an image element from an
    # already-extracted value plus the element's +alt+ and +srcset+ attributes.
    #
    # The node is only ever read through +node['alt']+ and +node['srcset']+.
    class ImageElementParser
      # The value supplied to the constructor, returned unchanged.
      #
      # @return [String]
      attr_reader :value

      # @param node [Nokogiri::XML::Element]
      # @param value [String]
      def initialize(node, value)
        @node = node
        @value = value
      end

      # The node's +alt+ attribute with surrounding whitespace stripped, or
      # +nil+ when the attribute is absent.
      #
      # @return [String, nil]
      def alt
        @alt ||= node['alt']&.strip
      end

      # Does the node have an +alt+ attribute? An empty +alt+ still counts.
      #
      # @return [Boolean]
      def alt?
        !alt.nil?
      end

      # The node's +srcset+ attribute parsed into a Hash keyed by descriptor,
      # or +nil+ when the attribute is absent.
      #
      # @return [Hash{Symbol => String}, nil]
      def srcset
        @srcset ||= image_candidates if node['srcset']
      end

      # Did the +srcset+ attribute yield at least one image candidate?
      #
      # @return [Boolean]
      def srcset?
        srcset.present?
      end

      # Return the parsed image as a Hash. The +:srcset+ and +:alt+ keys are
      # only included when {#srcset?} and {#alt?} are true, respectively.
      #
      # @return [Hash{Symbol => String, Hash{Symbol => String}}]
      def to_h
        hash = { value: value }

        hash[:srcset] = srcset if srcset?
        hash[:alt] = alt if alt?

        hash
      end

      private

      # @return [Nokogiri::XML::Element]
      attr_reader :node

      # Split the +srcset+ attribute on commas and map each candidate's
      # descriptor to its URL. A candidate without a descriptor is keyed as
      # +:1x+. Candidates that are empty after stripping are skipped because
      # the pattern requires at least one character.
      #
      # @return [Hash{Symbol => String}]
      def image_candidates
        node['srcset']
          .split(',')
          .each_with_object({}) do |candidate, candidates|
            candidate.strip.match(/^(.+?)(\s+.+)?$/) do |match|
              descriptor = (match[2] || '1x').strip.to_sym

              # The first candidate seen for a given descriptor wins.
              candidates[descriptor] ||= match[1]
            end
          end
      end
    end
  end
end
@@ -10,6 +10,7 @@ module MicroMicro
10
10
  }.freeze
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
13
+ # microformats.org: microformats2 parsing specification § Parsing for implied properties
13
14
  #
14
15
  # @return [String]
15
16
  def value
@@ -11,18 +11,19 @@ module MicroMicro
11
11
  }.freeze
12
12
 
13
13
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
14
+ # microformats.org: microformats2 parsing specification § Parsing for implied properties
14
15
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
16
+ # microformats.org: microformats2 parsing specification § Parse an img element for src and alt
15
17
  #
16
18
  # @return [String, Hash{Symbol => String}, nil]
17
19
  def value
18
20
  @value ||=
19
21
  if attribute_value
20
- return attribute_value unless candidate_node.matches?('img[alt]')
21
-
22
- {
23
- value: attribute_value,
24
- alt: candidate_node['alt'].strip
25
- }
22
+ if candidate_node.matches?('img[alt], img[srcset]')
23
+ ImageElementParser.new(candidate_node, attribute_value).to_h
24
+ else
25
+ attribute_value
26
+ end
26
27
  end
27
28
  end
28
29
 
@@ -11,6 +11,7 @@ module MicroMicro
11
11
  }.freeze
12
12
 
13
13
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
14
+ # microformats.org: microformats2 parsing specification § Parsing for implied properties
14
15
  #
15
16
  # @return [String, nil]
16
17
  def value
@@ -10,6 +10,7 @@ module MicroMicro
10
10
  }.freeze
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
13
+ # microformats.org: microformats2 parsing specification § Parsing a +p-+ property
13
14
  #
14
15
  # @return [String]
15
16
  def value
@@ -16,16 +16,15 @@ module MicroMicro
16
16
  }.freeze
17
17
 
18
18
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
19
+ # microformats.org: microformats2 parsing specification § Parsing a +u-+ property
19
20
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
21
+ # microformats.org: microformats2 parsing specification § Parse an img element for src and alt
20
22
  #
21
23
  # @return [String, Hash{Symbol => String}]
22
24
  def value
23
25
  @value ||=
24
- if node.matches?('img[alt]')
25
- {
26
- value: resolved_value,
27
- alt: node['alt'].strip
28
- }
26
+ if node.matches?('img[alt], img[srcset]')
27
+ ImageElementParser.new(node, resolved_value).to_h
29
28
  else
30
29
  resolved_value
31
30
  end
@@ -4,7 +4,9 @@ module MicroMicro
4
4
  module Parsers
5
5
  class ValueClassPatternParser
6
6
  # @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
7
+ # microformats.org: Value Class Pattern § Basic Parsing
7
8
  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_values
9
+ # microformats.org: Value Class Pattern § Date and time values
8
10
  HTML_ATTRIBUTES_MAP = {
9
11
  'alt' => %w[area img],
10
12
  'value' => %w[data],
@@ -37,7 +39,7 @@ module MicroMicro
37
39
  Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
38
40
  end
39
41
 
40
- # @param context [Nokogiri::XML::Element]
42
+ # @param node [Nokogiri::XML::Element]
41
43
  # @param separator [String]
42
44
  def initialize(node, separator = '')
43
45
  @node = node
@@ -55,7 +57,7 @@ module MicroMicro
55
57
  self.class
56
58
  .node_set_from(node)
57
59
  .map { |value_node| self.class.value_from(value_node) }
58
- .select(&:present?)
60
+ .compact_blank!
59
61
  end
60
62
 
61
63
  private
@@ -11,55 +11,95 @@ module MicroMicro
11
11
  'u' => Parsers::UrlPropertyParser
12
12
  }.freeze
13
13
 
14
- attr_reader :name, :node, :prefix
14
+ private_constant :PROPERTY_PARSERS_MAP
15
15
 
16
- # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
17
- # @return [Array<MicroMicro::Property>]
18
- def self.from_context(context)
19
- node_set_from(context).flat_map do |node|
20
- Helpers.property_class_names_from(node).map { |token| new(node, token) }
16
+ class PropertyNodeSearch
17
+ attr_reader :node_set
18
+
19
+ def initialize(document)
20
+ @node_set = Nokogiri::XML::NodeSet.new(document, [])
21
21
  end
22
- end
23
22
 
24
- # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
25
- # @param node_set [Nokogiri::XML::NodeSet]
26
- # @return [Nokogiri::XML::NodeSet]
27
- def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
28
- context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
23
+ def search(context)
24
+ context.each { |node| search(node) } if context.is_a?(Nokogiri::XML::NodeSet)
25
+
26
+ if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
27
+ node_set << context if Helpers.property_node?(context)
29
28
 
30
- if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
31
- node_set << context if Helpers.property_node?(context)
29
+ search(context.element_children) unless Helpers.item_node?(context)
30
+ end
32
31
 
33
- node_set_from(context.element_children, node_set) unless Helpers.item_node?(context)
32
+ node_set
34
33
  end
34
+ end
35
+
36
+ private_constant :PropertyNodeSearch
35
37
 
36
- node_set
38
+ # This {MicroMicro::Property}'s +name+ value.
39
+ #
40
+ # @return [String]
41
+ attr_reader :name
42
+
43
+ # This {MicroMicro::Property}'s node.
44
+ #
45
+ # @return [Nokogiri::XML::Element]
46
+ attr_reader :node
47
+
48
+ # This {MicroMicro::Property}'s +prefix+ value.
49
+ #
50
+ # @return [String] One of +dt+, +e+, +p+, or +u+.
51
+ attr_reader :prefix
52
+
53
+ # Extract {MicroMicro::Property}s from a context.
54
+ #
55
+ # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
56
+ # @return [Array<MicroMicro::Property>]
57
+ def self.from_context(context)
58
+ PropertyNodeSearch
59
+ .new(context.document)
60
+ .search(context)
61
+ .flat_map do |node|
62
+ Helpers.property_class_names_from(node).map { |token| new(node, token) }
63
+ end
37
64
  end
38
65
 
66
+ # Parse a node for property data.
67
+ #
39
68
  # @param node [Nokogiri::XML::Element]
40
- # @param token [String]
69
+ # @param token [String] A hyphen-separated token representing a microformats2
70
+ # property value (e.g. +p-name+, +u-url+).
41
71
  def initialize(node, token)
42
72
  @node = node
43
73
  @prefix, @name = token.split(/-/, 2)
44
74
  end
45
75
 
76
+ # Is this {MicroMicro::Property} a datetime property?
77
+ #
46
78
  # @return [Boolean]
47
79
  def date_time_property?
48
80
  prefix == 'dt'
49
81
  end
50
82
 
83
+ # Is this {MicroMicro::Property} an embedded markup property?
84
+ #
51
85
  # @return [Boolean]
52
86
  def embedded_markup_property?
53
87
  prefix == 'e'
54
88
  end
55
89
 
90
+ # Always return +false+ when asked if this {MicroMicro::Property} is an
91
+ # implied property.
92
+ #
93
+ # @see MicroMicro::ImpliedProperty#implied?
94
+ #
56
95
  # @return [Boolean]
57
96
  def implied?
58
97
  false
59
98
  end
60
99
 
61
- # :nocov:
62
100
  # @return [String]
101
+ #
102
+ # :nocov:
63
103
  def inspect
64
104
  "#<#{self.class}:#{format('%#0x', object_id)} " \
65
105
  "name: #{name.inspect}, " \
@@ -68,27 +108,42 @@ module MicroMicro
68
108
  end
69
109
  # :nocov:
70
110
 
111
+ # Parse this {MicroMicro::Property}'s node as a {MicroMicro::Item}, if
112
+ # applicable.
113
+ #
71
114
  # @return [MicroMicro::Item, nil]
72
115
  def item
73
116
  @item ||= Item.new(node) if item_node?
74
117
  end
75
118
 
119
+ # Should this {MicroMicro::Property}'s node be parsed as a
120
+ # {MicroMicro::Item}?
121
+ #
122
+ # @see MicroMicro::Helpers.item_node?
123
+ #
76
124
  # @return [Boolean]
77
125
  def item_node?
78
126
  @item_node ||= Helpers.item_node?(node)
79
127
  end
80
128
 
129
+ # Is this {MicroMicro::Property} a plain text property?
130
+ #
81
131
  # @return [Boolean]
82
132
  def plain_text_property?
83
133
  prefix == 'p'
84
134
  end
85
135
 
136
+ # Is this {MicroMicro::Property} a url property?
137
+ #
86
138
  # @return [Boolean]
87
139
  def url_property?
88
140
  prefix == 'u'
89
141
  end
90
142
 
143
+ # Return this {MicroMicro::Property}'s parsed value.
144
+ #
91
145
  # @return [String, Hash]
146
+ #
92
147
  # rubocop:disable Metrics
93
148
  def value
94
149
  @value ||=
@@ -97,8 +152,8 @@ module MicroMicro
97
152
 
98
153
  return hash.merge(parser.value) if embedded_markup_property?
99
154
 
100
- p_property = item.properties.find { |property| property.name == 'name' } if plain_text_property?
101
- u_property = item.properties.find { |property| property.name == 'url' } if url_property?
155
+ p_property = item.properties.find_by(name: 'name') if plain_text_property?
156
+ u_property = item.properties.find_by(name: 'url') if url_property?
102
157
 
103
158
  hash.merge(value: (p_property || u_property || parser).value)
104
159
  else
@@ -107,6 +162,9 @@ module MicroMicro
107
162
  end
108
163
  # rubocop:enable Metrics
109
164
 
165
+ # Returns +true+ if this {MicroMicro::Property}'s +value+ is anything other
166
+ # than blank or +nil+.
167
+ #
110
168
  # @return [Boolean]
111
169
  def value?
112
170
  value.present?