micromicro 2.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,38 +4,47 @@ module MicroMicro
4
4
  class Item
5
5
  include Collectible
6
6
 
7
- # Extract items from a context.
8
- #
9
- # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
10
- # @return [Array<MicroMicro::Item>]
11
- def self.from_context(context)
12
- node_set_from(context).map { |node| new(node) }
13
- end
7
+ class ItemNodeSearch
8
+ attr_reader :node_set
14
9
 
15
- # Extract item nodes from a context.
16
- #
17
- # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
18
- # @param node_set [Nokogiri::XML::NodeSet]
19
- # @return [Nokogiri::XML::NodeSet]
20
- # rubocop:disable Metrics
21
- def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
22
- context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
23
-
24
- if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
25
- if Helpers.item_node?(context)
26
- node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
27
- else
28
- node_set_from(context.element_children, node_set)
10
+ def initialize(document)
11
+ @node_set = Nokogiri::XML::NodeSet.new(document, [])
12
+ end
13
+
14
+ # rubocop:disable Metrics
15
+ def search(context)
16
+ context.each { |node| search(node) } if context.is_a?(Nokogiri::XML::NodeSet)
17
+
18
+ if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
19
+ if Helpers.item_node?(context)
20
+ node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
21
+ else
22
+ search(context.element_children)
23
+ end
29
24
  end
25
+
26
+ node_set
30
27
  end
28
+ # rubocop:enable Metrics
29
+ end
30
+
31
+ private_constant :ItemNodeSearch
31
32
 
32
- node_set
33
+ # Extract {MicroMicro::Item}s from a context.
34
+ #
35
+ # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
36
+ # @return [Array<MicroMicro::Item>]
37
+ def self.from_context(context)
38
+ ItemNodeSearch
39
+ .new(context.document)
40
+ .search(context)
41
+ .map { |node| new(node) }
33
42
  end
34
- # rubocop:enable Metrics
35
43
 
36
44
  # Parse a node for microformats2-encoded data.
37
45
  #
38
46
  # @param node [Nokogiri::XML::Element]
47
+ # @return [MicroMicro::Item]
39
48
  def initialize(node)
40
49
  @node = node
41
50
 
@@ -44,24 +53,40 @@ module MicroMicro
44
53
  properties << implied_url if implied_url?
45
54
  end
46
55
 
47
- # A collection of child items parsed from the node.
56
+ # A collection of child {MicroMicro::Item}s parsed from the node.
48
57
  #
49
58
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
59
+ # microformats.org: microformats2 parsing specification § Parse an element for class microformats
50
60
  #
51
61
  # @return [MicroMicro::Collections::ItemsCollection]
52
62
  def children
53
63
  @children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
54
64
  end
55
65
 
56
- # The value of the node's `id` attribute, if present.
66
+ # Does this {MicroMicro::Item} contain any child {MicroMicro::Item}s?
67
+ #
68
+ # @return [Boolean]
69
+ def children?
70
+ children.any?
71
+ end
72
+
73
+ # The value of the node's +id+ attribute, if present.
57
74
  #
58
75
  # @return [String, nil]
59
76
  def id
60
77
  @id ||= node['id']&.strip
61
78
  end
62
79
 
63
- # :nocov:
80
+ # Does this {MicroMicro::Item} have an +id+ attribute value?
81
+ #
82
+ # @return [Boolean]
83
+ def id?
84
+ id.present?
85
+ end
86
+
64
87
  # @return [String]
88
+ #
89
+ # :nocov:
65
90
  def inspect
66
91
  "#<#{self.class}:#{format('%#0x', object_id)} " \
67
92
  "types: #{types.inspect}, " \
@@ -70,23 +95,23 @@ module MicroMicro
70
95
  end
71
96
  # :nocov:
72
97
 
73
- # A collection of plain text properties parsed from the node.
74
- #
75
- # @return [MicroMicro::Collections::PropertiesCollection]
76
- def plain_text_properties
77
- @plain_text_properties ||= properties.plain_text_properties
78
- end
79
-
80
- # A collection of properties parsed from the node.
98
+ # A collection of {MicroMicro::Property}s parsed from the node.
81
99
  #
82
100
  # @return [MicroMicro::Collections::PropertiesCollection]
83
101
  def properties
84
102
  @properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
85
103
  end
86
104
 
87
- # Return the parsed item as a Hash.
105
+ # Return the parsed {MicroMicro::Item} as a Hash.
88
106
  #
89
107
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
108
+ # microformats.org: microformats2 parsing specification § Parse an element for class microformats
109
+ #
110
+ # @see MicroMicro::Item#children
111
+ # @see MicroMicro::Item#id
112
+ # @see MicroMicro::Item#properties
113
+ # @see MicroMicro::Item#types
114
+ # @see MicroMicro::Collections::PropertiesCollection#to_h
90
115
  #
91
116
  # @return [Hash]
92
117
  def to_h
@@ -95,28 +120,22 @@ module MicroMicro
95
120
  properties: properties.to_h
96
121
  }
97
122
 
98
- hash[:id] = id if id.present?
99
- hash[:children] = children.to_a if children.any?
123
+ hash[:id] = id if id?
124
+ hash[:children] = children.to_a if children?
100
125
 
101
126
  hash
102
127
  end
103
128
 
104
- # An array of root class names parsed from the node's `class` attribute.
129
+ # An Array of root class names parsed from the node's +class+ attribute.
105
130
  #
106
131
  # @return [Array<String>]
107
132
  def types
108
133
  @types ||= Helpers.root_class_names_from(node)
109
134
  end
110
135
 
111
- # A collection of url properties parsed from the node.
112
- #
113
- # @return [MicroMicro::Collections::PropertiesCollection]
114
- def url_properties
115
- @url_properties ||= properties.url_properties
116
- end
117
-
118
136
  private
119
137
 
138
+ # @return [Nokogiri::XML::Element]
120
139
  attr_reader :node
121
140
 
122
141
  # @return [MicroMicro::ImpliedProperty]
@@ -173,7 +192,7 @@ module MicroMicro
173
192
 
174
193
  # @return [Boolean]
175
194
  def nested_items?
176
- @nested_items ||= properties.find(&:item_node?) || children.any?
195
+ @nested_items ||= properties.find(&:item_node?) || children?
177
196
  end
178
197
  end
179
198
  end
@@ -10,7 +10,9 @@ module MicroMicro
10
10
  end
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
13
+ # microformats.org: microformats2 parsing specification § Parsing a +p-+ property
13
14
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
15
+ # microformats.org: microformats2 parsing specification § Parsing an +e-+ property
14
16
  #
15
17
  # @return [String]
16
18
  def value
@@ -30,6 +30,7 @@ module MicroMicro
30
30
  # Value Class Pattern date and time parsing specification.
31
31
  #
32
32
  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
33
+ # microformats.org: Value Class Pattern § Date and time parsing
33
34
  #
34
35
  # @param string [String, #to_s]
35
36
  def initialize(string)
@@ -89,17 +90,14 @@ module MicroMicro
89
90
 
90
91
  # @return [String, nil]
91
92
  def value
92
- @value ||=
93
- if normalized_date || normalized_time || normalized_timezone
94
- "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
95
- end
93
+ @value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip.presence
96
94
  end
97
95
 
98
96
  # @return [Hash{Symbol => String, nil}]
99
97
  def values
100
98
  @values ||=
101
99
  if string.match?(DATE_TIME_TIMEZONE_REGEXP)
102
- string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
100
+ string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.transform_keys(&:to_sym)
103
101
  else
104
102
  {}
105
103
  end
@@ -10,6 +10,7 @@ module MicroMicro
10
10
  }.freeze
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
13
+ # microformats.org: microformats2 parsing specification § Parsing a +dt-+ property
13
14
  #
14
15
  # @return [String]
15
16
  def value
@@ -19,6 +20,7 @@ module MicroMicro
19
20
  private
20
21
 
21
22
  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
23
+ # microformats.org: Value Class Pattern § microformats2 parsers implied date
22
24
  #
23
25
  # @return [MicroMicro::Parsers::DateTimeParser, nil]
24
26
  def adopted_date_time_parser
@@ -39,6 +41,7 @@ module MicroMicro
39
41
  end
40
42
 
41
43
  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
44
+ # microformats.org: Value Class Pattern § microformats2 parsers implied date
42
45
  #
43
46
  # @return [Boolean]
44
47
  def imply_date?
@@ -4,6 +4,7 @@ module MicroMicro
4
4
  module Parsers
5
5
  class EmbeddedMarkupPropertyParser < BasePropertyParser
6
6
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
7
+ # microformats.org: microformats2 parsing specification § Parsing an +e-+ property
7
8
  #
8
9
  # @return [Hash{Symbol => String}]
9
10
  def value
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MicroMicro
4
+ module Parsers
5
+ class ImageElementParser
6
+ # @return [String]
7
+ attr_reader :value
8
+
9
+ # @param node [Nokogiri::XML::Element]
10
+ # @param value [String]
11
+ def initialize(node, value)
12
+ @node = node
13
+ @value = value
14
+ end
15
+
16
+ # @return [String, nil]
17
+ def alt
18
+ @alt ||= node['alt']&.strip
19
+ end
20
+
21
+ # @return [Boolean]
22
+ def alt?
23
+ !alt.nil?
24
+ end
25
+
26
+ # @return [Hash{Symbol => String}, nil]
27
+ def srcset
28
+ @srcset ||= image_candidates if node['srcset']
29
+ end
30
+
31
+ # @return [Boolean]
32
+ def srcset?
33
+ srcset.present?
34
+ end
35
+
36
+ # @return [Hash{Symbol => String, Hash{Symbol => String}}]
37
+ def to_h
38
+ hash = { value: value }
39
+
40
+ hash[:srcset] = srcset if srcset?
41
+ hash[:alt] = alt if alt?
42
+
43
+ hash
44
+ end
45
+
46
+ private
47
+
48
+ # @return [Nokogiri::XML::Element]
49
+ attr_reader :node
50
+
51
+ # @return [Hash{Symbol => String}]
52
+ #
53
+ # rubocop:disable Style/PerlBackrefs
54
+ def image_candidates
55
+ node['srcset']
56
+ .split(',')
57
+ .each_with_object({}) do |candidate, hash|
58
+ candidate.strip.match(/^(.+?)(\s+.+)?$/) do
59
+ key = ($2 || '1x').strip.to_sym
60
+
61
+ hash[key] = $1 unless hash[key]
62
+ end
63
+ end
64
+ end
65
+ # rubocop:enable Style/PerlBackrefs
66
+ end
67
+ end
68
+ end
@@ -10,6 +10,7 @@ module MicroMicro
10
10
  }.freeze
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
13
+ # microformats.org: microformats2 parsing specification § Parsing for implied properties
13
14
  #
14
15
  # @return [String]
15
16
  def value
@@ -11,18 +11,19 @@ module MicroMicro
11
11
  }.freeze
12
12
 
13
13
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
14
+ # microformats.org: microformats2 parsing specification § Parsing for implied properties
14
15
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
16
+ # microformats.org: microformats2 parsing specification § Parse an img element for src and alt
15
17
  #
16
18
  # @return [String, Hash{Symbol => String}, nil]
17
19
  def value
18
20
  @value ||=
19
21
  if attribute_value
20
- return attribute_value unless candidate_node.matches?('img[alt]')
21
-
22
- {
23
- value: attribute_value,
24
- alt: candidate_node['alt'].strip
25
- }
22
+ if candidate_node.matches?('img[alt], img[srcset]')
23
+ ImageElementParser.new(candidate_node, attribute_value).to_h
24
+ else
25
+ attribute_value
26
+ end
26
27
  end
27
28
  end
28
29
 
@@ -11,6 +11,7 @@ module MicroMicro
11
11
  }.freeze
12
12
 
13
13
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
14
+ # microformats.org: microformats2 parsing specification § Parsing for implied properties
14
15
  #
15
16
  # @return [String, nil]
16
17
  def value
@@ -10,6 +10,7 @@ module MicroMicro
10
10
  }.freeze
11
11
 
12
12
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
13
+ # microformats.org: microformats2 parsing specification § Parsing a +p-+ property
13
14
  #
14
15
  # @return [String]
15
16
  def value
@@ -16,16 +16,15 @@ module MicroMicro
16
16
  }.freeze
17
17
 
18
18
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
19
+ # microformats.org: microformats2 parsing specification § Parsing a +u-+ property
19
20
  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
21
+ # microformats.org: microformats2 parsing specification § Parse an img element for src and alt
20
22
  #
21
23
  # @return [String, Hash{Symbol => String}]
22
24
  def value
23
25
  @value ||=
24
- if node.matches?('img[alt]')
25
- {
26
- value: resolved_value,
27
- alt: node['alt'].strip
28
- }
26
+ if node.matches?('img[alt], img[srcset]')
27
+ ImageElementParser.new(node, resolved_value).to_h
29
28
  else
30
29
  resolved_value
31
30
  end
@@ -4,7 +4,9 @@ module MicroMicro
4
4
  module Parsers
5
5
  class ValueClassPatternParser
6
6
  # @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
7
+ # microformats.org: Value Class Pattern § Basic Parsing
7
8
  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_values
9
+ # microformats.org: Value Class Pattern § Date and time values
8
10
  HTML_ATTRIBUTES_MAP = {
9
11
  'alt' => %w[area img],
10
12
  'value' => %w[data],
@@ -37,7 +39,7 @@ module MicroMicro
37
39
  Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
38
40
  end
39
41
 
40
- # @param context [Nokogiri::XML::Element]
42
+ # @param node [Nokogiri::XML::Element]
41
43
  # @param separator [String]
42
44
  def initialize(node, separator = '')
43
45
  @node = node
@@ -55,7 +57,7 @@ module MicroMicro
55
57
  self.class
56
58
  .node_set_from(node)
57
59
  .map { |value_node| self.class.value_from(value_node) }
58
- .select(&:present?)
60
+ .compact_blank!
59
61
  end
60
62
 
61
63
  private
@@ -11,55 +11,95 @@ module MicroMicro
11
11
  'u' => Parsers::UrlPropertyParser
12
12
  }.freeze
13
13
 
14
- attr_reader :name, :node, :prefix
14
+ private_constant :PROPERTY_PARSERS_MAP
15
15
 
16
- # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
17
- # @return [Array<MicroMicro::Property>]
18
- def self.from_context(context)
19
- node_set_from(context).flat_map do |node|
20
- Helpers.property_class_names_from(node).map { |token| new(node, token) }
16
+ class PropertyNodeSearch
17
+ attr_reader :node_set
18
+
19
+ def initialize(document)
20
+ @node_set = Nokogiri::XML::NodeSet.new(document, [])
21
21
  end
22
- end
23
22
 
24
- # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
25
- # @param node_set [Nokogiri::XML::NodeSet]
26
- # @return [Nokogiri::XML::NodeSet]
27
- def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
28
- context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
23
+ def search(context)
24
+ context.each { |node| search(node) } if context.is_a?(Nokogiri::XML::NodeSet)
25
+
26
+ if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
27
+ node_set << context if Helpers.property_node?(context)
29
28
 
30
- if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
31
- node_set << context if Helpers.property_node?(context)
29
+ search(context.element_children) unless Helpers.item_node?(context)
30
+ end
32
31
 
33
- node_set_from(context.element_children, node_set) unless Helpers.item_node?(context)
32
+ node_set
34
33
  end
34
+ end
35
+
36
+ private_constant :PropertyNodeSearch
35
37
 
36
- node_set
38
+ # This {MicroMicro::Property}'s +name+ value.
39
+ #
40
+ # @return [String]
41
+ attr_reader :name
42
+
43
+ # This {MicroMicro::Property}'s node.
44
+ #
45
+ # @return [Nokogiri::XML::Element]
46
+ attr_reader :node
47
+
48
+ # This {MicroMicro::Property}'s +prefix+ value.
49
+ #
50
+ # @return [String] One of +dt+, +e+, +p+, or +u+.
51
+ attr_reader :prefix
52
+
53
+ # Extract {MicroMicro::Property}s from a context.
54
+ #
55
+ # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
56
+ # @return [Array<MicroMicro::Property>]
57
+ def self.from_context(context)
58
+ PropertyNodeSearch
59
+ .new(context.document)
60
+ .search(context)
61
+ .flat_map do |node|
62
+ Helpers.property_class_names_from(node).map { |token| new(node, token) }
63
+ end
37
64
  end
38
65
 
66
+ # Parse a node for property data.
67
+ #
39
68
  # @param node [Nokogiri::XML::Element]
40
- # @param token [String]
69
+ # @param token [String] A hyphen-separated token representing a microformats2
70
+ # property value (e.g. +p-name+, +u-url+).
41
71
  def initialize(node, token)
42
72
  @node = node
43
73
  @prefix, @name = token.split(/-/, 2)
44
74
  end
45
75
 
76
+ # Is this {MicroMicro::Property} a datetime property?
77
+ #
46
78
  # @return [Boolean]
47
79
  def date_time_property?
48
80
  prefix == 'dt'
49
81
  end
50
82
 
83
+ # Is this {MicroMicro::Property} an embedded markup property?
84
+ #
51
85
  # @return [Boolean]
52
86
  def embedded_markup_property?
53
87
  prefix == 'e'
54
88
  end
55
89
 
90
+ # Always return +false+ when asked if this {MicroMicro::Property} is an
91
+ # implied property.
92
+ #
93
+ # @see MicroMicro::ImpliedProperty#implied?
94
+ #
56
95
  # @return [Boolean]
57
96
  def implied?
58
97
  false
59
98
  end
60
99
 
61
- # :nocov:
62
100
  # @return [String]
101
+ #
102
+ # :nocov:
63
103
  def inspect
64
104
  "#<#{self.class}:#{format('%#0x', object_id)} " \
65
105
  "name: #{name.inspect}, " \
@@ -68,27 +108,42 @@ module MicroMicro
68
108
  end
69
109
  # :nocov:
70
110
 
111
+ # Parse this {MicroMicro::Property}'s node as a {MicroMicro::Item}, if
112
+ # applicable.
113
+ #
71
114
  # @return [MicroMicro::Item, nil]
72
115
  def item
73
116
  @item ||= Item.new(node) if item_node?
74
117
  end
75
118
 
119
+ # Should this {MicroMicro::Property}'s node be parsed as a
120
+ # {MicroMicro::Item}?
121
+ #
122
+ # @see MicroMicro::Helpers.item_node?
123
+ #
76
124
  # @return [Boolean]
77
125
  def item_node?
78
126
  @item_node ||= Helpers.item_node?(node)
79
127
  end
80
128
 
129
+ # Is this {MicroMicro::Property} a plain text property?
130
+ #
81
131
  # @return [Boolean]
82
132
  def plain_text_property?
83
133
  prefix == 'p'
84
134
  end
85
135
 
136
+ # Is this {MicroMicro::Property} a url property?
137
+ #
86
138
  # @return [Boolean]
87
139
  def url_property?
88
140
  prefix == 'u'
89
141
  end
90
142
 
143
+ # Return this {MicroMicro::Property}'s parsed value.
144
+ #
91
145
  # @return [String, Hash]
146
+ #
92
147
  # rubocop:disable Metrics
93
148
  def value
94
149
  @value ||=
@@ -97,8 +152,8 @@ module MicroMicro
97
152
 
98
153
  return hash.merge(parser.value) if embedded_markup_property?
99
154
 
100
- p_property = item.properties.find { |property| property.name == 'name' } if plain_text_property?
101
- u_property = item.properties.find { |property| property.name == 'url' } if url_property?
155
+ p_property = item.properties.find_by(name: 'name') if plain_text_property?
156
+ u_property = item.properties.find_by(name: 'url') if url_property?
102
157
 
103
158
  hash.merge(value: (p_property || u_property || parser).value)
104
159
  else
@@ -107,6 +162,9 @@ module MicroMicro
107
162
  end
108
163
  # rubocop:enable Metrics
109
164
 
165
+ # Returns +true+ if this {MicroMicro::Property}'s +value+ is anything other
166
+ # than blank or +nil+.
167
+ #
110
168
  # @return [Boolean]
111
169
  def value?
112
170
  value.present?