micromicro 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,9 @@
1
1
  module MicroMicro
2
2
  class Item
3
- attr_accessor :value
3
+ include Collectible
4
4
 
5
+ # Parse a node for microformats2-encoded data.
6
+ #
5
7
  # @param node [Nokogiri::XML::Element]
6
8
  def initialize(node)
7
9
  @node = node
@@ -11,11 +13,17 @@ module MicroMicro
11
13
  properties << implied_url if implied_url?
12
14
  end
13
15
 
16
+ # A collection of child items parsed from the node.
17
+ #
18
+ # @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
19
+ #
14
20
  # @return [MicroMicro::Collections::ItemsCollection]
15
21
  def children
16
22
  @children ||= Collections::ItemsCollection.new(Item.items_from(node.element_children))
17
23
  end
18
24
 
25
+ # The value of the node's `id` attribute, if present.
26
+ #
19
27
  # @return [String, nil]
20
28
  def id
21
29
  @id ||= node['id']&.strip
@@ -26,12 +34,22 @@ module MicroMicro
26
34
  format(%(#<#{self.class.name}:%#0x types: #{types.inspect}, properties: #{properties.count}, children: #{children.count}>), object_id)
27
35
  end
28
36
 
37
+ # A collection of plain text properties parsed from the node.
38
+ #
39
+ # @return [MicroMicro::Collections::PropertiesCollection]
40
+ def plain_text_properties
41
+ @plain_text_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'p' })
42
+ end
43
+
44
+ # A collection of properties parsed from the node.
45
+ #
29
46
  # @return [MicroMicro::Collections::PropertiesCollection]
30
47
  def properties
31
48
  @properties ||= Collections::PropertiesCollection.new(Property.properties_from(node.element_children))
32
49
  end
33
50
 
34
- # @see microformats2 Parsing Specification section 1.2
51
+ # Return the parsed item as a Hash.
52
+ #
35
53
  # @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
36
54
  #
37
55
  # @return [Hash]
@@ -43,28 +61,42 @@ module MicroMicro
43
61
 
44
62
  hash[:id] = id if id.present?
45
63
  hash[:children] = children.to_a if children.any?
46
- hash[:value] = value if value.present?
47
64
 
48
65
  hash
49
66
  end
50
67
 
68
+ # An array of root class names parsed from the node's `class` attribute.
69
+ #
51
70
  # @return [Array<String>]
52
71
  def types
53
72
  @types ||= self.class.types_from(node)
54
73
  end
55
74
 
75
+ # A collection of url properties parsed from the node.
76
+ #
77
+ # @return [MicroMicro::Collections::PropertiesCollection]
78
+ def url_properties
79
+ @url_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'u' })
80
+ end
81
+
82
+ # Does this node's `class` attribute contain root class names?
83
+ #
56
84
  # @param node [Nokogiri::XML::Element]
57
85
  # @return [Boolean]
58
86
  def self.item_node?(node)
59
87
  types_from(node).any?
60
88
  end
61
89
 
90
+ # Extract items from a context.
91
+ #
62
92
  # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
63
93
  # @return [Array<MicroMicro::Item>]
64
94
  def self.items_from(context)
65
95
  nodes_from(context).map { |node| new(node) }
66
96
  end
67
97
 
98
+ # Extract item nodes from a context.
99
+ #
68
100
  # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
69
101
  # @param node_set [Nokogiri::XML::NodeSet]
70
102
  # @return [Nokogiri::XML::NodeSet]
@@ -84,14 +116,15 @@ module MicroMicro
84
116
  node_set
85
117
  end
86
118
 
87
- # @param node [Nokogiri::XML::Element]
88
- # @return [Array<String>]
119
+ # Extract root class names from a node.
89
120
  #
90
- # @example
91
121
  # node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
92
122
  # MicroMicro::Item.types_from(node) #=> ['h-card']
123
+ #
124
+ # @param node [Nokogiri::XML::Element]
125
+ # @return [Array<String>]
93
126
  def self.types_from(node)
94
- node.classes.select { |token| token.match?(/^h(?:\-[0-9a-z]+)?(?:\-[a-z]+)+$/) }.uniq.sort
127
+ node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
95
128
  end
96
129
 
97
130
  private
@@ -7,27 +7,30 @@ module MicroMicro
7
7
  @node = property.node
8
8
  end
9
9
 
10
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
11
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
12
+ #
10
13
  # @return [String]
11
14
  def value
12
- @value ||= serialized_node.text.strip
15
+ @value ||= begin
16
+ Document.text_content_from(node) do |context|
17
+ context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
18
+ end
19
+ end
20
+ end
21
+
22
+ # @param node [Nokogiri::XML::Element]
23
+ # @param attributes_map [Hash{String => Array}]
24
+ # @return [Array]
25
+ def self.attribute_value_from(node, attributes_map)
26
+ attributes_map.map do |attribute, names|
27
+ node[attribute] if names.include?(node.name) && node[attribute]
28
+ end.compact.first
13
29
  end
14
30
 
15
31
  private
16
32
 
17
33
  attr_reader :node, :property
18
-
19
- # @see microformats2 Parsing Specification sections 1.3.1 and 1.3.4
20
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
21
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
22
- def serialized_node
23
- @serialized_node ||= begin
24
- node.css(*Document.ignored_node_names).unlink
25
-
26
- node.css('img').each { |img| img.content = " #{img['alt'] || Absolutely.to_abs(base: node.document.url, relative: img['src'])} " }
27
-
28
- node
29
- end
30
- end
31
34
  end
32
35
  end
33
36
  end
@@ -1,7 +1,6 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class DateTimeParser
4
- # @see Value Class Pattern section 4.2
5
4
  # @see http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
6
5
  #
7
6
  # Regexp pattern matching YYYY-MM-DD and YYYY-DDD
@@ -58,12 +57,7 @@ module MicroMicro
58
57
 
59
58
  # @return [String, nil]
60
59
  def value
61
- @value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
62
- end
63
-
64
- # @return [Boolean]
65
- def value?
66
- value.present?
60
+ @value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip if normalized_date || normalized_time || normalized_timezone
67
61
  end
68
62
 
69
63
  # @return [Hash{Symbol => String, nil}]
@@ -1,65 +1,55 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class DateTimePropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.3
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
6
4
  HTML_ATTRIBUTES_MAP = {
7
5
  'datetime' => %w[del ins time],
8
6
  'title' => %w[abbr],
9
7
  'value' => %w[data input]
10
8
  }.freeze
11
9
 
10
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
11
+ #
12
12
  # @return [String]
13
13
  def value
14
- @value ||= begin
15
- return resolved_value if date_time_parser.value?
16
- return attribute_values.first if attribute_values.any?
17
-
18
- super
19
- end
14
+ @value ||= resolved_value || attribute_value || super
20
15
  end
21
16
 
22
17
  private
23
18
 
19
+ # @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
20
+ #
24
21
  # @return [MicroMicro::Parsers::DateTimeParser, nil]
25
- def adopted_date_time
26
- @adopted_date_time ||= begin
27
- collections = property.collection.select { |prop| prop.prefix == 'dt' }.split(property)
22
+ def adopted_date_time_parser
23
+ @adopted_date_time_parser ||= begin
24
+ date_time_siblings = (property.prev_all.reverse + property.next_all).select { |prop| prop.prefix == 'dt' }
28
25
 
29
- (collections.shift.reverse + collections).flatten.map { |prop| DateTimeParser.new(prop.value) }.find(&:normalized_date)
26
+ date_time_siblings.map { |prop| DateTimeParser.new(prop.value) }.find(&:normalized_date)
30
27
  end
31
28
  end
32
29
 
33
- # @return [Array<String>]
34
- def attribute_values
35
- @attribute_values ||= begin
36
- HTML_ATTRIBUTES_MAP.map do |attribute, names|
37
- node[attribute] if names.include?(node.name) && node[attribute]
38
- end.compact
39
- end
30
+ # @return [String, nil]
31
+ def attribute_value
32
+ self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
40
33
  end
41
34
 
42
35
  # @return [MicroMicro::Parsers::DateTimeParser]
43
36
  def date_time_parser
44
- @date_time_parser ||= DateTimeParser.new(value_class_pattern_parser.value)
37
+ @date_time_parser ||= DateTimeParser.new(ValueClassPatternParser.new(node, ' ').value)
45
38
  end
46
39
 
40
+ # @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
41
+ #
47
42
  # @return [Boolean]
48
43
  def imply_date?
49
- date_time_parser.normalized_time && !date_time_parser.normalized_date
44
+ date_time_parser.normalized_time && !date_time_parser.normalized_date && adopted_date_time_parser
50
45
  end
51
46
 
52
47
  # @return [String, nil]
53
48
  def resolved_value
54
- return "#{adopted_date_time.normalized_date} #{date_time_parser.value}" if imply_date? && adopted_date_time
49
+ return "#{adopted_date_time_parser.normalized_date} #{date_time_parser.value}" if imply_date?
55
50
 
56
51
  date_time_parser.value
57
52
  end
58
-
59
- # @return [MicroMicro::Parsers::ValueClassPatternParser]
60
- def value_class_pattern_parser
61
- ValueClassPatternParser.new(node, ' ')
62
- end
63
53
  end
64
54
  end
65
55
  end
@@ -1,28 +1,17 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class EmbeddedMarkupPropertyParser < BasePropertyParser
4
- HTML_ATTRIBUTE_NAMES = %w[action cite code codebase data href poster src].freeze
5
-
4
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
5
+ #
6
+ # @return [Hash{Symbol => String}]
6
7
  def value
7
8
  @value ||= begin
8
9
  {
9
- html: resolved_node.inner_html.strip,
10
+ html: node.inner_html.strip,
10
11
  value: super
11
12
  }
12
13
  end
13
14
  end
14
-
15
- private
16
-
17
- def resolved_node
18
- @resolved_node ||= begin
19
- HTML_ATTRIBUTE_NAMES.each do |attribute|
20
- node.css("[#{attribute}]").each { |element| element[attribute] = Absolutely.to_abs(base: node.document.url, relative: element[attribute].strip) }
21
- end
22
-
23
- node
24
- end
25
- end
26
15
  end
27
16
  end
28
17
  end
@@ -1,77 +1,38 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class ImpliedNamePropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.5
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
- HTML_ELEMENTS_MAP = {
7
- 'area' => 'alt',
8
- 'img' => 'alt',
9
- 'abbr' => 'title'
4
+ HTML_ATTRIBUTES_MAP = {
5
+ 'alt' => %w[area img],
6
+ 'title' => %w[abbr]
10
7
  }.freeze
11
8
 
9
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
10
+ #
12
11
  # @return [String]
13
12
  def value
14
- @value ||= unresolved_value.strip
13
+ @value ||= attribute_value || text_content
15
14
  end
16
15
 
17
16
  private
18
17
 
19
- # @return [Array<String>]
20
- def attribute_values
21
- @attribute_values ||= begin
22
- HTML_ELEMENTS_MAP.map do |element, attribute|
23
- node[attribute] if node.matches?("#{element}[#{attribute}]")
24
- end.compact
25
- end
26
- end
27
-
28
- # @return [Nokogiri::XML::Element, nil]
29
- def child_node
30
- @child_node ||= node.at_css('> :only-child')
31
- end
32
-
33
- # @return [Array<String>]
34
- def child_node_attribute_values
35
- @child_node_attribute_values ||= begin
36
- HTML_ELEMENTS_MAP.map do |element, attribute|
37
- child_node[attribute] if child_node.matches?("#{element}[#{attribute}]")
38
- end.compact
39
- end
18
+ # @return [Nokogiri::XML::NodeSet]
19
+ def candidate_nodes
20
+ @candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
40
21
  end
41
22
 
42
- # @return [Nokogiri::XML::Element, nil]
43
- def grandchild_node
44
- @grandchild_node ||= child_node.at_css('> :only-child')
23
+ # @return [Array]
24
+ def child_nodes
25
+ [node.at_css('> :only-child'), node.at_css('> :only-child > :only-child')].compact.reject { |child_node| Item.item_node?(child_node) }
45
26
  end
46
27
 
47
- # @return [Array<String>]
48
- def grandchild_node_attribute_values
49
- @grandchild_node_attribute_values ||= begin
50
- HTML_ELEMENTS_MAP.map do |element, attribute|
51
- grandchild_node[attribute] if grandchild_node.matches?("#{element}[#{attribute}]")
52
- end.compact
53
- end
54
- end
55
-
56
- # @return [Boolean]
57
- def parse_child_node?
58
- child_node && !Item.item_node?(child_node)
59
- end
60
-
61
- # @return [Boolean]
62
- def parse_grandchild_node?
63
- parse_child_node? && grandchild_node && !Item.item_node?(grandchild_node)
28
+ # @return [String, nil]
29
+ def attribute_value
30
+ candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
64
31
  end
65
32
 
66
33
  # @return [String]
67
- def unresolved_value
68
- return attribute_values.first if attribute_values.any?
69
- return child_node_attribute_values.first if parse_child_node? && child_node_attribute_values.any?
70
- return grandchild_node_attribute_values.first if parse_grandchild_node? && grandchild_node_attribute_values.any?
71
-
72
- serialized_node.css('img').each { |img| img.content = img['alt'] }
73
-
74
- serialized_node.text
34
+ def text_content
35
+ @text_content ||= Document.text_content_from(node) { |context| context.css('img').each { |img| img.content = img['alt'] } }
75
36
  end
76
37
  end
77
38
  end
@@ -1,14 +1,15 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class ImpliedPhotoPropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.5
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
4
  HTML_ELEMENTS_MAP = {
7
5
  'img' => 'src',
8
6
  'object' => 'data'
9
7
  }.freeze
10
8
 
11
- # @return [String, nil]
9
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
10
+ # @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
11
+ #
12
+ # @return [String, Hash{Symbol => String}, nil]
12
13
  def value
13
14
  @value ||= begin
14
15
  return unless resolved_value
@@ -34,12 +35,7 @@ module MicroMicro
34
35
 
35
36
  # @return [String, nil]
36
37
  def resolved_value
37
- @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
38
- end
39
-
40
- # @return [String, nil]
41
- def unresolved_value
42
- @unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
38
+ @resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
43
39
  end
44
40
 
45
41
  # @return [Nokogiri::XML::Element, nil]
@@ -1,16 +1,16 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class ImpliedUrlPropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.5
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
4
  HTML_ELEMENTS_MAP = {
7
5
  'a' => 'href',
8
6
  'area' => 'href'
9
7
  }.freeze
10
8
 
9
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
10
+ #
11
11
  # @return [String, nil]
12
12
  def value
13
- @value ||= resolved_value
13
+ @value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
14
14
  end
15
15
 
16
16
  private
@@ -24,16 +24,6 @@ module MicroMicro
24
24
  end
25
25
  end
26
26
 
27
- # @return [String, nil]
28
- def resolved_value
29
- @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
30
- end
31
-
32
- # @return [String, nil]
33
- def unresolved_value
34
- @unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
35
- end
36
-
37
27
  # @return [Nokogiri::XML::Element, nil]
38
28
  def value_node
39
29
  @value_node ||= begin