micromicro 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,9 @@
1
1
  module MicroMicro
2
2
  class Item
3
- attr_accessor :value
3
+ include Collectible
4
4
 
5
+ # Parse a node for microformats2-encoded data.
6
+ #
5
7
  # @param node [Nokogiri::XML::Element]
6
8
  def initialize(node)
7
9
  @node = node
@@ -11,11 +13,17 @@ module MicroMicro
11
13
  properties << implied_url if implied_url?
12
14
  end
13
15
 
16
+ # A collection of child items parsed from the node.
17
+ #
18
+ # @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
19
+ #
14
20
  # @return [MicroMicro::Collections::ItemsCollection]
15
21
  def children
16
22
  @children ||= Collections::ItemsCollection.new(Item.items_from(node.element_children))
17
23
  end
18
24
 
25
+ # The value of the node's `id` attribute, if present.
26
+ #
19
27
  # @return [String, nil]
20
28
  def id
21
29
  @id ||= node['id']&.strip
@@ -26,12 +34,22 @@ module MicroMicro
26
34
  format(%(#<#{self.class.name}:%#0x types: #{types.inspect}, properties: #{properties.count}, children: #{children.count}>), object_id)
27
35
  end
28
36
 
37
+ # A collection of plain text properties parsed from the node.
38
+ #
39
+ # @return [MicroMicro::Collections::PropertiesCollection]
40
+ def plain_text_properties
41
+ @plain_text_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'p' })
42
+ end
43
+
44
+ # A collection of properties parsed from the node.
45
+ #
29
46
  # @return [MicroMicro::Collections::PropertiesCollection]
30
47
  def properties
31
48
  @properties ||= Collections::PropertiesCollection.new(Property.properties_from(node.element_children))
32
49
  end
33
50
 
34
- # @see microformats2 Parsing Specification section 1.2
51
+ # Return the parsed item as a Hash.
52
+ #
35
53
  # @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
36
54
  #
37
55
  # @return [Hash]
@@ -43,28 +61,42 @@ module MicroMicro
43
61
 
44
62
  hash[:id] = id if id.present?
45
63
  hash[:children] = children.to_a if children.any?
46
- hash[:value] = value if value.present?
47
64
 
48
65
  hash
49
66
  end
50
67
 
68
+ # An array of root class names parsed from the node's `class` attribute.
69
+ #
51
70
  # @return [Array<String>]
52
71
  def types
53
72
  @types ||= self.class.types_from(node)
54
73
  end
55
74
 
75
+ # A collection of url properties parsed from the node.
76
+ #
77
+ # @return [MicroMicro::Collections::PropertiesCollection]
78
+ def url_properties
79
+ @url_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'u' })
80
+ end
81
+
82
+ # Does this node's `class` attribute contain root class names?
83
+ #
56
84
  # @param node [Nokogiri::XML::Element]
57
85
  # @return [Boolean]
58
86
  def self.item_node?(node)
59
87
  types_from(node).any?
60
88
  end
61
89
 
90
+ # Extract items from a context.
91
+ #
62
92
  # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
63
93
  # @return [Array<MicroMicro::Item>]
64
94
  def self.items_from(context)
65
95
  nodes_from(context).map { |node| new(node) }
66
96
  end
67
97
 
98
+ # Extract item nodes from a context.
99
+ #
68
100
  # @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
69
101
  # @param node_set [Nokogiri::XML::NodeSet]
70
102
  # @return [Nokogiri::XML::NodeSet]
@@ -84,14 +116,15 @@ module MicroMicro
84
116
  node_set
85
117
  end
86
118
 
87
- # @param node [Nokogiri::XML::Element]
88
- # @return [Array<String>]
119
+ # Extract root class names from a node.
89
120
  #
90
- # @example
91
121
  # node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
92
122
  # MicroMicro::Item.types_from(node) #=> ['h-card']
123
+ #
124
+ # @param node [Nokogiri::XML::Element]
125
+ # @return [Array<String>]
93
126
  def self.types_from(node)
94
- node.classes.select { |token| token.match?(/^h(?:\-[0-9a-z]+)?(?:\-[a-z]+)+$/) }.uniq.sort
127
+ node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
95
128
  end
96
129
 
97
130
  private
@@ -7,27 +7,30 @@ module MicroMicro
7
7
  @node = property.node
8
8
  end
9
9
 
10
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
11
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
12
+ #
10
13
  # @return [String]
11
14
  def value
12
- @value ||= serialized_node.text.strip
15
+ @value ||= begin
16
+ Document.text_content_from(node) do |context|
17
+ context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
18
+ end
19
+ end
20
+ end
21
+
22
+ # @param node [Nokogiri::XML::Element]
23
+ # @param attributes_map [Hash{String => Array}]
24
+ # @return [Array]
25
+ def self.attribute_value_from(node, attributes_map)
26
+ attributes_map.map do |attribute, names|
27
+ node[attribute] if names.include?(node.name) && node[attribute]
28
+ end.compact.first
13
29
  end
14
30
 
15
31
  private
16
32
 
17
33
  attr_reader :node, :property
18
-
19
- # @see microformats2 Parsing Specification sections 1.3.1 and 1.3.4
20
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
21
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
22
- def serialized_node
23
- @serialized_node ||= begin
24
- node.css(*Document.ignored_node_names).unlink
25
-
26
- node.css('img').each { |img| img.content = " #{img['alt'] || Absolutely.to_abs(base: node.document.url, relative: img['src'])} " }
27
-
28
- node
29
- end
30
- end
31
34
  end
32
35
  end
33
36
  end
@@ -1,7 +1,6 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class DateTimeParser
4
- # @see Value Class Pattern section 4.2
5
4
  # @see http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
6
5
  #
7
6
  # Regexp pattern matching YYYY-MM-DD and YYYY-DDD
@@ -58,12 +57,7 @@ module MicroMicro
58
57
 
59
58
  # @return [String, nil]
60
59
  def value
61
- @value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
62
- end
63
-
64
- # @return [Boolean]
65
- def value?
66
- value.present?
60
+ @value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip if normalized_date || normalized_time || normalized_timezone
67
61
  end
68
62
 
69
63
  # @return [Hash{Symbol => String, nil}]
@@ -1,65 +1,55 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class DateTimePropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.3
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
6
4
  HTML_ATTRIBUTES_MAP = {
7
5
  'datetime' => %w[del ins time],
8
6
  'title' => %w[abbr],
9
7
  'value' => %w[data input]
10
8
  }.freeze
11
9
 
10
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
11
+ #
12
12
  # @return [String]
13
13
  def value
14
- @value ||= begin
15
- return resolved_value if date_time_parser.value?
16
- return attribute_values.first if attribute_values.any?
17
-
18
- super
19
- end
14
+ @value ||= resolved_value || attribute_value || super
20
15
  end
21
16
 
22
17
  private
23
18
 
19
+ # @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
20
+ #
24
21
  # @return [MicroMicro::Parsers::DateTimeParser, nil]
25
- def adopted_date_time
26
- @adopted_date_time ||= begin
27
- collections = property.collection.select { |prop| prop.prefix == 'dt' }.split(property)
22
+ def adopted_date_time_parser
23
+ @adopted_date_time_parser ||= begin
24
+ date_time_siblings = (property.prev_all.reverse + property.next_all).select { |prop| prop.prefix == 'dt' }
28
25
 
29
- (collections.shift.reverse + collections).flatten.map { |prop| DateTimeParser.new(prop.value) }.find(&:normalized_date)
26
+ date_time_siblings.map { |prop| DateTimeParser.new(prop.value) }.find(&:normalized_date)
30
27
  end
31
28
  end
32
29
 
33
- # @return [Array<String>]
34
- def attribute_values
35
- @attribute_values ||= begin
36
- HTML_ATTRIBUTES_MAP.map do |attribute, names|
37
- node[attribute] if names.include?(node.name) && node[attribute]
38
- end.compact
39
- end
30
+ # @return [String, nil]
31
+ def attribute_value
32
+ self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
40
33
  end
41
34
 
42
35
  # @return [MicroMicro::Parsers::DateTimeParser]
43
36
  def date_time_parser
44
- @date_time_parser ||= DateTimeParser.new(value_class_pattern_parser.value)
37
+ @date_time_parser ||= DateTimeParser.new(ValueClassPatternParser.new(node, ' ').value)
45
38
  end
46
39
 
40
+ # @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
41
+ #
47
42
  # @return [Boolean]
48
43
  def imply_date?
49
- date_time_parser.normalized_time && !date_time_parser.normalized_date
44
+ date_time_parser.normalized_time && !date_time_parser.normalized_date && adopted_date_time_parser
50
45
  end
51
46
 
52
47
  # @return [String, nil]
53
48
  def resolved_value
54
- return "#{adopted_date_time.normalized_date} #{date_time_parser.value}" if imply_date? && adopted_date_time
49
+ return "#{adopted_date_time_parser.normalized_date} #{date_time_parser.value}" if imply_date?
55
50
 
56
51
  date_time_parser.value
57
52
  end
58
-
59
- # @return [MicroMicro::Parsers::ValueClassPatternParser]
60
- def value_class_pattern_parser
61
- ValueClassPatternParser.new(node, ' ')
62
- end
63
53
  end
64
54
  end
65
55
  end
@@ -1,28 +1,17 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class EmbeddedMarkupPropertyParser < BasePropertyParser
4
- HTML_ATTRIBUTE_NAMES = %w[action cite code codebase data href poster src].freeze
5
-
4
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
5
+ #
6
+ # @return [Hash{Symbol => String}]
6
7
  def value
7
8
  @value ||= begin
8
9
  {
9
- html: resolved_node.inner_html.strip,
10
+ html: node.inner_html.strip,
10
11
  value: super
11
12
  }
12
13
  end
13
14
  end
14
-
15
- private
16
-
17
- def resolved_node
18
- @resolved_node ||= begin
19
- HTML_ATTRIBUTE_NAMES.each do |attribute|
20
- node.css("[#{attribute}]").each { |element| element[attribute] = Absolutely.to_abs(base: node.document.url, relative: element[attribute].strip) }
21
- end
22
-
23
- node
24
- end
25
- end
26
15
  end
27
16
  end
28
17
  end
@@ -1,77 +1,38 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class ImpliedNamePropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.5
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
- HTML_ELEMENTS_MAP = {
7
- 'area' => 'alt',
8
- 'img' => 'alt',
9
- 'abbr' => 'title'
4
+ HTML_ATTRIBUTES_MAP = {
5
+ 'alt' => %w[area img],
6
+ 'title' => %w[abbr]
10
7
  }.freeze
11
8
 
9
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
10
+ #
12
11
  # @return [String]
13
12
  def value
14
- @value ||= unresolved_value.strip
13
+ @value ||= attribute_value || text_content
15
14
  end
16
15
 
17
16
  private
18
17
 
19
- # @return [Array<String>]
20
- def attribute_values
21
- @attribute_values ||= begin
22
- HTML_ELEMENTS_MAP.map do |element, attribute|
23
- node[attribute] if node.matches?("#{element}[#{attribute}]")
24
- end.compact
25
- end
26
- end
27
-
28
- # @return [Nokogiri::XML::Element, nil]
29
- def child_node
30
- @child_node ||= node.at_css('> :only-child')
31
- end
32
-
33
- # @return [Array<String>]
34
- def child_node_attribute_values
35
- @child_node_attribute_values ||= begin
36
- HTML_ELEMENTS_MAP.map do |element, attribute|
37
- child_node[attribute] if child_node.matches?("#{element}[#{attribute}]")
38
- end.compact
39
- end
18
+ # @return [Nokogiri::XML::NodeSet]
19
+ def candidate_nodes
20
+ @candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
40
21
  end
41
22
 
42
- # @return [Nokogiri::XML::Element, nil]
43
- def grandchild_node
44
- @grandchild_node ||= child_node.at_css('> :only-child')
23
+ # @return [Array]
24
+ def child_nodes
25
+ [node.at_css('> :only-child'), node.at_css('> :only-child > :only-child')].compact.reject { |child_node| Item.item_node?(child_node) }
45
26
  end
46
27
 
47
- # @return [Array<String>]
48
- def grandchild_node_attribute_values
49
- @grandchild_node_attribute_values ||= begin
50
- HTML_ELEMENTS_MAP.map do |element, attribute|
51
- grandchild_node[attribute] if grandchild_node.matches?("#{element}[#{attribute}]")
52
- end.compact
53
- end
54
- end
55
-
56
- # @return [Boolean]
57
- def parse_child_node?
58
- child_node && !Item.item_node?(child_node)
59
- end
60
-
61
- # @return [Boolean]
62
- def parse_grandchild_node?
63
- parse_child_node? && grandchild_node && !Item.item_node?(grandchild_node)
28
+ # @return [String, nil]
29
+ def attribute_value
30
+ candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
64
31
  end
65
32
 
66
33
  # @return [String]
67
- def unresolved_value
68
- return attribute_values.first if attribute_values.any?
69
- return child_node_attribute_values.first if parse_child_node? && child_node_attribute_values.any?
70
- return grandchild_node_attribute_values.first if parse_grandchild_node? && grandchild_node_attribute_values.any?
71
-
72
- serialized_node.css('img').each { |img| img.content = img['alt'] }
73
-
74
- serialized_node.text
34
+ def text_content
35
+ @text_content ||= Document.text_content_from(node) { |context| context.css('img').each { |img| img.content = img['alt'] } }
75
36
  end
76
37
  end
77
38
  end
@@ -1,14 +1,15 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class ImpliedPhotoPropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.5
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
4
  HTML_ELEMENTS_MAP = {
7
5
  'img' => 'src',
8
6
  'object' => 'data'
9
7
  }.freeze
10
8
 
11
- # @return [String, nil]
9
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
10
+ # @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
11
+ #
12
+ # @return [String, Hash{Symbol => String}, nil]
12
13
  def value
13
14
  @value ||= begin
14
15
  return unless resolved_value
@@ -34,12 +35,7 @@ module MicroMicro
34
35
 
35
36
  # @return [String, nil]
36
37
  def resolved_value
37
- @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
38
- end
39
-
40
- # @return [String, nil]
41
- def unresolved_value
42
- @unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
38
+ @resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
43
39
  end
44
40
 
45
41
  # @return [Nokogiri::XML::Element, nil]
@@ -1,16 +1,16 @@
1
1
  module MicroMicro
2
2
  module Parsers
3
3
  class ImpliedUrlPropertyParser < BasePropertyParser
4
- # @see microformats2 Parsing Specification section 1.3.5
5
- # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
4
  HTML_ELEMENTS_MAP = {
7
5
  'a' => 'href',
8
6
  'area' => 'href'
9
7
  }.freeze
10
8
 
9
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
10
+ #
11
11
  # @return [String, nil]
12
12
  def value
13
- @value ||= resolved_value
13
+ @value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
14
14
  end
15
15
 
16
16
  private
@@ -24,16 +24,6 @@ module MicroMicro
24
24
  end
25
25
  end
26
26
 
27
- # @return [String, nil]
28
- def resolved_value
29
- @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
30
- end
31
-
32
- # @return [String, nil]
33
- def unresolved_value
34
- @unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
35
- end
36
-
37
27
  # @return [Nokogiri::XML::Element, nil]
38
28
  def value_node
39
29
  @value_node ||= begin