micromicro 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.simplecov +2 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +5 -5
- data/README.md +44 -9
- data/lib/micro_micro/collectible.rb +13 -0
- data/lib/micro_micro/collections/base_collection.rb +11 -12
- data/lib/micro_micro/collections/items_collection.rb +5 -0
- data/lib/micro_micro/collections/properties_collection.rb +9 -7
- data/lib/micro_micro/collections/{relations_collection.rb → relationships_collection.rb} +14 -5
- data/lib/micro_micro/document.rb +94 -14
- data/lib/micro_micro/item.rb +40 -7
- data/lib/micro_micro/parsers/base_property_parser.rb +17 -14
- data/lib/micro_micro/parsers/date_time_parser.rb +1 -7
- data/lib/micro_micro/parsers/date_time_property_parser.rb +17 -27
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -15
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +17 -56
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +5 -9
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +3 -13
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +9 -18
- data/lib/micro_micro/parsers/url_property_parser.rb +11 -27
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +1 -12
- data/lib/micro_micro/property.rb +16 -23
- data/lib/micro_micro/{relation.rb → relationship.rb} +6 -5
- data/lib/micro_micro/version.rb +1 -1
- data/lib/micromicro.rb +13 -3
- data/micromicro.gemspec +1 -1
- metadata +11 -10
data/lib/micro_micro/item.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
class Item
|
3
|
-
|
3
|
+
include Collectible
|
4
4
|
|
5
|
+
# Parse a node for microformats2-encoded data.
|
6
|
+
#
|
5
7
|
# @param node [Nokogiri::XML::Element]
|
6
8
|
def initialize(node)
|
7
9
|
@node = node
|
@@ -11,11 +13,17 @@ module MicroMicro
|
|
11
13
|
properties << implied_url if implied_url?
|
12
14
|
end
|
13
15
|
|
16
|
+
# A collection of child items parsed from the node.
|
17
|
+
#
|
18
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
19
|
+
#
|
14
20
|
# @return [MicroMicro::Collections::ItemsCollection]
|
15
21
|
def children
|
16
22
|
@children ||= Collections::ItemsCollection.new(Item.items_from(node.element_children))
|
17
23
|
end
|
18
24
|
|
25
|
+
# The value of the node's `id` attribute, if present.
|
26
|
+
#
|
19
27
|
# @return [String, nil]
|
20
28
|
def id
|
21
29
|
@id ||= node['id']&.strip
|
@@ -26,12 +34,22 @@ module MicroMicro
|
|
26
34
|
format(%(#<#{self.class.name}:%#0x types: #{types.inspect}, properties: #{properties.count}, children: #{children.count}>), object_id)
|
27
35
|
end
|
28
36
|
|
37
|
+
# A collection of plain text properties parsed from the node.
|
38
|
+
#
|
39
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
40
|
+
def plain_text_properties
|
41
|
+
@plain_text_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'p' })
|
42
|
+
end
|
43
|
+
|
44
|
+
# A collection of properties parsed from the node.
|
45
|
+
#
|
29
46
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
30
47
|
def properties
|
31
48
|
@properties ||= Collections::PropertiesCollection.new(Property.properties_from(node.element_children))
|
32
49
|
end
|
33
50
|
|
34
|
-
#
|
51
|
+
# Return the parsed item as a Hash.
|
52
|
+
#
|
35
53
|
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
36
54
|
#
|
37
55
|
# @return [Hash]
|
@@ -43,28 +61,42 @@ module MicroMicro
|
|
43
61
|
|
44
62
|
hash[:id] = id if id.present?
|
45
63
|
hash[:children] = children.to_a if children.any?
|
46
|
-
hash[:value] = value if value.present?
|
47
64
|
|
48
65
|
hash
|
49
66
|
end
|
50
67
|
|
68
|
+
# An array of root class names parsed from the node's `class` attribute.
|
69
|
+
#
|
51
70
|
# @return [Array<String>]
|
52
71
|
def types
|
53
72
|
@types ||= self.class.types_from(node)
|
54
73
|
end
|
55
74
|
|
75
|
+
# A collection of url properties parsed from the node.
|
76
|
+
#
|
77
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
78
|
+
def url_properties
|
79
|
+
@url_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'u' })
|
80
|
+
end
|
81
|
+
|
82
|
+
# Does this node's `class` attribute contain root class names?
|
83
|
+
#
|
56
84
|
# @param node [Nokogiri::XML::Element]
|
57
85
|
# @return [Boolean]
|
58
86
|
def self.item_node?(node)
|
59
87
|
types_from(node).any?
|
60
88
|
end
|
61
89
|
|
90
|
+
# Extract items from a context.
|
91
|
+
#
|
62
92
|
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
63
93
|
# @return [Array<MicroMicro::Item>]
|
64
94
|
def self.items_from(context)
|
65
95
|
nodes_from(context).map { |node| new(node) }
|
66
96
|
end
|
67
97
|
|
98
|
+
# Extract item nodes from a context.
|
99
|
+
#
|
68
100
|
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
69
101
|
# @param node_set [Nokogiri::XML::NodeSet]
|
70
102
|
# @return [Nokogiri::XML::NodeSet]
|
@@ -84,14 +116,15 @@ module MicroMicro
|
|
84
116
|
node_set
|
85
117
|
end
|
86
118
|
|
87
|
-
#
|
88
|
-
# @return [Array<String>]
|
119
|
+
# Extract root class names from a node.
|
89
120
|
#
|
90
|
-
# @example
|
91
121
|
# node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
|
92
122
|
# MicroMicro::Item.types_from(node) #=> ['h-card']
|
123
|
+
#
|
124
|
+
# @param node [Nokogiri::XML::Element]
|
125
|
+
# @return [Array<String>]
|
93
126
|
def self.types_from(node)
|
94
|
-
node.classes.select { |token| token.match?(/^h(
|
127
|
+
node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
|
95
128
|
end
|
96
129
|
|
97
130
|
private
|
@@ -7,27 +7,30 @@ module MicroMicro
|
|
7
7
|
@node = property.node
|
8
8
|
end
|
9
9
|
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
11
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
12
|
+
#
|
10
13
|
# @return [String]
|
11
14
|
def value
|
12
|
-
@value ||=
|
15
|
+
@value ||= begin
|
16
|
+
Document.text_content_from(node) do |context|
|
17
|
+
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# @param node [Nokogiri::XML::Element]
|
23
|
+
# @param attributes_map [Hash{String => Array}]
|
24
|
+
# @return [Array]
|
25
|
+
def self.attribute_value_from(node, attributes_map)
|
26
|
+
attributes_map.map do |attribute, names|
|
27
|
+
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
+
end.compact.first
|
13
29
|
end
|
14
30
|
|
15
31
|
private
|
16
32
|
|
17
33
|
attr_reader :node, :property
|
18
|
-
|
19
|
-
# @see microformats2 Parsing Specification sections 1.3.1 and 1.3.4
|
20
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
21
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
22
|
-
def serialized_node
|
23
|
-
@serialized_node ||= begin
|
24
|
-
node.css(*Document.ignored_node_names).unlink
|
25
|
-
|
26
|
-
node.css('img').each { |img| img.content = " #{img['alt'] || Absolutely.to_abs(base: node.document.url, relative: img['src'])} " }
|
27
|
-
|
28
|
-
node
|
29
|
-
end
|
30
|
-
end
|
31
34
|
end
|
32
35
|
end
|
33
36
|
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class DateTimeParser
|
4
|
-
# @see Value Class Pattern section 4.2
|
5
4
|
# @see http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
6
5
|
#
|
7
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
@@ -58,12 +57,7 @@ module MicroMicro
|
|
58
57
|
|
59
58
|
# @return [String, nil]
|
60
59
|
def value
|
61
|
-
@value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
62
|
-
end
|
63
|
-
|
64
|
-
# @return [Boolean]
|
65
|
-
def value?
|
66
|
-
value.present?
|
60
|
+
@value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip if normalized_date || normalized_time || normalized_timezone
|
67
61
|
end
|
68
62
|
|
69
63
|
# @return [Hash{Symbol => String, nil}]
|
@@ -1,65 +1,55 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class DateTimePropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.3
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
6
4
|
HTML_ATTRIBUTES_MAP = {
|
7
5
|
'datetime' => %w[del ins time],
|
8
6
|
'title' => %w[abbr],
|
9
7
|
'value' => %w[data input]
|
10
8
|
}.freeze
|
11
9
|
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
11
|
+
#
|
12
12
|
# @return [String]
|
13
13
|
def value
|
14
|
-
@value ||=
|
15
|
-
return resolved_value if date_time_parser.value?
|
16
|
-
return attribute_values.first if attribute_values.any?
|
17
|
-
|
18
|
-
super
|
19
|
-
end
|
14
|
+
@value ||= resolved_value || attribute_value || super
|
20
15
|
end
|
21
16
|
|
22
17
|
private
|
23
18
|
|
19
|
+
# @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
20
|
+
#
|
24
21
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
25
|
-
def
|
26
|
-
@
|
27
|
-
|
22
|
+
def adopted_date_time_parser
|
23
|
+
@adopted_date_time_parser ||= begin
|
24
|
+
date_time_siblings = (property.prev_all.reverse + property.next_all).select { |prop| prop.prefix == 'dt' }
|
28
25
|
|
29
|
-
|
26
|
+
date_time_siblings.map { |prop| DateTimeParser.new(prop.value) }.find(&:normalized_date)
|
30
27
|
end
|
31
28
|
end
|
32
29
|
|
33
|
-
# @return [
|
34
|
-
def
|
35
|
-
|
36
|
-
HTML_ATTRIBUTES_MAP.map do |attribute, names|
|
37
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
38
|
-
end.compact
|
39
|
-
end
|
30
|
+
# @return [String, nil]
|
31
|
+
def attribute_value
|
32
|
+
self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
40
33
|
end
|
41
34
|
|
42
35
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
43
36
|
def date_time_parser
|
44
|
-
@date_time_parser ||= DateTimeParser.new(
|
37
|
+
@date_time_parser ||= DateTimeParser.new(ValueClassPatternParser.new(node, ' ').value)
|
45
38
|
end
|
46
39
|
|
40
|
+
# @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
41
|
+
#
|
47
42
|
# @return [Boolean]
|
48
43
|
def imply_date?
|
49
|
-
date_time_parser.normalized_time && !date_time_parser.normalized_date
|
44
|
+
date_time_parser.normalized_time && !date_time_parser.normalized_date && adopted_date_time_parser
|
50
45
|
end
|
51
46
|
|
52
47
|
# @return [String, nil]
|
53
48
|
def resolved_value
|
54
|
-
return "#{
|
49
|
+
return "#{adopted_date_time_parser.normalized_date} #{date_time_parser.value}" if imply_date?
|
55
50
|
|
56
51
|
date_time_parser.value
|
57
52
|
end
|
58
|
-
|
59
|
-
# @return [MicroMicro::Parsers::ValueClassPatternParser]
|
60
|
-
def value_class_pattern_parser
|
61
|
-
ValueClassPatternParser.new(node, ' ')
|
62
|
-
end
|
63
53
|
end
|
64
54
|
end
|
65
55
|
end
|
@@ -1,28 +1,17 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
4
|
-
|
5
|
-
|
4
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
5
|
+
#
|
6
|
+
# @return [Hash{Symbol => String}]
|
6
7
|
def value
|
7
8
|
@value ||= begin
|
8
9
|
{
|
9
|
-
html:
|
10
|
+
html: node.inner_html.strip,
|
10
11
|
value: super
|
11
12
|
}
|
12
13
|
end
|
13
14
|
end
|
14
|
-
|
15
|
-
private
|
16
|
-
|
17
|
-
def resolved_node
|
18
|
-
@resolved_node ||= begin
|
19
|
-
HTML_ATTRIBUTE_NAMES.each do |attribute|
|
20
|
-
node.css("[#{attribute}]").each { |element| element[attribute] = Absolutely.to_abs(base: node.document.url, relative: element[attribute].strip) }
|
21
|
-
end
|
22
|
-
|
23
|
-
node
|
24
|
-
end
|
25
|
-
end
|
26
15
|
end
|
27
16
|
end
|
28
17
|
end
|
@@ -1,77 +1,38 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ImpliedNamePropertyParser < BasePropertyParser
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
'area' => 'alt',
|
8
|
-
'img' => 'alt',
|
9
|
-
'abbr' => 'title'
|
4
|
+
HTML_ATTRIBUTES_MAP = {
|
5
|
+
'alt' => %w[area img],
|
6
|
+
'title' => %w[abbr]
|
10
7
|
}.freeze
|
11
8
|
|
9
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
|
+
#
|
12
11
|
# @return [String]
|
13
12
|
def value
|
14
|
-
@value ||=
|
13
|
+
@value ||= attribute_value || text_content
|
15
14
|
end
|
16
15
|
|
17
16
|
private
|
18
17
|
|
19
|
-
# @return [
|
20
|
-
def
|
21
|
-
@
|
22
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
23
|
-
node[attribute] if node.matches?("#{element}[#{attribute}]")
|
24
|
-
end.compact
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# @return [Nokogiri::XML::Element, nil]
|
29
|
-
def child_node
|
30
|
-
@child_node ||= node.at_css('> :only-child')
|
31
|
-
end
|
32
|
-
|
33
|
-
# @return [Array<String>]
|
34
|
-
def child_node_attribute_values
|
35
|
-
@child_node_attribute_values ||= begin
|
36
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
37
|
-
child_node[attribute] if child_node.matches?("#{element}[#{attribute}]")
|
38
|
-
end.compact
|
39
|
-
end
|
18
|
+
# @return [Nokogiri::XML::NodeSet]
|
19
|
+
def candidate_nodes
|
20
|
+
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
40
21
|
end
|
41
22
|
|
42
|
-
# @return [
|
43
|
-
def
|
44
|
-
|
23
|
+
# @return [Array]
|
24
|
+
def child_nodes
|
25
|
+
[node.at_css('> :only-child'), node.at_css('> :only-child > :only-child')].compact.reject { |child_node| Item.item_node?(child_node) }
|
45
26
|
end
|
46
27
|
|
47
|
-
# @return [
|
48
|
-
def
|
49
|
-
|
50
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
51
|
-
grandchild_node[attribute] if grandchild_node.matches?("#{element}[#{attribute}]")
|
52
|
-
end.compact
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
# @return [Boolean]
|
57
|
-
def parse_child_node?
|
58
|
-
child_node && !Item.item_node?(child_node)
|
59
|
-
end
|
60
|
-
|
61
|
-
# @return [Boolean]
|
62
|
-
def parse_grandchild_node?
|
63
|
-
parse_child_node? && grandchild_node && !Item.item_node?(grandchild_node)
|
28
|
+
# @return [String, nil]
|
29
|
+
def attribute_value
|
30
|
+
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
64
31
|
end
|
65
32
|
|
66
33
|
# @return [String]
|
67
|
-
def
|
68
|
-
|
69
|
-
return child_node_attribute_values.first if parse_child_node? && child_node_attribute_values.any?
|
70
|
-
return grandchild_node_attribute_values.first if parse_grandchild_node? && grandchild_node_attribute_values.any?
|
71
|
-
|
72
|
-
serialized_node.css('img').each { |img| img.content = img['alt'] }
|
73
|
-
|
74
|
-
serialized_node.text
|
34
|
+
def text_content
|
35
|
+
@text_content ||= Document.text_content_from(node) { |context| context.css('img').each { |img| img.content = img['alt'] } }
|
75
36
|
end
|
76
37
|
end
|
77
38
|
end
|
@@ -1,14 +1,15 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ImpliedPhotoPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.5
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
6
4
|
HTML_ELEMENTS_MAP = {
|
7
5
|
'img' => 'src',
|
8
6
|
'object' => 'data'
|
9
7
|
}.freeze
|
10
8
|
|
11
|
-
# @
|
9
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
11
|
+
#
|
12
|
+
# @return [String, Hash{Symbol => String}, nil]
|
12
13
|
def value
|
13
14
|
@value ||= begin
|
14
15
|
return unless resolved_value
|
@@ -34,12 +35,7 @@ module MicroMicro
|
|
34
35
|
|
35
36
|
# @return [String, nil]
|
36
37
|
def resolved_value
|
37
|
-
@resolved_value ||=
|
38
|
-
end
|
39
|
-
|
40
|
-
# @return [String, nil]
|
41
|
-
def unresolved_value
|
42
|
-
@unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
38
|
+
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
43
39
|
end
|
44
40
|
|
45
41
|
# @return [Nokogiri::XML::Element, nil]
|
@@ -1,16 +1,16 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ImpliedUrlPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.5
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
6
4
|
HTML_ELEMENTS_MAP = {
|
7
5
|
'a' => 'href',
|
8
6
|
'area' => 'href'
|
9
7
|
}.freeze
|
10
8
|
|
9
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
|
+
#
|
11
11
|
# @return [String, nil]
|
12
12
|
def value
|
13
|
-
@value ||=
|
13
|
+
@value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
14
14
|
end
|
15
15
|
|
16
16
|
private
|
@@ -24,16 +24,6 @@ module MicroMicro
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
# @return [String, nil]
|
28
|
-
def resolved_value
|
29
|
-
@resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
|
30
|
-
end
|
31
|
-
|
32
|
-
# @return [String, nil]
|
33
|
-
def unresolved_value
|
34
|
-
@unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
35
|
-
end
|
36
|
-
|
37
27
|
# @return [Nokogiri::XML::Element, nil]
|
38
28
|
def value_node
|
39
29
|
@value_node ||= begin
|