micromicro 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.simplecov +2 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +5 -5
- data/README.md +44 -9
- data/lib/micro_micro/collectible.rb +13 -0
- data/lib/micro_micro/collections/base_collection.rb +11 -12
- data/lib/micro_micro/collections/items_collection.rb +5 -0
- data/lib/micro_micro/collections/properties_collection.rb +9 -7
- data/lib/micro_micro/collections/{relations_collection.rb → relationships_collection.rb} +14 -5
- data/lib/micro_micro/document.rb +94 -14
- data/lib/micro_micro/item.rb +40 -7
- data/lib/micro_micro/parsers/base_property_parser.rb +17 -14
- data/lib/micro_micro/parsers/date_time_parser.rb +1 -7
- data/lib/micro_micro/parsers/date_time_property_parser.rb +17 -27
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -15
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +17 -56
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +5 -9
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +3 -13
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +9 -18
- data/lib/micro_micro/parsers/url_property_parser.rb +11 -27
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +1 -12
- data/lib/micro_micro/property.rb +16 -23
- data/lib/micro_micro/{relation.rb → relationship.rb} +6 -5
- data/lib/micro_micro/version.rb +1 -1
- data/lib/micromicro.rb +13 -3
- data/micromicro.gemspec +1 -1
- metadata +11 -10
data/lib/micro_micro/item.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
class Item
|
3
|
-
|
3
|
+
include Collectible
|
4
4
|
|
5
|
+
# Parse a node for microformats2-encoded data.
|
6
|
+
#
|
5
7
|
# @param node [Nokogiri::XML::Element]
|
6
8
|
def initialize(node)
|
7
9
|
@node = node
|
@@ -11,11 +13,17 @@ module MicroMicro
|
|
11
13
|
properties << implied_url if implied_url?
|
12
14
|
end
|
13
15
|
|
16
|
+
# A collection of child items parsed from the node.
|
17
|
+
#
|
18
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
19
|
+
#
|
14
20
|
# @return [MicroMicro::Collections::ItemsCollection]
|
15
21
|
def children
|
16
22
|
@children ||= Collections::ItemsCollection.new(Item.items_from(node.element_children))
|
17
23
|
end
|
18
24
|
|
25
|
+
# The value of the node's `id` attribute, if present.
|
26
|
+
#
|
19
27
|
# @return [String, nil]
|
20
28
|
def id
|
21
29
|
@id ||= node['id']&.strip
|
@@ -26,12 +34,22 @@ module MicroMicro
|
|
26
34
|
format(%(#<#{self.class.name}:%#0x types: #{types.inspect}, properties: #{properties.count}, children: #{children.count}>), object_id)
|
27
35
|
end
|
28
36
|
|
37
|
+
# A collection of plain text properties parsed from the node.
|
38
|
+
#
|
39
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
40
|
+
def plain_text_properties
|
41
|
+
@plain_text_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'p' })
|
42
|
+
end
|
43
|
+
|
44
|
+
# A collection of properties parsed from the node.
|
45
|
+
#
|
29
46
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
30
47
|
def properties
|
31
48
|
@properties ||= Collections::PropertiesCollection.new(Property.properties_from(node.element_children))
|
32
49
|
end
|
33
50
|
|
34
|
-
#
|
51
|
+
# Return the parsed item as a Hash.
|
52
|
+
#
|
35
53
|
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
36
54
|
#
|
37
55
|
# @return [Hash]
|
@@ -43,28 +61,42 @@ module MicroMicro
|
|
43
61
|
|
44
62
|
hash[:id] = id if id.present?
|
45
63
|
hash[:children] = children.to_a if children.any?
|
46
|
-
hash[:value] = value if value.present?
|
47
64
|
|
48
65
|
hash
|
49
66
|
end
|
50
67
|
|
68
|
+
# An array of root class names parsed from the node's `class` attribute.
|
69
|
+
#
|
51
70
|
# @return [Array<String>]
|
52
71
|
def types
|
53
72
|
@types ||= self.class.types_from(node)
|
54
73
|
end
|
55
74
|
|
75
|
+
# A collection of url properties parsed from the node.
|
76
|
+
#
|
77
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
78
|
+
def url_properties
|
79
|
+
@url_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'u' })
|
80
|
+
end
|
81
|
+
|
82
|
+
# Does this node's `class` attribute contain root class names?
|
83
|
+
#
|
56
84
|
# @param node [Nokogiri::XML::Element]
|
57
85
|
# @return [Boolean]
|
58
86
|
def self.item_node?(node)
|
59
87
|
types_from(node).any?
|
60
88
|
end
|
61
89
|
|
90
|
+
# Extract items from a context.
|
91
|
+
#
|
62
92
|
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
63
93
|
# @return [Array<MicroMicro::Item>]
|
64
94
|
def self.items_from(context)
|
65
95
|
nodes_from(context).map { |node| new(node) }
|
66
96
|
end
|
67
97
|
|
98
|
+
# Extract item nodes from a context.
|
99
|
+
#
|
68
100
|
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
69
101
|
# @param node_set [Nokogiri::XML::NodeSet]
|
70
102
|
# @return [Nokogiri::XML::NodeSet]
|
@@ -84,14 +116,15 @@ module MicroMicro
|
|
84
116
|
node_set
|
85
117
|
end
|
86
118
|
|
87
|
-
#
|
88
|
-
# @return [Array<String>]
|
119
|
+
# Extract root class names from a node.
|
89
120
|
#
|
90
|
-
# @example
|
91
121
|
# node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
|
92
122
|
# MicroMicro::Item.types_from(node) #=> ['h-card']
|
123
|
+
#
|
124
|
+
# @param node [Nokogiri::XML::Element]
|
125
|
+
# @return [Array<String>]
|
93
126
|
def self.types_from(node)
|
94
|
-
node.classes.select { |token| token.match?(/^h(
|
127
|
+
node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
|
95
128
|
end
|
96
129
|
|
97
130
|
private
|
@@ -7,27 +7,30 @@ module MicroMicro
|
|
7
7
|
@node = property.node
|
8
8
|
end
|
9
9
|
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
11
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
12
|
+
#
|
10
13
|
# @return [String]
|
11
14
|
def value
|
12
|
-
@value ||=
|
15
|
+
@value ||= begin
|
16
|
+
Document.text_content_from(node) do |context|
|
17
|
+
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# @param node [Nokogiri::XML::Element]
|
23
|
+
# @param attributes_map [Hash{String => Array}]
|
24
|
+
# @return [String, nil]
|
25
|
+
def self.attribute_value_from(node, attributes_map)
|
26
|
+
attributes_map.map do |attribute, names|
|
27
|
+
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
+
end.compact.first
|
13
29
|
end
|
14
30
|
|
15
31
|
private
|
16
32
|
|
17
33
|
attr_reader :node, :property
|
18
|
-
|
19
|
-
# @see microformats2 Parsing Specification sections 1.3.1 and 1.3.4
|
20
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
21
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
22
|
-
def serialized_node
|
23
|
-
@serialized_node ||= begin
|
24
|
-
node.css(*Document.ignored_node_names).unlink
|
25
|
-
|
26
|
-
node.css('img').each { |img| img.content = " #{img['alt'] || Absolutely.to_abs(base: node.document.url, relative: img['src'])} " }
|
27
|
-
|
28
|
-
node
|
29
|
-
end
|
30
|
-
end
|
31
34
|
end
|
32
35
|
end
|
33
36
|
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class DateTimeParser
|
4
|
-
# @see Value Class Pattern section 4.2
|
5
4
|
# @see http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
6
5
|
#
|
7
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
@@ -58,12 +57,7 @@ module MicroMicro
|
|
58
57
|
|
59
58
|
# @return [String, nil]
|
60
59
|
def value
|
61
|
-
@value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
62
|
-
end
|
63
|
-
|
64
|
-
# @return [Boolean]
|
65
|
-
def value?
|
66
|
-
value.present?
|
60
|
+
@value ||= "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip if normalized_date || normalized_time || normalized_timezone
|
67
61
|
end
|
68
62
|
|
69
63
|
# @return [Hash{Symbol => String, nil}]
|
@@ -1,65 +1,55 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class DateTimePropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.3
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
6
4
|
HTML_ATTRIBUTES_MAP = {
|
7
5
|
'datetime' => %w[del ins time],
|
8
6
|
'title' => %w[abbr],
|
9
7
|
'value' => %w[data input]
|
10
8
|
}.freeze
|
11
9
|
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
11
|
+
#
|
12
12
|
# @return [String]
|
13
13
|
def value
|
14
|
-
@value ||=
|
15
|
-
return resolved_value if date_time_parser.value?
|
16
|
-
return attribute_values.first if attribute_values.any?
|
17
|
-
|
18
|
-
super
|
19
|
-
end
|
14
|
+
@value ||= resolved_value || attribute_value || super
|
20
15
|
end
|
21
16
|
|
22
17
|
private
|
23
18
|
|
19
|
+
# @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
20
|
+
#
|
24
21
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
25
|
-
def
|
26
|
-
@
|
27
|
-
|
22
|
+
def adopted_date_time_parser
|
23
|
+
@adopted_date_time_parser ||= begin
|
24
|
+
date_time_siblings = (property.prev_all.reverse + property.next_all).select { |prop| prop.prefix == 'dt' }
|
28
25
|
|
29
|
-
|
26
|
+
date_time_siblings.map { |prop| DateTimeParser.new(prop.value) }.find(&:normalized_date)
|
30
27
|
end
|
31
28
|
end
|
32
29
|
|
33
|
-
# @return [
|
34
|
-
def
|
35
|
-
|
36
|
-
HTML_ATTRIBUTES_MAP.map do |attribute, names|
|
37
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
38
|
-
end.compact
|
39
|
-
end
|
30
|
+
# @return [String, nil]
|
31
|
+
def attribute_value
|
32
|
+
self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
40
33
|
end
|
41
34
|
|
42
35
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
43
36
|
def date_time_parser
|
44
|
-
@date_time_parser ||= DateTimeParser.new(
|
37
|
+
@date_time_parser ||= DateTimeParser.new(ValueClassPatternParser.new(node, ' ').value)
|
45
38
|
end
|
46
39
|
|
40
|
+
# @see http://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
41
|
+
#
|
47
42
|
# @return [Boolean]
|
48
43
|
def imply_date?
|
49
|
-
date_time_parser.normalized_time && !date_time_parser.normalized_date
|
44
|
+
date_time_parser.normalized_time && !date_time_parser.normalized_date && adopted_date_time_parser
|
50
45
|
end
|
51
46
|
|
52
47
|
# @return [String, nil]
|
53
48
|
def resolved_value
|
54
|
-
return "#{
|
49
|
+
return "#{adopted_date_time_parser.normalized_date} #{date_time_parser.value}" if imply_date?
|
55
50
|
|
56
51
|
date_time_parser.value
|
57
52
|
end
|
58
|
-
|
59
|
-
# @return [MicroMicro::Parsers::ValueClassPatternParser]
|
60
|
-
def value_class_pattern_parser
|
61
|
-
ValueClassPatternParser.new(node, ' ')
|
62
|
-
end
|
63
53
|
end
|
64
54
|
end
|
65
55
|
end
|
@@ -1,28 +1,17 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
4
|
-
|
5
|
-
|
4
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
5
|
+
#
|
6
|
+
# @return [Hash{Symbol => String}]
|
6
7
|
def value
|
7
8
|
@value ||= begin
|
8
9
|
{
|
9
|
-
html:
|
10
|
+
html: node.inner_html.strip,
|
10
11
|
value: super
|
11
12
|
}
|
12
13
|
end
|
13
14
|
end
|
14
|
-
|
15
|
-
private
|
16
|
-
|
17
|
-
def resolved_node
|
18
|
-
@resolved_node ||= begin
|
19
|
-
HTML_ATTRIBUTE_NAMES.each do |attribute|
|
20
|
-
node.css("[#{attribute}]").each { |element| element[attribute] = Absolutely.to_abs(base: node.document.url, relative: element[attribute].strip) }
|
21
|
-
end
|
22
|
-
|
23
|
-
node
|
24
|
-
end
|
25
|
-
end
|
26
15
|
end
|
27
16
|
end
|
28
17
|
end
|
@@ -1,77 +1,38 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ImpliedNamePropertyParser < BasePropertyParser
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
'area' => 'alt',
|
8
|
-
'img' => 'alt',
|
9
|
-
'abbr' => 'title'
|
4
|
+
HTML_ATTRIBUTES_MAP = {
|
5
|
+
'alt' => %w[area img],
|
6
|
+
'title' => %w[abbr]
|
10
7
|
}.freeze
|
11
8
|
|
9
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
|
+
#
|
12
11
|
# @return [String]
|
13
12
|
def value
|
14
|
-
@value ||=
|
13
|
+
@value ||= attribute_value || text_content
|
15
14
|
end
|
16
15
|
|
17
16
|
private
|
18
17
|
|
19
|
-
# @return [
|
20
|
-
def
|
21
|
-
@
|
22
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
23
|
-
node[attribute] if node.matches?("#{element}[#{attribute}]")
|
24
|
-
end.compact
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# @return [Nokogiri::XML::Element, nil]
|
29
|
-
def child_node
|
30
|
-
@child_node ||= node.at_css('> :only-child')
|
31
|
-
end
|
32
|
-
|
33
|
-
# @return [Array<String>]
|
34
|
-
def child_node_attribute_values
|
35
|
-
@child_node_attribute_values ||= begin
|
36
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
37
|
-
child_node[attribute] if child_node.matches?("#{element}[#{attribute}]")
|
38
|
-
end.compact
|
39
|
-
end
|
18
|
+
# @return [Nokogiri::XML::NodeSet]
|
19
|
+
def candidate_nodes
|
20
|
+
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
40
21
|
end
|
41
22
|
|
42
|
-
# @return [
|
43
|
-
def
|
44
|
-
|
23
|
+
# @return [Array]
|
24
|
+
def child_nodes
|
25
|
+
[node.at_css('> :only-child'), node.at_css('> :only-child > :only-child')].compact.reject { |child_node| Item.item_node?(child_node) }
|
45
26
|
end
|
46
27
|
|
47
|
-
# @return [
|
48
|
-
def
|
49
|
-
|
50
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
51
|
-
grandchild_node[attribute] if grandchild_node.matches?("#{element}[#{attribute}]")
|
52
|
-
end.compact
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
# @return [Boolean]
|
57
|
-
def parse_child_node?
|
58
|
-
child_node && !Item.item_node?(child_node)
|
59
|
-
end
|
60
|
-
|
61
|
-
# @return [Boolean]
|
62
|
-
def parse_grandchild_node?
|
63
|
-
parse_child_node? && grandchild_node && !Item.item_node?(grandchild_node)
|
28
|
+
# @return [String, nil]
|
29
|
+
def attribute_value
|
30
|
+
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
64
31
|
end
|
65
32
|
|
66
33
|
# @return [String]
|
67
|
-
def
|
68
|
-
|
69
|
-
return child_node_attribute_values.first if parse_child_node? && child_node_attribute_values.any?
|
70
|
-
return grandchild_node_attribute_values.first if parse_grandchild_node? && grandchild_node_attribute_values.any?
|
71
|
-
|
72
|
-
serialized_node.css('img').each { |img| img.content = img['alt'] }
|
73
|
-
|
74
|
-
serialized_node.text
|
34
|
+
def text_content
|
35
|
+
@text_content ||= Document.text_content_from(node) { |context| context.css('img').each { |img| img.content = img['alt'] } }
|
75
36
|
end
|
76
37
|
end
|
77
38
|
end
|
@@ -1,14 +1,15 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ImpliedPhotoPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.5
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
6
4
|
HTML_ELEMENTS_MAP = {
|
7
5
|
'img' => 'src',
|
8
6
|
'object' => 'data'
|
9
7
|
}.freeze
|
10
8
|
|
11
|
-
# @
|
9
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
11
|
+
#
|
12
|
+
# @return [String, Hash{Symbol => String}, nil]
|
12
13
|
def value
|
13
14
|
@value ||= begin
|
14
15
|
return unless resolved_value
|
@@ -34,12 +35,7 @@ module MicroMicro
|
|
34
35
|
|
35
36
|
# @return [String, nil]
|
36
37
|
def resolved_value
|
37
|
-
@resolved_value ||=
|
38
|
-
end
|
39
|
-
|
40
|
-
# @return [String, nil]
|
41
|
-
def unresolved_value
|
42
|
-
@unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
38
|
+
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
43
39
|
end
|
44
40
|
|
45
41
|
# @return [Nokogiri::XML::Element, nil]
|
@@ -1,16 +1,16 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ImpliedUrlPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.5
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
6
4
|
HTML_ELEMENTS_MAP = {
|
7
5
|
'a' => 'href',
|
8
6
|
'area' => 'href'
|
9
7
|
}.freeze
|
10
8
|
|
9
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
|
+
#
|
11
11
|
# @return [String, nil]
|
12
12
|
def value
|
13
|
-
@value ||=
|
13
|
+
@value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
14
14
|
end
|
15
15
|
|
16
16
|
private
|
@@ -24,16 +24,6 @@ module MicroMicro
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
# @return [String, nil]
|
28
|
-
def resolved_value
|
29
|
-
@resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
|
30
|
-
end
|
31
|
-
|
32
|
-
# @return [String, nil]
|
33
|
-
def unresolved_value
|
34
|
-
@unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
35
|
-
end
|
36
|
-
|
37
27
|
# @return [Nokogiri::XML::Element, nil]
|
38
28
|
def value_node
|
39
29
|
@value_node ||= begin
|