micromicro 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/CONTRIBUTING.md +2 -2
- data/README.md +21 -20
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +7 -1
- data/lib/micro_micro/collections/items_collection.rb +3 -1
- data/lib/micro_micro/collections/properties_collection.rb +12 -0
- data/lib/micro_micro/collections/relationships_collection.rb +11 -10
- data/lib/micro_micro/document.rb +11 -99
- data/lib/micro_micro/helpers.rb +88 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +57 -62
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +6 -14
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +10 -9
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -3
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +15 -17
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +21 -45
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +12 -31
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +4 -2
- data/lib/micro_micro/parsers/url_property_parser.rb +22 -14
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -44
- data/lib/micro_micro/property.rb +68 -56
- data/lib/micro_micro/relationship.rb +15 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +31 -26
- data/micromicro.gemspec +14 -9
- metadata +23 -32
- data/.editorconfig +0 -14
- data/.gitignore +0 -34
- data/.gitmodules +0 -3
- data/.reek.yml +0 -8
- data/.rspec +0 -2
- data/.rubocop +0 -3
- data/.rubocop.yml +0 -25
- data/.ruby-version +0 -1
- data/.simplecov +0 -13
- data/.travis.yml +0 -19
- data/Gemfile +0 -14
- data/Rakefile +0 -18
data/lib/micro_micro/item.rb
CHANGED
@@ -1,7 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Item
|
3
5
|
include Collectible
|
4
6
|
|
7
|
+
# Extract items from a context.
|
8
|
+
#
|
9
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
10
|
+
# @return [Array<MicroMicro::Item>]
|
11
|
+
def self.from_context(context)
|
12
|
+
node_set_from(context).map { |node| new(node) }
|
13
|
+
end
|
14
|
+
|
15
|
+
# Extract item nodes from a context.
|
16
|
+
#
|
17
|
+
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
18
|
+
# @param node_set [Nokogiri::XML::NodeSet]
|
19
|
+
# @return [Nokogiri::XML::NodeSet]
|
20
|
+
# rubocop:disable Metrics
|
21
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
22
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
23
|
+
|
24
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
25
|
+
if Helpers.item_node?(context)
|
26
|
+
node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
|
27
|
+
else
|
28
|
+
node_set_from(context.element_children, node_set)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
node_set
|
33
|
+
end
|
34
|
+
# rubocop:enable Metrics
|
35
|
+
|
5
36
|
# Parse a node for microformats2-encoded data.
|
6
37
|
#
|
7
38
|
# @param node [Nokogiri::XML::Element]
|
@@ -15,11 +46,11 @@ module MicroMicro
|
|
15
46
|
|
16
47
|
# A collection of child items parsed from the node.
|
17
48
|
#
|
18
|
-
# @see
|
49
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
19
50
|
#
|
20
51
|
# @return [MicroMicro::Collections::ItemsCollection]
|
21
52
|
def children
|
22
|
-
@children ||= Collections::ItemsCollection.new(
|
53
|
+
@children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
|
23
54
|
end
|
24
55
|
|
25
56
|
# The value of the node's `id` attribute, if present.
|
@@ -29,28 +60,33 @@ module MicroMicro
|
|
29
60
|
@id ||= node['id']&.strip
|
30
61
|
end
|
31
62
|
|
63
|
+
# :nocov:
|
32
64
|
# @return [String]
|
33
65
|
def inspect
|
34
|
-
|
66
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
67
|
+
"types: #{types.inspect}, " \
|
68
|
+
"properties: #{properties.count}, " \
|
69
|
+
"children: #{children.count}>"
|
35
70
|
end
|
71
|
+
# :nocov:
|
36
72
|
|
37
73
|
# A collection of plain text properties parsed from the node.
|
38
74
|
#
|
39
75
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
40
76
|
def plain_text_properties
|
41
|
-
@plain_text_properties ||=
|
77
|
+
@plain_text_properties ||= properties.plain_text_properties
|
42
78
|
end
|
43
79
|
|
44
80
|
# A collection of properties parsed from the node.
|
45
81
|
#
|
46
82
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
47
83
|
def properties
|
48
|
-
@properties ||= Collections::PropertiesCollection.new(Property.
|
84
|
+
@properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
|
49
85
|
end
|
50
86
|
|
51
87
|
# Return the parsed item as a Hash.
|
52
88
|
#
|
53
|
-
# @see
|
89
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
54
90
|
#
|
55
91
|
# @return [Hash]
|
56
92
|
def to_h
|
@@ -69,62 +105,14 @@ module MicroMicro
|
|
69
105
|
#
|
70
106
|
# @return [Array<String>]
|
71
107
|
def types
|
72
|
-
@types ||=
|
108
|
+
@types ||= Helpers.root_class_names_from(node)
|
73
109
|
end
|
74
110
|
|
75
111
|
# A collection of url properties parsed from the node.
|
76
112
|
#
|
77
113
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
78
114
|
def url_properties
|
79
|
-
@url_properties ||=
|
80
|
-
end
|
81
|
-
|
82
|
-
# Does this node's `class` attribute contain root class names?
|
83
|
-
#
|
84
|
-
# @param node [Nokogiri::XML::Element]
|
85
|
-
# @return [Boolean]
|
86
|
-
def self.item_node?(node)
|
87
|
-
types_from(node).any?
|
88
|
-
end
|
89
|
-
|
90
|
-
# Extract items from a context.
|
91
|
-
#
|
92
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
93
|
-
# @return [Array<MicroMicro::Item>]
|
94
|
-
def self.items_from(context)
|
95
|
-
nodes_from(context).map { |node| new(node) }
|
96
|
-
end
|
97
|
-
|
98
|
-
# Extract item nodes from a context.
|
99
|
-
#
|
100
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
101
|
-
# @param node_set [Nokogiri::XML::NodeSet]
|
102
|
-
# @return [Nokogiri::XML::NodeSet]
|
103
|
-
def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
104
|
-
return nodes_from(context.element_children, node_set) if context.is_a?(Nokogiri::HTML::Document)
|
105
|
-
|
106
|
-
context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
107
|
-
|
108
|
-
if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
|
109
|
-
if item_node?(context)
|
110
|
-
node_set << context unless Property.property_node?(context)
|
111
|
-
else
|
112
|
-
nodes_from(context.element_children, node_set)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
node_set
|
117
|
-
end
|
118
|
-
|
119
|
-
# Extract root class names from a node.
|
120
|
-
#
|
121
|
-
# node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
|
122
|
-
# MicroMicro::Item.types_from(node) #=> ['h-card']
|
123
|
-
#
|
124
|
-
# @param node [Nokogiri::XML::Element]
|
125
|
-
# @return [Array<String>]
|
126
|
-
def self.types_from(node)
|
127
|
-
node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
|
115
|
+
@url_properties ||= properties.url_properties
|
128
116
|
end
|
129
117
|
|
130
118
|
private
|
@@ -133,7 +121,7 @@ module MicroMicro
|
|
133
121
|
|
134
122
|
# @return [MicroMicro::ImpliedProperty]
|
135
123
|
def implied_name
|
136
|
-
@implied_name ||= ImpliedProperty.new(node,
|
124
|
+
@implied_name ||= ImpliedProperty.new(node, 'p-name')
|
137
125
|
end
|
138
126
|
|
139
127
|
# @return [Boolean]
|
@@ -143,7 +131,7 @@ module MicroMicro
|
|
143
131
|
|
144
132
|
# @return [MicroMicro::ImpliedProperty]
|
145
133
|
def implied_photo
|
146
|
-
@implied_photo ||= ImpliedProperty.new(node,
|
134
|
+
@implied_photo ||= ImpliedProperty.new(node, 'u-photo')
|
147
135
|
end
|
148
136
|
|
149
137
|
# @return [Boolean]
|
@@ -153,7 +141,7 @@ module MicroMicro
|
|
153
141
|
|
154
142
|
# @return [MicroMicro::ImpliedProperty]
|
155
143
|
def implied_url
|
156
|
-
@implied_url ||= ImpliedProperty.new(node,
|
144
|
+
@implied_url ||= ImpliedProperty.new(node, 'u-url')
|
157
145
|
end
|
158
146
|
|
159
147
|
# @return [Boolean]
|
@@ -163,17 +151,24 @@ module MicroMicro
|
|
163
151
|
|
164
152
|
# @return [Boolean]
|
165
153
|
def imply_name?
|
166
|
-
properties.none?
|
154
|
+
properties.names.none?('name') &&
|
155
|
+
properties.none?(&:embedded_markup_property?) &&
|
156
|
+
properties.none?(&:plain_text_property?) &&
|
157
|
+
!nested_items?
|
167
158
|
end
|
168
159
|
|
169
160
|
# @return [Boolean]
|
170
161
|
def imply_photo?
|
171
|
-
properties.none?
|
162
|
+
properties.names.none?('photo') &&
|
163
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
164
|
+
!nested_items?
|
172
165
|
end
|
173
166
|
|
174
167
|
# @return [Boolean]
|
175
168
|
def imply_url?
|
176
|
-
properties.none?
|
169
|
+
properties.names.none?('url') &&
|
170
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
171
|
+
!nested_items?
|
177
172
|
end
|
178
173
|
|
179
174
|
# @return [Boolean]
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Parsers
|
5
|
+
class BaseImpliedPropertyParser < BasePropertyParser
|
6
|
+
private
|
7
|
+
|
8
|
+
# @return [String, nil]
|
9
|
+
def attribute_value
|
10
|
+
candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]] if candidate_node
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Nokogiri::XML::Element, nil]
|
14
|
+
def candidate_node
|
15
|
+
@candidate_node ||=
|
16
|
+
candidate_nodes.find do |node|
|
17
|
+
self.class::HTML_ELEMENTS_MAP.filter_map do |name, attribute|
|
18
|
+
node if name == node.name && node[attribute]
|
19
|
+
end.any?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# @return [Nokogiri::XML::NodeSet]
|
24
|
+
def candidate_nodes
|
25
|
+
Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class BasePropertyParser
|
@@ -7,25 +9,15 @@ module MicroMicro
|
|
7
9
|
@node = property.node
|
8
10
|
end
|
9
11
|
|
10
|
-
# @see
|
11
|
-
# @see
|
12
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
13
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
12
14
|
#
|
13
15
|
# @return [String]
|
14
16
|
def value
|
15
|
-
@value ||=
|
16
|
-
|
17
|
+
@value ||=
|
18
|
+
Helpers.text_content_from(node) do |context|
|
17
19
|
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
20
|
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
# @param node [Nokogiri::XML::Element]
|
23
|
-
# @param attributes_map [Hash{String => Array}]
|
24
|
-
# @return [Array]
|
25
|
-
def self.attribute_value_from(node, attributes_map)
|
26
|
-
attributes_map.map do |attribute, names|
|
27
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
-
end.compact.first
|
29
21
|
end
|
30
22
|
|
31
23
|
private
|
@@ -1,78 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimeParser
|
4
|
-
# @see http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
5
|
-
#
|
6
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
7
|
-
DATE_REGEXP_PATTERN = '(?<year>\d{4})-
|
8
|
-
|
9
|
-
|
10
|
-
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
11
|
-
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
|
7
|
+
DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
|
8
|
+
'((?<ordinal>3[0-6]{2}|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
|
9
|
+
'(?<day>3[0-1]|[0-2]\d))'
|
12
10
|
|
13
|
-
|
11
|
+
# Regexp pattern matching HH:MM and HH:MM:SS
|
12
|
+
TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
|
13
|
+
'(?::(?<minutes>[0-5]\d))?' \
|
14
|
+
'(?::(?<seconds>[0-5]\d))?' \
|
15
|
+
'(?:\s*?(?<abbreviation>[apPP]\.?[mM]\.?))?'
|
14
16
|
|
15
|
-
#
|
17
|
+
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
18
|
+
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
|
19
|
+
|
20
|
+
# Regexp for extracting named captures from a datetime-esque String.
|
21
|
+
DATE_TIME_TIMEZONE_REGEXP = /
|
22
|
+
\A
|
23
|
+
(?=.)
|
24
|
+
(?:#{DATE_REGEXP_PATTERN})?
|
25
|
+
(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
|
26
|
+
\z
|
27
|
+
/x.freeze
|
28
|
+
|
29
|
+
# Parse a string for date and/or time values according to the Microformats
|
30
|
+
# Value Class Pattern date and time parsing specification.
|
31
|
+
#
|
32
|
+
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
33
|
+
#
|
34
|
+
# @param string [String, #to_s]
|
16
35
|
def initialize(string)
|
17
|
-
@string = string
|
36
|
+
@string = string.to_s
|
18
37
|
end
|
19
38
|
|
20
|
-
|
39
|
+
# Define getter and predicate methods for all possible named captures
|
40
|
+
# returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
|
41
|
+
[
|
42
|
+
:year, :ordinal, :month, :day,
|
43
|
+
:hours, :minutes, :seconds,
|
44
|
+
:abbreviation, :zulu, :offset
|
45
|
+
].each do |name|
|
21
46
|
define_method(name) { values[name] }
|
22
47
|
define_method("#{name}?") { public_send(name).present? }
|
23
48
|
end
|
24
49
|
|
50
|
+
# @return [String, nil]
|
25
51
|
def normalized_calendar_date
|
26
52
|
@normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
|
27
53
|
end
|
28
54
|
|
55
|
+
# @return [String, nil]
|
29
56
|
def normalized_date
|
30
57
|
@normalized_date ||= normalized_calendar_date || normalized_ordinal_date
|
31
58
|
end
|
32
59
|
|
60
|
+
# @return [String, nil]
|
33
61
|
def normalized_hours
|
34
|
-
@normalized_hours ||=
|
35
|
-
|
36
|
-
|
62
|
+
@normalized_hours ||=
|
63
|
+
if hours?
|
64
|
+
return (hours.to_i + 12).to_s if abbreviation&.tr('.', '')&.downcase == 'pm'
|
37
65
|
|
38
|
-
|
39
|
-
|
66
|
+
format('%<hours>02d', hours: hours)
|
67
|
+
end
|
40
68
|
end
|
41
69
|
|
70
|
+
# @return [String]
|
42
71
|
def normalized_minutes
|
43
72
|
@normalized_minutes ||= minutes || '00'
|
44
73
|
end
|
45
74
|
|
75
|
+
# @return [String, nil]
|
46
76
|
def normalized_ordinal_date
|
47
77
|
@normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
|
48
78
|
end
|
49
79
|
|
80
|
+
# @return [String, nil]
|
50
81
|
def normalized_time
|
51
82
|
@normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
|
52
83
|
end
|
53
84
|
|
85
|
+
# @return [String, nil]
|
54
86
|
def normalized_timezone
|
55
87
|
@normalized_timezone ||= zulu || offset&.tr(':', '')
|
56
88
|
end
|
57
89
|
|
58
|
-
# @return [String]
|
90
|
+
# @return [String, nil]
|
59
91
|
def value
|
60
|
-
@value ||=
|
92
|
+
@value ||=
|
93
|
+
if normalized_date || normalized_time || normalized_timezone
|
94
|
+
"#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
95
|
+
end
|
61
96
|
end
|
62
97
|
|
63
98
|
# @return [Hash{Symbol => String, nil}]
|
64
99
|
def values
|
65
|
-
@values ||=
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
string&.match(/^(?:#{DATE_REGEXP_PATTERN})?(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?$/)&.named_captures.to_h.symbolize_keys
|
100
|
+
@values ||=
|
101
|
+
if string.match?(DATE_TIME_TIMEZONE_REGEXP)
|
102
|
+
string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
|
103
|
+
else
|
104
|
+
{}
|
105
|
+
end
|
72
106
|
end
|
73
107
|
|
74
108
|
private
|
75
109
|
|
110
|
+
# @return [String]
|
76
111
|
attr_reader :string
|
77
112
|
end
|
78
113
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimePropertyParser < BasePropertyParser
|
@@ -7,7 +9,7 @@ module MicroMicro
|
|
7
9
|
'value' => %w[data input]
|
8
10
|
}.freeze
|
9
11
|
|
10
|
-
# @see
|
12
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
11
13
|
#
|
12
14
|
# @return [String]
|
13
15
|
def value
|
@@ -16,20 +18,19 @@ module MicroMicro
|
|
16
18
|
|
17
19
|
private
|
18
20
|
|
19
|
-
# @see
|
21
|
+
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
20
22
|
#
|
21
23
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
22
24
|
def adopted_date_time_parser
|
23
|
-
@adopted_date_time_parser ||=
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
end
|
25
|
+
@adopted_date_time_parser ||=
|
26
|
+
(property.prev_all.reverse + property.next_all).filter_map do |prop|
|
27
|
+
DateTimeParser.new(prop.value) if prop.date_time_property?
|
28
|
+
end.find(&:normalized_date)
|
28
29
|
end
|
29
30
|
|
30
31
|
# @return [String, nil]
|
31
32
|
def attribute_value
|
32
|
-
|
33
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
33
34
|
end
|
34
35
|
|
35
36
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
@@ -37,7 +38,7 @@ module MicroMicro
|
|
37
38
|
@date_time_parser ||= DateTimeParser.new(ValueClassPatternParser.new(node, ' ').value)
|
38
39
|
end
|
39
40
|
|
40
|
-
# @see
|
41
|
+
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
41
42
|
#
|
42
43
|
# @return [Boolean]
|
43
44
|
def imply_date?
|
@@ -1,16 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
4
|
-
# @see
|
6
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
5
7
|
#
|
6
8
|
# @return [Hash{Symbol => String}]
|
7
9
|
def value
|
8
|
-
@value ||=
|
10
|
+
@value ||=
|
9
11
|
{
|
10
12
|
html: node.inner_html.strip,
|
11
13
|
value: super
|
12
14
|
}
|
13
|
-
end
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
@@ -1,12 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedNamePropertyParser <
|
4
|
-
|
5
|
-
'
|
6
|
-
'
|
5
|
+
class ImpliedNamePropertyParser < BaseImpliedPropertyParser
|
6
|
+
HTML_ELEMENTS_MAP = {
|
7
|
+
'img' => 'alt',
|
8
|
+
'area' => 'alt',
|
9
|
+
'abbr' => 'title'
|
7
10
|
}.freeze
|
8
11
|
|
9
|
-
# @see
|
12
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
13
|
#
|
11
14
|
# @return [String]
|
12
15
|
def value
|
@@ -15,24 +18,19 @@ module MicroMicro
|
|
15
18
|
|
16
19
|
private
|
17
20
|
|
18
|
-
# @return [Nokogiri::XML::NodeSet]
|
19
|
-
def candidate_nodes
|
20
|
-
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
21
|
-
end
|
22
|
-
|
23
21
|
# @return [Array]
|
24
22
|
def child_nodes
|
25
|
-
[
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def attribute_value
|
30
|
-
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
23
|
+
[
|
24
|
+
node.at_css('> :only-child'),
|
25
|
+
node.at_css('> :only-child > :only-child')
|
26
|
+
].compact.reject { |child_node| Helpers.item_node?(child_node) }
|
31
27
|
end
|
32
28
|
|
33
29
|
# @return [String]
|
34
30
|
def text_content
|
35
|
-
|
31
|
+
Helpers.text_content_from(node) do |context|
|
32
|
+
context.css('img').each { |img| img.content = img['alt'] }
|
33
|
+
end
|
36
34
|
end
|
37
35
|
end
|
38
36
|
end
|
@@ -1,64 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedPhotoPropertyParser <
|
5
|
+
class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'img' => 'src',
|
6
10
|
'object' => 'data'
|
7
11
|
}.freeze
|
8
12
|
|
9
|
-
# @see
|
10
|
-
# @see
|
13
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
14
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
11
15
|
#
|
12
16
|
# @return [String, Hash{Symbol => String}, nil]
|
13
17
|
def value
|
14
|
-
@value ||=
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
@value ||=
|
19
|
+
if attribute_value
|
20
|
+
return attribute_value unless candidate_node.matches?('img[alt]')
|
21
|
+
|
22
|
+
{
|
23
|
+
value: attribute_value,
|
24
|
+
alt: candidate_node['alt'].strip
|
25
|
+
}
|
26
|
+
end
|
23
27
|
end
|
24
28
|
|
25
29
|
private
|
26
30
|
|
27
|
-
# @return [Array
|
28
|
-
def
|
29
|
-
|
30
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
31
|
-
node if node.matches?("#{element}[#{attribute}]")
|
32
|
-
end.compact
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [String, nil]
|
37
|
-
def resolved_value
|
38
|
-
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
39
|
-
end
|
31
|
+
# @return [Array]
|
32
|
+
def child_nodes
|
33
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
40
34
|
|
41
|
-
|
42
|
-
def value_node
|
43
|
-
@value_node ||= begin
|
44
|
-
return attribute_values.first if attribute_values.any?
|
45
|
-
|
46
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
47
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
48
|
-
|
49
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
50
|
-
end
|
51
|
-
|
52
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
53
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
54
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
55
|
-
|
56
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
57
|
-
end
|
58
|
-
end
|
35
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
59
36
|
|
60
|
-
|
61
|
-
end
|
37
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
62
38
|
end
|
63
39
|
end
|
64
40
|
end
|