micromicro 1.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/CONTRIBUTING.md +2 -2
- data/README.md +21 -20
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +7 -1
- data/lib/micro_micro/collections/items_collection.rb +3 -1
- data/lib/micro_micro/collections/properties_collection.rb +12 -0
- data/lib/micro_micro/collections/relationships_collection.rb +11 -10
- data/lib/micro_micro/document.rb +11 -99
- data/lib/micro_micro/helpers.rb +88 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +57 -62
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +6 -14
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +10 -9
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -3
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +15 -17
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +21 -45
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +12 -31
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +4 -2
- data/lib/micro_micro/parsers/url_property_parser.rb +22 -14
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -44
- data/lib/micro_micro/property.rb +68 -56
- data/lib/micro_micro/relationship.rb +15 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +31 -26
- data/micromicro.gemspec +14 -9
- metadata +23 -32
- data/.editorconfig +0 -14
- data/.gitignore +0 -34
- data/.gitmodules +0 -3
- data/.reek.yml +0 -8
- data/.rspec +0 -2
- data/.rubocop +0 -3
- data/.rubocop.yml +0 -25
- data/.ruby-version +0 -1
- data/.simplecov +0 -13
- data/.travis.yml +0 -19
- data/Gemfile +0 -14
- data/Rakefile +0 -18
data/lib/micro_micro/item.rb
CHANGED
@@ -1,7 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Item
|
3
5
|
include Collectible
|
4
6
|
|
7
|
+
# Extract items from a context.
|
8
|
+
#
|
9
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
10
|
+
# @return [Array<MicroMicro::Item>]
|
11
|
+
def self.from_context(context)
|
12
|
+
node_set_from(context).map { |node| new(node) }
|
13
|
+
end
|
14
|
+
|
15
|
+
# Extract item nodes from a context.
|
16
|
+
#
|
17
|
+
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
18
|
+
# @param node_set [Nokogiri::XML::NodeSet]
|
19
|
+
# @return [Nokogiri::XML::NodeSet]
|
20
|
+
# rubocop:disable Metrics
|
21
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
22
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
23
|
+
|
24
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
25
|
+
if Helpers.item_node?(context)
|
26
|
+
node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
|
27
|
+
else
|
28
|
+
node_set_from(context.element_children, node_set)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
node_set
|
33
|
+
end
|
34
|
+
# rubocop:enable Metrics
|
35
|
+
|
5
36
|
# Parse a node for microformats2-encoded data.
|
6
37
|
#
|
7
38
|
# @param node [Nokogiri::XML::Element]
|
@@ -15,11 +46,11 @@ module MicroMicro
|
|
15
46
|
|
16
47
|
# A collection of child items parsed from the node.
|
17
48
|
#
|
18
|
-
# @see
|
49
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
19
50
|
#
|
20
51
|
# @return [MicroMicro::Collections::ItemsCollection]
|
21
52
|
def children
|
22
|
-
@children ||= Collections::ItemsCollection.new(
|
53
|
+
@children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
|
23
54
|
end
|
24
55
|
|
25
56
|
# The value of the node's `id` attribute, if present.
|
@@ -29,28 +60,33 @@ module MicroMicro
|
|
29
60
|
@id ||= node['id']&.strip
|
30
61
|
end
|
31
62
|
|
63
|
+
# :nocov:
|
32
64
|
# @return [String]
|
33
65
|
def inspect
|
34
|
-
|
66
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
67
|
+
"types: #{types.inspect}, " \
|
68
|
+
"properties: #{properties.count}, " \
|
69
|
+
"children: #{children.count}>"
|
35
70
|
end
|
71
|
+
# :nocov:
|
36
72
|
|
37
73
|
# A collection of plain text properties parsed from the node.
|
38
74
|
#
|
39
75
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
40
76
|
def plain_text_properties
|
41
|
-
@plain_text_properties ||=
|
77
|
+
@plain_text_properties ||= properties.plain_text_properties
|
42
78
|
end
|
43
79
|
|
44
80
|
# A collection of properties parsed from the node.
|
45
81
|
#
|
46
82
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
47
83
|
def properties
|
48
|
-
@properties ||= Collections::PropertiesCollection.new(Property.
|
84
|
+
@properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
|
49
85
|
end
|
50
86
|
|
51
87
|
# Return the parsed item as a Hash.
|
52
88
|
#
|
53
|
-
# @see
|
89
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
54
90
|
#
|
55
91
|
# @return [Hash]
|
56
92
|
def to_h
|
@@ -69,62 +105,14 @@ module MicroMicro
|
|
69
105
|
#
|
70
106
|
# @return [Array<String>]
|
71
107
|
def types
|
72
|
-
@types ||=
|
108
|
+
@types ||= Helpers.root_class_names_from(node)
|
73
109
|
end
|
74
110
|
|
75
111
|
# A collection of url properties parsed from the node.
|
76
112
|
#
|
77
113
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
78
114
|
def url_properties
|
79
|
-
@url_properties ||=
|
80
|
-
end
|
81
|
-
|
82
|
-
# Does this node's `class` attribute contain root class names?
|
83
|
-
#
|
84
|
-
# @param node [Nokogiri::XML::Element]
|
85
|
-
# @return [Boolean]
|
86
|
-
def self.item_node?(node)
|
87
|
-
types_from(node).any?
|
88
|
-
end
|
89
|
-
|
90
|
-
# Extract items from a context.
|
91
|
-
#
|
92
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
93
|
-
# @return [Array<MicroMicro::Item>]
|
94
|
-
def self.items_from(context)
|
95
|
-
nodes_from(context).map { |node| new(node) }
|
96
|
-
end
|
97
|
-
|
98
|
-
# Extract item nodes from a context.
|
99
|
-
#
|
100
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
101
|
-
# @param node_set [Nokogiri::XML::NodeSet]
|
102
|
-
# @return [Nokogiri::XML::NodeSet]
|
103
|
-
def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
104
|
-
return nodes_from(context.element_children, node_set) if context.is_a?(Nokogiri::HTML::Document)
|
105
|
-
|
106
|
-
context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
107
|
-
|
108
|
-
if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
|
109
|
-
if item_node?(context)
|
110
|
-
node_set << context unless Property.property_node?(context)
|
111
|
-
else
|
112
|
-
nodes_from(context.element_children, node_set)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
node_set
|
117
|
-
end
|
118
|
-
|
119
|
-
# Extract root class names from a node.
|
120
|
-
#
|
121
|
-
# node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
|
122
|
-
# MicroMicro::Item.types_from(node) #=> ['h-card']
|
123
|
-
#
|
124
|
-
# @param node [Nokogiri::XML::Element]
|
125
|
-
# @return [Array<String>]
|
126
|
-
def self.types_from(node)
|
127
|
-
node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
|
115
|
+
@url_properties ||= properties.url_properties
|
128
116
|
end
|
129
117
|
|
130
118
|
private
|
@@ -133,7 +121,7 @@ module MicroMicro
|
|
133
121
|
|
134
122
|
# @return [MicroMicro::ImpliedProperty]
|
135
123
|
def implied_name
|
136
|
-
@implied_name ||= ImpliedProperty.new(node,
|
124
|
+
@implied_name ||= ImpliedProperty.new(node, 'p-name')
|
137
125
|
end
|
138
126
|
|
139
127
|
# @return [Boolean]
|
@@ -143,7 +131,7 @@ module MicroMicro
|
|
143
131
|
|
144
132
|
# @return [MicroMicro::ImpliedProperty]
|
145
133
|
def implied_photo
|
146
|
-
@implied_photo ||= ImpliedProperty.new(node,
|
134
|
+
@implied_photo ||= ImpliedProperty.new(node, 'u-photo')
|
147
135
|
end
|
148
136
|
|
149
137
|
# @return [Boolean]
|
@@ -153,7 +141,7 @@ module MicroMicro
|
|
153
141
|
|
154
142
|
# @return [MicroMicro::ImpliedProperty]
|
155
143
|
def implied_url
|
156
|
-
@implied_url ||= ImpliedProperty.new(node,
|
144
|
+
@implied_url ||= ImpliedProperty.new(node, 'u-url')
|
157
145
|
end
|
158
146
|
|
159
147
|
# @return [Boolean]
|
@@ -163,17 +151,24 @@ module MicroMicro
|
|
163
151
|
|
164
152
|
# @return [Boolean]
|
165
153
|
def imply_name?
|
166
|
-
properties.none?
|
154
|
+
properties.names.none?('name') &&
|
155
|
+
properties.none?(&:embedded_markup_property?) &&
|
156
|
+
properties.none?(&:plain_text_property?) &&
|
157
|
+
!nested_items?
|
167
158
|
end
|
168
159
|
|
169
160
|
# @return [Boolean]
|
170
161
|
def imply_photo?
|
171
|
-
properties.none?
|
162
|
+
properties.names.none?('photo') &&
|
163
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
164
|
+
!nested_items?
|
172
165
|
end
|
173
166
|
|
174
167
|
# @return [Boolean]
|
175
168
|
def imply_url?
|
176
|
-
properties.none?
|
169
|
+
properties.names.none?('url') &&
|
170
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
171
|
+
!nested_items?
|
177
172
|
end
|
178
173
|
|
179
174
|
# @return [Boolean]
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
  module Parsers
    # Shared lookup logic for implied property parsers.
    #
    # Subclasses supply two things:
    #
    # * an +HTML_ELEMENTS_MAP+ constant mapping an element name to the
    #   attribute that carries the implied value (e.g. 'img' => 'src'), and
    # * a private +child_nodes+ method returning an Array of descendant
    #   nodes that may carry the value when the item node itself does not.
    class BaseImpliedPropertyParser < BasePropertyParser
      private

      # The value of the mapped attribute on the first matching candidate.
      #
      # @return [String, nil] nil when no candidate node was found
      def attribute_value
        candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]] if candidate_node
      end

      # The first candidate whose element name appears in HTML_ELEMENTS_MAP
      # and which actually has the mapped attribute set.
      #
      # @return [Nokogiri::XML::Element, nil]
      def candidate_node
        @candidate_node ||=
          candidate_nodes.find do |candidate|
            # Direct hash lookup: element names are unique keys, so there is
            # no need to scan every pair and collect matches into an Array.
            attribute = self.class::HTML_ELEMENTS_MAP[candidate.name]

            attribute && candidate[attribute]
          end
      end

      # The item node itself followed by the subclass-provided child nodes.
      #
      # @return [Nokogiri::XML::NodeSet]
      def candidate_nodes
        # Build a new Array instead of unshifting onto the one returned by
        # child_nodes, so the subclass's return value is never mutated.
        Nokogiri::XML::NodeSet.new(node.document, [node, *child_nodes])
      end
    end
  end
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class BasePropertyParser
|
@@ -7,25 +9,15 @@ module MicroMicro
|
|
7
9
|
@node = property.node
|
8
10
|
end
|
9
11
|
|
10
|
-
# @see
|
11
|
-
# @see
|
12
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
13
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
12
14
|
#
|
13
15
|
# @return [String]
|
14
16
|
def value
|
15
|
-
@value ||=
|
16
|
-
|
17
|
+
@value ||=
|
18
|
+
Helpers.text_content_from(node) do |context|
|
17
19
|
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
20
|
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
# @param node [Nokogiri::XML::Element]
|
23
|
-
# @param attributes_map [Hash{String => Array}]
|
24
|
-
# @return [Array]
|
25
|
-
def self.attribute_value_from(node, attributes_map)
|
26
|
-
attributes_map.map do |attribute, names|
|
27
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
-
end.compact.first
|
29
21
|
end
|
30
22
|
|
31
23
|
private
|
@@ -1,78 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimeParser
|
4
|
-
# @see http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
5
|
-
#
|
6
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
7
|
-
DATE_REGEXP_PATTERN = '(?<year>\d{4})-
|
8
|
-
|
9
|
-
|
10
|
-
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
11
|
-
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
|
7
|
+
# Named captures: year, then either ordinal (day of year) or month and day.
#
# The ordinal branch accepts 000-299, 300-359 and 360-366. Writing the upper
# range as `3[0-6]{2}` would wrongly reject days such as 307 or 359 because
# it limits *both* trailing digits to 0-6.
DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
                      '(?:(?<ordinal>36[0-6]|3[0-5]\d|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
                      '(?<day>3[0-1]|[0-2]\d))'
|
12
10
|
|
13
|
-
|
11
|
+
# Regexp pattern matching HH, HH:MM and HH:MM:SS, optionally followed by a
# meridiem abbreviation (am/pm, with or without periods, in either case).
#
# Named captures: hours, minutes, seconds, abbreviation.
TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
                      '(?::(?<minutes>[0-5]\d))?' \
                      '(?::(?<seconds>[0-5]\d))?' \
                      '(?:\s*?(?<abbreviation>[aApP]\.?[mM]\.?))?'
|
14
16
|
|
15
|
-
#
|
17
|
+
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
18
|
+
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
|
19
|
+
|
20
|
+
# Regexp for extracting named captures from a datetime-esque String.
|
21
|
+
DATE_TIME_TIMEZONE_REGEXP = /
|
22
|
+
\A
|
23
|
+
(?=.)
|
24
|
+
(?:#{DATE_REGEXP_PATTERN})?
|
25
|
+
(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
|
26
|
+
\z
|
27
|
+
/x.freeze
|
28
|
+
|
29
|
+
# Parse a string for date and/or time values according to the Microformats
|
30
|
+
# Value Class Pattern date and time parsing specification.
|
31
|
+
#
|
32
|
+
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
33
|
+
#
|
34
|
+
# @param string [String, #to_s]
|
16
35
|
def initialize(string)
|
17
|
-
@string = string
|
36
|
+
@string = string.to_s
|
18
37
|
end
|
19
38
|
|
20
|
-
|
39
|
+
# Define getter and predicate methods for all possible named captures
|
40
|
+
# returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
|
41
|
+
[
|
42
|
+
:year, :ordinal, :month, :day,
|
43
|
+
:hours, :minutes, :seconds,
|
44
|
+
:abbreviation, :zulu, :offset
|
45
|
+
].each do |name|
|
21
46
|
define_method(name) { values[name] }
|
22
47
|
define_method("#{name}?") { public_send(name).present? }
|
23
48
|
end
|
24
49
|
|
50
|
+
# @return [String, nil]
|
25
51
|
def normalized_calendar_date
|
26
52
|
@normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
|
27
53
|
end
|
28
54
|
|
55
|
+
# @return [String, nil]
|
29
56
|
def normalized_date
|
30
57
|
@normalized_date ||= normalized_calendar_date || normalized_ordinal_date
|
31
58
|
end
|
32
59
|
|
60
|
+
# The hours component as a zero-padded, 24-hour clock String.
#
# When a meridiem abbreviation is present the hour is converted from the
# 12-hour clock: 12am becomes "00", 1pm-11pm become "13"-"23", and 12pm
# stays "12". Hours that are already >= 12 are left untouched for "pm".
#
# @return [String, nil] nil when no hours were captured
def normalized_hours
  @normalized_hours ||=
    if hours?
      # to_i is always base 10, so zero-padded captures like "08" are safe.
      hour = hours.to_i
      meridiem = abbreviation&.delete('.')&.downcase

      hour += 12 if meridiem == 'pm' && hour < 12
      hour = 0 if meridiem == 'am' && hour == 12

      # No early `return` here: the result must flow back into the `||=`
      # above so that it is actually memoized.
      format('%<hour>02d', hour: hour)
    end
end
|
41
69
|
|
70
|
+
# @return [String]
|
42
71
|
def normalized_minutes
|
43
72
|
@normalized_minutes ||= minutes || '00'
|
44
73
|
end
|
45
74
|
|
75
|
+
# @return [String, nil]
|
46
76
|
def normalized_ordinal_date
|
47
77
|
@normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
|
48
78
|
end
|
49
79
|
|
80
|
+
# @return [String, nil]
|
50
81
|
def normalized_time
|
51
82
|
@normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
|
52
83
|
end
|
53
84
|
|
85
|
+
# @return [String, nil]
|
54
86
|
def normalized_timezone
|
55
87
|
@normalized_timezone ||= zulu || offset&.tr(':', '')
|
56
88
|
end
|
57
89
|
|
58
|
-
# @return [String]
|
90
|
+
# @return [String, nil]
|
59
91
|
def value
|
60
|
-
@value ||=
|
92
|
+
@value ||=
|
93
|
+
if normalized_date || normalized_time || normalized_timezone
|
94
|
+
"#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
95
|
+
end
|
61
96
|
end
|
62
97
|
|
63
98
|
# The named captures extracted from the input String, keyed by Symbol.
#
# Every capture name defined by DATE_TIME_TIMEZONE_REGEXP is present as a
# key when the String matches; components that did not participate in the
# match have a nil value. A non-matching String yields an empty Hash.
#
# @return [Hash{Symbol => String, nil}]
def values
  @values ||= begin
    # Match once and reuse the MatchData rather than match? followed by match.
    captures = string.match(DATE_TIME_TIMEZONE_REGEXP)&.named_captures || {}

    captures.transform_keys(&:to_sym)
  end
end
|
73
107
|
|
74
108
|
private
|
75
109
|
|
110
|
+
# @return [String]
|
76
111
|
attr_reader :string
|
77
112
|
end
|
78
113
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimePropertyParser < BasePropertyParser
|
@@ -7,7 +9,7 @@ module MicroMicro
|
|
7
9
|
'value' => %w[data input]
|
8
10
|
}.freeze
|
9
11
|
|
10
|
-
# @see
|
12
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
11
13
|
#
|
12
14
|
# @return [String]
|
13
15
|
def value
|
@@ -16,20 +18,19 @@ module MicroMicro
|
|
16
18
|
|
17
19
|
private
|
18
20
|
|
19
|
-
# @see
|
21
|
+
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
20
22
|
#
|
21
23
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
22
24
|
def adopted_date_time_parser
|
23
|
-
@adopted_date_time_parser ||=
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
end
|
25
|
+
@adopted_date_time_parser ||=
|
26
|
+
(property.prev_all.reverse + property.next_all).filter_map do |prop|
|
27
|
+
DateTimeParser.new(prop.value) if prop.date_time_property?
|
28
|
+
end.find(&:normalized_date)
|
28
29
|
end
|
29
30
|
|
30
31
|
# @return [String, nil]
|
31
32
|
def attribute_value
|
32
|
-
|
33
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
33
34
|
end
|
34
35
|
|
35
36
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
@@ -37,7 +38,7 @@ module MicroMicro
|
|
37
38
|
@date_time_parser ||= DateTimeParser.new(ValueClassPatternParser.new(node, ' ').value)
|
38
39
|
end
|
39
40
|
|
40
|
-
# @see
|
41
|
+
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
41
42
|
#
|
42
43
|
# @return [Boolean]
|
43
44
|
def imply_date?
|
@@ -1,16 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
4
|
-
# @see
|
6
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
5
7
|
#
|
6
8
|
# @return [Hash{Symbol => String}]
|
7
9
|
def value
|
8
|
-
@value ||=
|
10
|
+
@value ||=
|
9
11
|
{
|
10
12
|
html: node.inner_html.strip,
|
11
13
|
value: super
|
12
14
|
}
|
13
|
-
end
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
@@ -1,12 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedNamePropertyParser <
|
4
|
-
|
5
|
-
'
|
6
|
-
'
|
5
|
+
class ImpliedNamePropertyParser < BaseImpliedPropertyParser
|
6
|
+
HTML_ELEMENTS_MAP = {
|
7
|
+
'img' => 'alt',
|
8
|
+
'area' => 'alt',
|
9
|
+
'abbr' => 'title'
|
7
10
|
}.freeze
|
8
11
|
|
9
|
-
# @see
|
12
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
10
13
|
#
|
11
14
|
# @return [String]
|
12
15
|
def value
|
@@ -15,24 +18,19 @@ module MicroMicro
|
|
15
18
|
|
16
19
|
private
|
17
20
|
|
18
|
-
# @return [Nokogiri::XML::NodeSet]
|
19
|
-
def candidate_nodes
|
20
|
-
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
21
|
-
end
|
22
|
-
|
23
21
|
# @return [Array]
|
24
22
|
def child_nodes
|
25
|
-
[
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def attribute_value
|
30
|
-
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
23
|
+
[
|
24
|
+
node.at_css('> :only-child'),
|
25
|
+
node.at_css('> :only-child > :only-child')
|
26
|
+
].compact.reject { |child_node| Helpers.item_node?(child_node) }
|
31
27
|
end
|
32
28
|
|
33
29
|
# @return [String]
|
34
30
|
def text_content
|
35
|
-
|
31
|
+
Helpers.text_content_from(node) do |context|
|
32
|
+
context.css('img').each { |img| img.content = img['alt'] }
|
33
|
+
end
|
36
34
|
end
|
37
35
|
end
|
38
36
|
end
|
@@ -1,64 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedPhotoPropertyParser <
|
5
|
+
class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'img' => 'src',
|
6
10
|
'object' => 'data'
|
7
11
|
}.freeze
|
8
12
|
|
9
|
-
# @see
|
10
|
-
# @see
|
13
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
14
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
11
15
|
#
|
12
16
|
# @return [String, Hash{Symbol => String}, nil]
|
13
17
|
def value
|
14
|
-
@value ||=
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
@value ||=
|
19
|
+
if attribute_value
|
20
|
+
return attribute_value unless candidate_node.matches?('img[alt]')
|
21
|
+
|
22
|
+
{
|
23
|
+
value: attribute_value,
|
24
|
+
alt: candidate_node['alt'].strip
|
25
|
+
}
|
26
|
+
end
|
23
27
|
end
|
24
28
|
|
25
29
|
private
|
26
30
|
|
27
|
-
# @return [Array
|
28
|
-
def
|
29
|
-
|
30
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
31
|
-
node if node.matches?("#{element}[#{attribute}]")
|
32
|
-
end.compact
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [String, nil]
|
37
|
-
def resolved_value
|
38
|
-
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
39
|
-
end
|
31
|
+
# @return [Array]
|
32
|
+
def child_nodes
|
33
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
40
34
|
|
41
|
-
|
42
|
-
def value_node
|
43
|
-
@value_node ||= begin
|
44
|
-
return attribute_values.first if attribute_values.any?
|
45
|
-
|
46
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
47
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
48
|
-
|
49
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
50
|
-
end
|
51
|
-
|
52
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
53
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
54
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
55
|
-
|
56
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
57
|
-
end
|
58
|
-
end
|
35
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
59
36
|
|
60
|
-
|
61
|
-
end
|
37
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
62
38
|
end
|
63
39
|
end
|
64
40
|
end
|