micromicro 0.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/CONTRIBUTING.md +2 -2
- data/README.md +65 -29
- data/lib/micro_micro/collectible.rb +15 -0
- data/lib/micro_micro/collections/base_collection.rb +18 -13
- data/lib/micro_micro/collections/items_collection.rb +7 -0
- data/lib/micro_micro/collections/properties_collection.rb +20 -6
- data/lib/micro_micro/collections/relationships_collection.rb +33 -0
- data/lib/micro_micro/document.rb +36 -44
- data/lib/micro_micro/helpers.rb +82 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +78 -52
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +9 -14
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -31
- data/lib/micro_micro/parsers/date_time_property_parser.rb +20 -29
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +7 -17
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +17 -58
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +23 -51
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +13 -42
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +11 -18
- data/lib/micro_micro/parsers/url_property_parser.rb +29 -37
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -55
- data/lib/micro_micro/property.rb +73 -68
- data/lib/micro_micro/{relation.rb → relationship.rb} +19 -16
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +37 -22
- data/micromicro.gemspec +14 -9
- metadata +23 -31
- data/.editorconfig +0 -14
- data/.gitignore +0 -34
- data/.gitmodules +0 -3
- data/.reek.yml +0 -8
- data/.rspec +0 -2
- data/.rubocop +0 -3
- data/.rubocop.yml +0 -25
- data/.ruby-version +0 -1
- data/.simplecov +0 -11
- data/.travis.yml +0 -19
- data/Gemfile +0 -14
- data/Rakefile +0 -18
- data/lib/micro_micro/collections/relations_collection.rb +0 -23
data/lib/micro_micro/item.rb
CHANGED
@@ -1,7 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Item
|
3
|
-
|
5
|
+
include Collectible
|
6
|
+
|
7
|
+
# Build one item per top-level item node found in a context.
#
# The node lookup is delegated to {.node_set_from}; every node it returns
# is wrapped in a new instance of the receiving class.
#
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
# @return [Array<MicroMicro::Item>]
def self.from_context(context)
  item_nodes = node_set_from(context)

  item_nodes.map { |item_node| new(item_node) }
end
|
14
|
+
|
15
|
+
# Collect the outermost item nodes reachable from a context.
#
# A node set is walked member by member. An element is skipped entirely when
# Helpers.ignore_node? says so; an item element is collected (unless it is
# also a property node) and is not descended into; any other element is
# searched through its element children.
#
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
# @param node_set [Nokogiri::XML::NodeSet] accumulator shared across the recursion
# @return [Nokogiri::XML::NodeSet] the accumulator that was passed in (or created)
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
  case context
  when Nokogiri::XML::NodeSet
    context.each { |member| node_set_from(member, node_set) }
  when Nokogiri::XML::Element
    unless Helpers.ignore_node?(context)
      if !Helpers.item_node?(context)
        # Not an item itself: keep looking among its children.
        node_set_from(context.element_children, node_set)
      elsif !Helpers.property_node?(context)
        # An item that is not also a property is collected here.
        node_set << context
      end
    end
  end

  node_set
end
|
4
33
|
|
34
|
+
# Parse a node for microformats2-encoded data.
|
35
|
+
#
|
5
36
|
# @param node [Nokogiri::XML::Element]
|
6
37
|
def initialize(node)
|
7
38
|
@node = node
|
@@ -11,28 +42,49 @@ module MicroMicro
|
|
11
42
|
properties << implied_url if implied_url?
|
12
43
|
end
|
13
44
|
|
45
|
+
# A collection of child items parsed from the node.
|
46
|
+
#
|
47
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
48
|
+
#
|
14
49
|
# @return [MicroMicro::Collections::ItemsCollection]
|
15
50
|
def children
|
16
|
-
@children ||= Collections::ItemsCollection.new(
|
51
|
+
@children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
|
17
52
|
end
|
18
53
|
|
54
|
+
# The value of the node's `id` attribute, if present.
|
55
|
+
#
|
19
56
|
# @return [String, nil]
|
20
57
|
def id
|
21
58
|
@id ||= node['id']&.strip
|
22
59
|
end
|
23
60
|
|
61
|
+
# :nocov:
|
24
62
|
# A compact, human-readable summary: class name, object id in hex, the
# item's types, and the number of properties and children.
#
# @return [String]
def inspect
  address = format('%#0x', object_id)
  details = [
    "types: #{types.inspect}",
    "properties: #{properties.count}",
    "children: #{children.count}"
  ].join(', ')

  "#<#{self.class}:#{address} #{details}>"
end
|
69
|
+
# :nocov:
|
70
|
+
|
71
|
+
# A collection of plain text properties parsed from the node.
|
72
|
+
#
|
73
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
74
|
+
def plain_text_properties
|
75
|
+
@plain_text_properties ||= properties.plain_text_properties
|
27
76
|
end
|
28
77
|
|
78
|
+
# A collection of properties parsed from the node.
|
79
|
+
#
|
29
80
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
30
81
|
def properties
|
31
|
-
@properties ||= Collections::PropertiesCollection.new(Property.
|
82
|
+
@properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
|
32
83
|
end
|
33
84
|
|
34
|
-
#
|
35
|
-
#
|
85
|
+
# Return the parsed item as a Hash.
|
86
|
+
#
|
87
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
36
88
|
#
|
37
89
|
# @return [Hash]
|
38
90
|
def to_h
|
@@ -43,55 +95,22 @@ module MicroMicro
|
|
43
95
|
|
44
96
|
hash[:id] = id if id.present?
|
45
97
|
hash[:children] = children.to_a if children.any?
|
46
|
-
hash[:value] = value if value.present?
|
47
98
|
|
48
99
|
hash
|
49
100
|
end
|
50
101
|
|
102
|
+
# An array of root class names parsed from the node's `class` attribute.
|
103
|
+
#
|
51
104
|
# @return [Array<String>]
|
52
105
|
def types
|
53
|
-
@types ||=
|
106
|
+
@types ||= Helpers.root_class_names_from(node)
|
54
107
|
end
|
55
108
|
|
56
|
-
#
|
57
|
-
# @return [Boolean]
|
58
|
-
def self.item_node?(node)
|
59
|
-
types_from(node).any?
|
60
|
-
end
|
61
|
-
|
62
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
63
|
-
# @return [Array<MicroMicro::Item>]
|
64
|
-
def self.items_from(context)
|
65
|
-
nodes_from(context).map { |node| new(node) }
|
66
|
-
end
|
67
|
-
|
68
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
69
|
-
# @param node_set [Nokogiri::XML::NodeSet]
|
70
|
-
# @return [Nokogiri::XML::NodeSet]
|
71
|
-
def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
72
|
-
return nodes_from(context.element_children, node_set) if context.is_a?(Nokogiri::HTML::Document)
|
73
|
-
|
74
|
-
context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
75
|
-
|
76
|
-
if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
|
77
|
-
if item_node?(context)
|
78
|
-
node_set << context unless Property.property_node?(context)
|
79
|
-
else
|
80
|
-
nodes_from(context.element_children, node_set)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
node_set
|
85
|
-
end
|
86
|
-
|
87
|
-
# @param node [Nokogiri::XML::Element]
|
88
|
-
# @return [Array<String>]
|
109
|
+
# A collection of url properties parsed from the node.
|
89
110
|
#
|
90
|
-
# @
|
91
|
-
|
92
|
-
|
93
|
-
def self.types_from(node)
|
94
|
-
node.classes.select { |token| token.match?(/^h(?:\-[0-9a-z]+)?(?:\-[a-z]+)+$/) }.uniq.sort
|
111
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
112
|
+
def url_properties
|
113
|
+
@url_properties ||= properties.url_properties
|
95
114
|
end
|
96
115
|
|
97
116
|
private
|
@@ -100,7 +119,7 @@ module MicroMicro
|
|
100
119
|
|
101
120
|
# @return [MicroMicro::ImpliedProperty]
|
102
121
|
def implied_name
|
103
|
-
@implied_name ||= ImpliedProperty.new(node,
|
122
|
+
@implied_name ||= ImpliedProperty.new(node, 'p-name')
|
104
123
|
end
|
105
124
|
|
106
125
|
# @return [Boolean]
|
@@ -110,7 +129,7 @@ module MicroMicro
|
|
110
129
|
|
111
130
|
# @return [MicroMicro::ImpliedProperty]
|
112
131
|
def implied_photo
|
113
|
-
@implied_photo ||= ImpliedProperty.new(node,
|
132
|
+
@implied_photo ||= ImpliedProperty.new(node, 'u-photo')
|
114
133
|
end
|
115
134
|
|
116
135
|
# @return [Boolean]
|
@@ -120,7 +139,7 @@ module MicroMicro
|
|
120
139
|
|
121
140
|
# @return [MicroMicro::ImpliedProperty]
|
122
141
|
def implied_url
|
123
|
-
@implied_url ||= ImpliedProperty.new(node,
|
142
|
+
@implied_url ||= ImpliedProperty.new(node, 'u-url')
|
124
143
|
end
|
125
144
|
|
126
145
|
# @return [Boolean]
|
@@ -130,17 +149,24 @@ module MicroMicro
|
|
130
149
|
|
131
150
|
# Whether a name should be implied: only when no property is named "name",
# no embedded markup or plain text property exists, and there are no
# nested items.
#
# @return [Boolean]
def imply_name?
  return false unless properties.names.none?('name')
  return false unless properties.none?(&:embedded_markup_property?)
  return false unless properties.none?(&:plain_text_property?)

  !nested_items?
end
|
135
157
|
|
136
158
|
# Whether a photo should be implied: only when no property is named
# "photo", no non-implied url property exists, and there are no nested
# items.
#
# @return [Boolean]
def imply_photo?
  return false unless properties.names.none?('photo')

  explicit_properties = properties.reject(&:implied?)

  explicit_properties.none?(&:url_property?) && !nested_items?
end
|
140
164
|
|
141
165
|
# Whether a url should be implied: only when no property is named "url",
# no non-implied url property exists, and there are no nested items.
#
# @return [Boolean]
def imply_url?
  return false unless properties.names.none?('url')

  explicit_properties = properties.reject(&:implied?)

  explicit_properties.none?(&:url_property?) && !nested_items?
end
|
145
171
|
|
146
172
|
# @return [Boolean]
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Parsers
|
5
|
+
# Shared lookup logic for implied property parsers.
#
# Subclasses are expected to define HTML_ELEMENTS_MAP (element name =>
# attribute name) and a `child_nodes` method; neither is defined here.
class BaseImpliedPropertyParser < BasePropertyParser
  private

  # The mapped attribute's value on the first matching candidate node.
  #
  # @return [String, nil] nil when no candidate node matched
  def attribute_value
    return unless candidate_node

    candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]]
  end

  # The first candidate whose element name appears in HTML_ELEMENTS_MAP and
  # which carries the attribute mapped to that name.
  #
  # @return [Nokogiri::XML::Element, nil]
  def candidate_node
    @candidate_node ||=
      candidate_nodes.find do |candidate|
        self.class::HTML_ELEMENTS_MAP.any? do |element_name, attribute|
          element_name == candidate.name && candidate[attribute]
        end
      end
  end

  # The parser's own node followed by whatever `child_nodes` returns.
  #
  # @return [Nokogiri::XML::NodeSet]
  def candidate_nodes
    Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
  end
end
|
28
|
+
end
|
29
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class BasePropertyParser
|
@@ -7,27 +9,20 @@ module MicroMicro
|
|
7
9
|
@node = property.node
|
8
10
|
end
|
9
11
|
|
12
|
+
# The node's text content, memoized. Before the text is extracted, each
# img element's content is set to its alt text (or, lacking one, its src),
# padded with a space on either side.
#
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
#
# @return [String]
def value
  @value ||= Helpers.text_content_from(node) do |fragment|
    fragment.css('img').each do |image|
      replacement = image['alt'] || image['src']
      image.content = " #{replacement} "
    end
  end
end
|
14
22
|
|
15
23
|
private
|
16
24
|
|
17
25
|
attr_reader :node, :property
|
18
|
-
|
19
|
-
# @see microformats2 Parsing Specification sections 1.3.1 and 1.3.4
|
20
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
21
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
22
|
-
def serialized_node
|
23
|
-
@serialized_node ||= begin
|
24
|
-
node.css(*Document.ignored_node_names).unlink
|
25
|
-
|
26
|
-
node.css('img').each { |img| img.content = " #{img['alt'] || Absolutely.to_abs(base: node.document.url, relative: img['src'])} " }
|
27
|
-
|
28
|
-
node
|
29
|
-
end
|
30
|
-
end
|
31
26
|
end
|
32
27
|
end
|
33
28
|
end
|
@@ -1,84 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
# Parses date, time, and timezone components out of a String and exposes
# normalized forms of each.
class DateTimeParser
  # Regexp pattern matching YYYY-MM-DD and YYYY-DDD. The ordinal branch
  # accepts three-digit days up to 366.
  DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
                        '((?<ordinal>36[0-6]|3[0-5]\d|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
                        '(?<day>3[0-1]|[0-2]\d))'

  # Regexp pattern matching HH:MM and HH:MM:SS, optionally followed by an
  # am/pm abbreviation in either case, with or without periods.
  TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
                        '(?::(?<minutes>[0-5]\d))?' \
                        '(?::(?<seconds>[0-5]\d))?' \
                        '(?:\s*?(?<abbreviation>[aApP]\.?[mM]\.?))?'

  # Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
  TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'

  # Regexp for extracting named captures from a datetime-esque String.
  # The lookahead rejects the empty string, since every component is optional.
  DATE_TIME_TIMEZONE_REGEXP = /
    \A
    (?=.)
    (?:#{DATE_REGEXP_PATTERN})?
    (?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
    \z
  /x.freeze

  # Parse a string for date and/or time values according to the Microformats
  # Value Class Pattern date and time parsing specification.
  #
  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
  #
  # @param string [String, #to_s]
  def initialize(string)
    @string = string.to_s
  end

  # Define getter and predicate methods for all possible named captures
  # returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
  %i[
    year ordinal month day
    hours minutes seconds
    abbreviation zulu offset
  ].each do |name|
    define_method(name) { values[name] }
    define_method("#{name}?") { public_send(name).present? }
  end

  # @return [String, nil] "YYYY-MM-DD" when year, month, and day were captured
  def normalized_calendar_date
    @normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
  end

  # @return [String, nil] the calendar date if available, else the ordinal date
  def normalized_date
    @normalized_date ||= normalized_calendar_date || normalized_ordinal_date
  end

  # The captured hours as a zero-padded, two-digit, 24-hour value.
  #
  # With a "pm" abbreviation, hours below 12 are shifted by 12; with an
  # "am" abbreviation, 12 becomes 00. Hours are converted with #to_i before
  # formatting because Kernel#format applies Integer()-style prefix
  # detection to String arguments, which rejects "08" and "09".
  #
  # @return [String, nil] nil when no hours were captured
  def normalized_hours
    @normalized_hours ||=
      if hours?
        hour = hours.to_i
        meridiem = abbreviation&.tr('.', '')&.downcase

        if meridiem == 'pm' && hour < 12
          hour += 12
        elsif meridiem == 'am' && hour == 12
          hour = 0
        end

        format('%<hours>02d', hours: hour)
      end
  end

  # @return [String] the captured minutes, or "00" when none were captured
  def normalized_minutes
    @normalized_minutes ||= minutes || '00'
  end

  # @return [String, nil] "YYYY-DDD" when year and ordinal day were captured
  def normalized_ordinal_date
    @normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
  end

  # @return [String, nil] "HH:MM" or "HH:MM:SS"; nil when no hours were captured
  def normalized_time
    @normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
  end

  # @return [String, nil] "Z", or the captured offset with any colon removed
  def normalized_timezone
    @normalized_timezone ||= zulu || offset&.tr(':', '')
  end

  # The normalized date, time, and timezone joined as "DATE TIMEZONE-SUFFIXED-TIME".
  #
  # @return [String, nil] nil when nothing could be parsed from the string
  def value
    @value ||=
      if normalized_date || normalized_time || normalized_timezone
        "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
      end
  end

  # The named captures from matching the string against
  # DATE_TIME_TIMEZONE_REGEXP, keyed by Symbol; empty when it does not match.
  #
  # @return [Hash{Symbol => String, nil}]
  def values
    @values ||=
      if string.match?(DATE_TIME_TIMEZONE_REGEXP)
        string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
      else
        {}
      end
  end

  private

  # @return [String]
  attr_reader :string
end
|
84
113
|
end
|
@@ -1,65 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
# Parser for date/time properties. A time-only value may adopt its date from
# a sibling date/time property.
class DateTimePropertyParser < BasePropertyParser
  # Attribute name => element names on which that attribute is consulted.
  HTML_ATTRIBUTES_MAP = {
    'datetime' => %w[del ins time],
    'title' => %w[abbr],
    'value' => %w[data input]
  }.freeze

  # The first available of: the parsed date/time value, the mapped attribute
  # value, and the parent class's value.
  #
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
  #
  # @return [String]
  def value
    @value ||= resolved_value || attribute_value || super
  end

  private

  # A parser for the first sibling date/time property that yields a date.
  # Preceding siblings are tried nearest-first, then the following siblings.
  #
  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
  #
  # @return [MicroMicro::Parsers::DateTimeParser, nil]
  def adopted_date_time_parser
    @adopted_date_time_parser ||= begin
      siblings = property.prev_all.reverse + property.next_all

      sibling_parsers = siblings.each_with_object([]) do |sibling, parsers|
        parsers << DateTimeParser.new(sibling.value) if sibling.date_time_property?
      end

      sibling_parsers.find(&:normalized_date)
    end
  end

  # @return [String, nil]
  def attribute_value
    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
  end

  # A parser fed with the node's value-class-pattern value (parts joined by a space).
  #
  # @return [MicroMicro::Parsers::DateTimeParser]
  def date_time_parser
    @date_time_parser ||= DateTimeParser.new(ValueClassPatternParser.new(node, ' ').value)
  end

  # Truthy when this node has a time but no date and a sibling can supply one.
  # Note the result is the adopted parser itself (or nil/false), not a strict Boolean.
  #
  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
  #
  # @return [MicroMicro::Parsers::DateTimeParser, Boolean, nil]
  def imply_date?
    date_time_parser.normalized_time && !date_time_parser.normalized_date && adopted_date_time_parser
  end

  # The parsed value, prefixed with the adopted sibling date when one is implied.
  #
  # @return [String, nil] nil when nothing could be parsed from the node
  def resolved_value
    parsed = date_time_parser.value
    return parsed unless imply_date?

    "#{adopted_date_time_parser.normalized_date} #{parsed}"
  end
end
|
64
55
|
end
|
65
56
|
end
|
@@ -1,27 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
# Parser for embedded markup properties: pairs the node's inner HTML with the
# text value computed by the parent class.
class EmbeddedMarkupPropertyParser < BasePropertyParser
  # @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
  #
  # @return [Hash{Symbol => String}] the :html and :value representations
  def value
    @value ||= begin
      # Captured before calling super, which passes the node through an
      # img-rewriting block. NOTE(review): whether that rewrite touches this
      # node or a copy is not visible here; keep this ordering.
      markup = node.inner_html.strip

      { html: markup, value: super }
    end
  end
end
|
27
17
|
end
|
@@ -1,77 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedNamePropertyParser <
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.5
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
5
|
+
# Parser for an implied name: an attribute value from a candidate node when
# one matches, otherwise the node's text content.
class ImpliedNamePropertyParser < BaseImpliedPropertyParser
  # Element name => attribute consulted for that element.
  HTML_ELEMENTS_MAP = {
    'img' => 'alt',
    'area' => 'alt',
    'abbr' => 'title'
  }.freeze

  # Selectors for the node's only child and that child's only child.
  ONLY_CHILD_SELECTORS = ['> :only-child', '> :only-child > :only-child'].freeze
  private_constant :ONLY_CHILD_SELECTORS

  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
  #
  # @return [String]
  def value
    @value ||= attribute_value || text_content
  end

  private

  # The node's only child and only grandchild, where present, excluding any
  # that Helpers.item_node? identifies as an item node.
  #
  # @return [Array]
  def child_nodes
    ONLY_CHILD_SELECTORS.filter_map do |selector|
      descendant = node.at_css(selector)

      descendant unless descendant.nil? || Helpers.item_node?(descendant)
    end
  end

  # The node's text content, with each img element's content set to its alt
  # attribute before the text is extracted.
  #
  # @return [String]
  def text_content
    Helpers.text_content_from(node) do |fragment|
      fragment.css('img').each { |image| image.content = image['alt'] }
    end
  end
end
|
77
36
|
end
|