micromicro 1.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -102
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +8 -1
- data/lib/micro_micro/collections/items_collection.rb +84 -1
- data/lib/micro_micro/collections/properties_collection.rb +111 -0
- data/lib/micro_micro/collections/relationships_collection.rb +85 -6
- data/lib/micro_micro/document.rb +21 -103
- data/lib/micro_micro/helpers.rb +94 -0
- data/lib/micro_micro/implied_property.rb +15 -0
- data/lib/micro_micro/item.rb +93 -79
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +6 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +61 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +10 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +15 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +21 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +12 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +4 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +22 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -42
- data/lib/micro_micro/property.rb +126 -56
- data/lib/micro_micro/relationship.rb +38 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +32 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
@@ -1,78 +1,114 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimeParser
|
4
|
-
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
5
|
-
#
|
6
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
7
|
-
DATE_REGEXP_PATTERN = '(?<year>\d{4})-
|
8
|
-
|
9
|
-
|
10
|
-
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
11
|
-
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
|
7
|
+
DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
|
8
|
+
'((?<ordinal>3[0-6]{2}|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
|
9
|
+
'(?<day>3[0-1]|[0-2]\d))'
|
12
10
|
|
13
|
-
|
11
|
+
# Regexp pattern matching HH:MM and HH:MM:SS
|
12
|
+
TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
|
13
|
+
'(?::(?<minutes>[0-5]\d))?' \
|
14
|
+
'(?::(?<seconds>[0-5]\d))?' \
|
15
|
+
'(?:\s*?(?<abbreviation>[apPP]\.?[mM]\.?))?'
|
14
16
|
|
15
|
-
#
|
17
|
+
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
18
|
+
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
|
19
|
+
|
20
|
+
# Regexp for extracting named captures from a datetime-esque String.
|
21
|
+
DATE_TIME_TIMEZONE_REGEXP = /
|
22
|
+
\A
|
23
|
+
(?=.)
|
24
|
+
(?:#{DATE_REGEXP_PATTERN})?
|
25
|
+
(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
|
26
|
+
\z
|
27
|
+
/x.freeze
|
28
|
+
|
29
|
+
# Parse a string for date and/or time values according to the Microformats
|
30
|
+
# Value Class Pattern date and time parsing specification.
|
31
|
+
#
|
32
|
+
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
33
|
+
# microformats.org: Value Class Pattern § Date and time parsing
|
34
|
+
#
|
35
|
+
# @param string [String, #to_s]
|
16
36
|
def initialize(string)
|
17
|
-
@string = string
|
37
|
+
@string = string.to_s
|
18
38
|
end
|
19
39
|
|
20
|
-
|
40
|
+
# Define getter and predicate methods for all possible named captures
|
41
|
+
# returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
|
42
|
+
[
|
43
|
+
:year, :ordinal, :month, :day,
|
44
|
+
:hours, :minutes, :seconds,
|
45
|
+
:abbreviation, :zulu, :offset
|
46
|
+
].each do |name|
|
21
47
|
define_method(name) { values[name] }
|
22
48
|
define_method("#{name}?") { public_send(name).present? }
|
23
49
|
end
|
24
50
|
|
51
|
+
# @return [String, nil]
|
25
52
|
def normalized_calendar_date
|
26
53
|
@normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
|
27
54
|
end
|
28
55
|
|
56
|
+
# @return [String, nil]
|
29
57
|
def normalized_date
|
30
58
|
@normalized_date ||= normalized_calendar_date || normalized_ordinal_date
|
31
59
|
end
|
32
60
|
|
61
|
+
# @return [String, nil]
|
33
62
|
def normalized_hours
|
34
|
-
@normalized_hours ||=
|
35
|
-
|
36
|
-
|
63
|
+
@normalized_hours ||=
|
64
|
+
if hours?
|
65
|
+
return (hours.to_i + 12).to_s if abbreviation&.tr('.', '')&.downcase == 'pm'
|
37
66
|
|
38
|
-
|
39
|
-
|
67
|
+
format('%<hours>02d', hours: hours)
|
68
|
+
end
|
40
69
|
end
|
41
70
|
|
71
|
+
# @return [String]
|
42
72
|
def normalized_minutes
|
43
73
|
@normalized_minutes ||= minutes || '00'
|
44
74
|
end
|
45
75
|
|
76
|
+
# @return [String, nil]
|
46
77
|
def normalized_ordinal_date
|
47
78
|
@normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
|
48
79
|
end
|
49
80
|
|
81
|
+
# @return [String, nil]
|
50
82
|
def normalized_time
|
51
83
|
@normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
|
52
84
|
end
|
53
85
|
|
86
|
+
# @return [String, nil]
|
54
87
|
def normalized_timezone
|
55
88
|
@normalized_timezone ||= zulu || offset&.tr(':', '')
|
56
89
|
end
|
57
90
|
|
58
|
-
# @return [String]
|
91
|
+
# @return [String, nil]
|
59
92
|
def value
|
60
|
-
@value ||=
|
93
|
+
@value ||=
|
94
|
+
if normalized_date || normalized_time || normalized_timezone
|
95
|
+
"#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
96
|
+
end
|
61
97
|
end
|
62
98
|
|
63
99
|
# @return [Hash{Symbol => String, nil}]
|
64
100
|
def values
|
65
|
-
@values ||=
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
string&.match(/^(?:#{DATE_REGEXP_PATTERN})?(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?$/)&.named_captures.to_h.symbolize_keys
|
101
|
+
@values ||=
|
102
|
+
if string.match?(DATE_TIME_TIMEZONE_REGEXP)
|
103
|
+
string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
|
104
|
+
else
|
105
|
+
{}
|
106
|
+
end
|
72
107
|
end
|
73
108
|
|
74
109
|
private
|
75
110
|
|
111
|
+
# @return [String]
|
76
112
|
attr_reader :string
|
77
113
|
end
|
78
114
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimePropertyParser < BasePropertyParser
|
@@ -8,6 +10,7 @@ module MicroMicro
|
|
8
10
|
}.freeze
|
9
11
|
|
10
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing a +dt-+ property
|
11
14
|
#
|
12
15
|
# @return [String]
|
13
16
|
def value
|
@@ -17,19 +20,19 @@ module MicroMicro
|
|
17
20
|
private
|
18
21
|
|
19
22
|
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
23
|
+
# microformats.org: Value Class Pattern § microformats2 parsers implied date
|
20
24
|
#
|
21
25
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
22
26
|
def adopted_date_time_parser
|
23
|
-
@adopted_date_time_parser ||=
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
end
|
27
|
+
@adopted_date_time_parser ||=
|
28
|
+
(property.prev_all.reverse + property.next_all).filter_map do |prop|
|
29
|
+
DateTimeParser.new(prop.value) if prop.date_time_property?
|
30
|
+
end.find(&:normalized_date)
|
28
31
|
end
|
29
32
|
|
30
33
|
# @return [String, nil]
|
31
34
|
def attribute_value
|
32
|
-
|
35
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
33
36
|
end
|
34
37
|
|
35
38
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
@@ -38,6 +41,7 @@ module MicroMicro
|
|
38
41
|
end
|
39
42
|
|
40
43
|
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
44
|
+
# microformats.org: Value Class Pattern § microformats2 parsers implied date
|
41
45
|
#
|
42
46
|
# @return [Boolean]
|
43
47
|
def imply_date?
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
4
6
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
7
|
+
# microformats.org: microformats2 parsing specification § Parsing an +e-+ property
|
5
8
|
#
|
6
9
|
# @return [Hash{Symbol => String}]
|
7
10
|
def value
|
8
|
-
@value ||=
|
11
|
+
@value ||=
|
9
12
|
{
|
10
13
|
html: node.inner_html.strip,
|
11
14
|
value: super
|
12
15
|
}
|
13
|
-
end
|
14
16
|
end
|
15
17
|
end
|
16
18
|
end
|
@@ -1,12 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedNamePropertyParser <
|
4
|
-
|
5
|
-
'
|
6
|
-
'
|
5
|
+
class ImpliedNamePropertyParser < BaseImpliedPropertyParser
|
6
|
+
HTML_ELEMENTS_MAP = {
|
7
|
+
'img' => 'alt',
|
8
|
+
'area' => 'alt',
|
9
|
+
'abbr' => 'title'
|
7
10
|
}.freeze
|
8
11
|
|
9
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
10
14
|
#
|
11
15
|
# @return [String]
|
12
16
|
def value
|
@@ -15,24 +19,19 @@ module MicroMicro
|
|
15
19
|
|
16
20
|
private
|
17
21
|
|
18
|
-
# @return [Nokogiri::XML::NodeSet]
|
19
|
-
def candidate_nodes
|
20
|
-
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
21
|
-
end
|
22
|
-
|
23
22
|
# @return [Array]
|
24
23
|
def child_nodes
|
25
|
-
[
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def attribute_value
|
30
|
-
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
24
|
+
[
|
25
|
+
node.at_css('> :only-child'),
|
26
|
+
node.at_css('> :only-child > :only-child')
|
27
|
+
].compact.reject { |child_node| Helpers.item_node?(child_node) }
|
31
28
|
end
|
32
29
|
|
33
30
|
# @return [String]
|
34
31
|
def text_content
|
35
|
-
|
32
|
+
Helpers.text_content_from(node) do |context|
|
33
|
+
context.css('img').each { |img| img.content = img['alt'] }
|
34
|
+
end
|
36
35
|
end
|
37
36
|
end
|
38
37
|
end
|
@@ -1,64 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedPhotoPropertyParser <
|
5
|
+
class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'img' => 'src',
|
6
10
|
'object' => 'data'
|
7
11
|
}.freeze
|
8
12
|
|
9
13
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
14
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
10
15
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
16
|
+
# microformats.org: microformats2 parsing specification § Parse an img element for src and alt
|
11
17
|
#
|
12
18
|
# @return [String, Hash{Symbol => String}, nil]
|
13
19
|
def value
|
14
|
-
@value ||=
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
@value ||=
|
21
|
+
if attribute_value
|
22
|
+
return attribute_value unless candidate_node.matches?('img[alt]')
|
23
|
+
|
24
|
+
{
|
25
|
+
value: attribute_value,
|
26
|
+
alt: candidate_node['alt'].strip
|
27
|
+
}
|
28
|
+
end
|
23
29
|
end
|
24
30
|
|
25
31
|
private
|
26
32
|
|
27
|
-
# @return [Array
|
28
|
-
def
|
29
|
-
|
30
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
31
|
-
node if node.matches?("#{element}[#{attribute}]")
|
32
|
-
end.compact
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [String, nil]
|
37
|
-
def resolved_value
|
38
|
-
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
39
|
-
end
|
33
|
+
# @return [Array]
|
34
|
+
def child_nodes
|
35
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
40
36
|
|
41
|
-
|
42
|
-
def value_node
|
43
|
-
@value_node ||= begin
|
44
|
-
return attribute_values.first if attribute_values.any?
|
45
|
-
|
46
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
47
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
48
|
-
|
49
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
50
|
-
end
|
51
|
-
|
52
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
53
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
54
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
55
|
-
|
56
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
57
|
-
end
|
58
|
-
end
|
37
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
59
38
|
|
60
|
-
|
61
|
-
end
|
39
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
62
40
|
end
|
63
41
|
end
|
64
42
|
end
|
@@ -1,50 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedUrlPropertyParser <
|
5
|
+
class ImpliedUrlPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> a[href]:only-of-type', '> area[href]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'a' => 'href',
|
6
10
|
'area' => 'href'
|
7
11
|
}.freeze
|
8
12
|
|
9
13
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
14
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
10
15
|
#
|
11
16
|
# @return [String, nil]
|
12
17
|
def value
|
13
|
-
@value ||=
|
18
|
+
@value ||= attribute_value
|
14
19
|
end
|
15
20
|
|
16
21
|
private
|
17
22
|
|
18
|
-
# @return [Array
|
19
|
-
def
|
20
|
-
|
21
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
22
|
-
node if node.matches?("#{element}[#{attribute}]")
|
23
|
-
end.compact
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# @return [Nokogiri::XML::Element, nil]
|
28
|
-
def value_node
|
29
|
-
@value_node ||= begin
|
30
|
-
return attribute_values.first if attribute_values.any?
|
31
|
-
|
32
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
33
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
34
|
-
|
35
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
36
|
-
end
|
37
|
-
|
38
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
39
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
40
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
23
|
+
# @return [Array]
|
24
|
+
def child_nodes
|
25
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
41
26
|
|
42
|
-
|
43
|
-
end
|
44
|
-
end
|
27
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
45
28
|
|
46
|
-
|
47
|
-
end
|
29
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
48
30
|
end
|
49
31
|
end
|
50
32
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class PlainTextPropertyParser < BasePropertyParser
|
@@ -8,6 +10,7 @@ module MicroMicro
|
|
8
10
|
}.freeze
|
9
11
|
|
10
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing a +p-+ property
|
11
14
|
#
|
12
15
|
# @return [String]
|
13
16
|
def value
|
@@ -18,7 +21,7 @@ module MicroMicro
|
|
18
21
|
|
19
22
|
# @return [String, nil]
|
20
23
|
def attribute_value
|
21
|
-
|
24
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
22
25
|
end
|
23
26
|
|
24
27
|
# @return [String, nil]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class UrlPropertyParser < BasePropertyParser
|
@@ -14,40 +16,48 @@ module MicroMicro
|
|
14
16
|
}.freeze
|
15
17
|
|
16
18
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
19
|
+
# microformats.org: microformats2 parsing specification § Parsing a +u-+ property
|
17
20
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
21
|
+
# microformats.org: microformats2 parsing specification § Parse an img element for src and alt
|
18
22
|
#
|
19
23
|
# @return [String, Hash{Symbol => String}]
|
20
24
|
def value
|
21
|
-
@value ||=
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
@value ||=
|
26
|
+
if node.matches?('img[alt]')
|
27
|
+
{
|
28
|
+
value: resolved_value,
|
29
|
+
alt: node['alt'].strip
|
30
|
+
}
|
31
|
+
else
|
32
|
+
resolved_value
|
33
|
+
end
|
29
34
|
end
|
30
35
|
|
31
36
|
private
|
32
37
|
|
33
38
|
# @return [String, nil]
|
34
39
|
def attribute_value
|
35
|
-
|
40
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
36
41
|
end
|
37
42
|
|
38
43
|
# @return [String, nil]
|
39
44
|
def extended_attribute_value
|
40
|
-
|
45
|
+
Helpers.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
|
41
46
|
end
|
42
47
|
|
43
48
|
# @return [String]
|
44
49
|
def resolved_value
|
45
|
-
@resolved_value ||=
|
50
|
+
@resolved_value ||= node.document.resolve_relative_url(unresolved_value.strip)
|
51
|
+
end
|
52
|
+
|
53
|
+
# @return [String]
|
54
|
+
def text_content
|
55
|
+
Helpers.text_content_from(node)
|
46
56
|
end
|
47
57
|
|
48
58
|
# @return [String]
|
49
59
|
def unresolved_value
|
50
|
-
attribute_value || value_class_pattern_value || extended_attribute_value ||
|
60
|
+
attribute_value || value_class_pattern_value || extended_attribute_value || text_content
|
51
61
|
end
|
52
62
|
|
53
63
|
# @return [String, nil]
|
@@ -1,8 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class ValueClassPatternParser
|
4
6
|
# @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
|
7
|
+
# microformats.org: Value Class Pattern § Basic Parsing
|
5
8
|
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_values
|
9
|
+
# microformats.org: Value Class Pattern § Date and time values
|
6
10
|
HTML_ATTRIBUTES_MAP = {
|
7
11
|
'alt' => %w[area img],
|
8
12
|
'value' => %w[data],
|
@@ -10,72 +14,55 @@ module MicroMicro
|
|
10
14
|
'datetime' => %w[del ins time]
|
11
15
|
}.freeze
|
12
16
|
|
13
|
-
# @param context [Nokogiri::XML::Element]
|
14
|
-
# @param separator [String]
|
15
|
-
def initialize(node, separator = '')
|
16
|
-
@node = node
|
17
|
-
@separator = separator
|
18
|
-
end
|
19
|
-
|
20
|
-
# @return [String, nil]
|
21
|
-
def value
|
22
|
-
@value ||= values.join(separator).strip if values.any?
|
23
|
-
end
|
24
|
-
|
25
|
-
# @return [Array<String>]
|
26
|
-
def values
|
27
|
-
@values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
|
28
|
-
end
|
29
|
-
|
30
17
|
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
31
18
|
# @param node_set [Nokogiri::XML::NodeSet]
|
32
19
|
# @return [Nokogiri::XML::NodeSet]
|
33
|
-
def self.
|
34
|
-
context.each { |node|
|
20
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
21
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
35
22
|
|
36
|
-
if context.is_a?(Nokogiri::XML::Element) && !
|
37
|
-
if value_class_node?(context) || value_title_node?(context)
|
23
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
24
|
+
if Helpers.value_class_node?(context) || Helpers.value_title_node?(context)
|
38
25
|
node_set << context
|
39
26
|
else
|
40
|
-
|
27
|
+
node_set_from(context.element_children, node_set)
|
41
28
|
end
|
42
29
|
end
|
43
30
|
|
44
31
|
node_set
|
45
32
|
end
|
46
33
|
|
47
|
-
# @param node [Nokogiri::XML::Element]
|
48
|
-
# @return [Boolean]
|
49
|
-
def self.value_class_node?(node)
|
50
|
-
node.classes.include?('value')
|
51
|
-
end
|
52
|
-
|
53
34
|
# @param node [Nokogiri::XML::Element]
|
54
35
|
# @return [String, nil]
|
55
36
|
def self.value_from(node)
|
56
|
-
return node['title'] if value_title_node?(node)
|
37
|
+
return node['title'] if Helpers.value_title_node?(node)
|
57
38
|
|
58
|
-
|
59
|
-
return node[attribute] if names.include?(node.name) && node[attribute]
|
60
|
-
end
|
61
|
-
|
62
|
-
node.text
|
39
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
|
63
40
|
end
|
64
41
|
|
65
42
|
# @param node [Nokogiri::XML::Element]
|
66
|
-
# @
|
67
|
-
def
|
68
|
-
node
|
43
|
+
# @param separator [String]
|
44
|
+
def initialize(node, separator = '')
|
45
|
+
@node = node
|
46
|
+
@separator = separator
|
47
|
+
end
|
48
|
+
|
49
|
+
# @return [String, nil]
|
50
|
+
def value
|
51
|
+
@value ||= values.join(separator).strip if values.any?
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [Array<String>]
|
55
|
+
def values
|
56
|
+
@values ||=
|
57
|
+
self.class
|
58
|
+
.node_set_from(node)
|
59
|
+
.map { |value_node| self.class.value_from(value_node) }
|
60
|
+
.select(&:present?)
|
69
61
|
end
|
70
62
|
|
71
63
|
private
|
72
64
|
|
73
65
|
attr_reader :node, :separator
|
74
|
-
|
75
|
-
# @return [Nokogiri::XML::NodeSet]
|
76
|
-
def value_nodes
|
77
|
-
@value_nodes ||= self.class.nodes_from(node)
|
78
|
-
end
|
79
66
|
end
|
80
67
|
end
|
81
68
|
end
|