micromicro 1.1.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -102
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +8 -1
- data/lib/micro_micro/collections/items_collection.rb +84 -1
- data/lib/micro_micro/collections/properties_collection.rb +111 -0
- data/lib/micro_micro/collections/relationships_collection.rb +85 -6
- data/lib/micro_micro/document.rb +21 -103
- data/lib/micro_micro/helpers.rb +94 -0
- data/lib/micro_micro/implied_property.rb +15 -0
- data/lib/micro_micro/item.rb +93 -79
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +6 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +61 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +10 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +15 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +21 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +12 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +4 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +22 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -42
- data/lib/micro_micro/property.rb +126 -56
- data/lib/micro_micro/relationship.rb +38 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +32 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
@@ -1,78 +1,114 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimeParser
|
4
|
-
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
5
|
-
#
|
6
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
7
|
-
DATE_REGEXP_PATTERN = '(?<year>\d{4})-
|
8
|
-
|
9
|
-
|
10
|
-
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
11
|
-
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
|
7
|
+
DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
|
8
|
+
'((?<ordinal>3[0-6]{2}|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
|
9
|
+
'(?<day>3[0-1]|[0-2]\d))'
|
12
10
|
|
13
|
-
|
11
|
+
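# Regexp pattern matching HH, HH:MM, and HH:MM:SS with an optional am/pm abbreviation
# NOTE(review): the abbreviation character class `[apPP]` omits uppercase `A`, so "AM"/"A.M." never match; `[aApP]` is presumably intended.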
|
12
|
+
TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
|
13
|
+
'(?::(?<minutes>[0-5]\d))?' \
|
14
|
+
'(?::(?<seconds>[0-5]\d))?' \
|
15
|
+
'(?:\s*?(?<abbreviation>[apPP]\.?[mM]\.?))?'
|
14
16
|
|
15
|
-
#
|
17
|
+
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
18
|
+
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
|
19
|
+
|
20
|
+
# Regexp for extracting named captures from a datetime-esque String.
|
21
|
+
DATE_TIME_TIMEZONE_REGEXP = /
|
22
|
+
\A
|
23
|
+
(?=.)
|
24
|
+
(?:#{DATE_REGEXP_PATTERN})?
|
25
|
+
(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
|
26
|
+
\z
|
27
|
+
/x.freeze
|
28
|
+
|
29
|
+
# Parse a string for date and/or time values according to the Microformats
|
30
|
+
# Value Class Pattern date and time parsing specification.
|
31
|
+
#
|
32
|
+
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
33
|
+
# microformats.org: Value Class Pattern § Date and time parsing
|
34
|
+
#
|
35
|
+
# @param string [String, #to_s]
|
16
36
|
def initialize(string)
|
17
|
-
@string = string
|
37
|
+
@string = string.to_s
|
18
38
|
end
|
19
39
|
|
20
|
-
|
40
|
+
# Define getter and predicate methods for all possible named captures
|
41
|
+
# returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
|
42
|
+
[
|
43
|
+
:year, :ordinal, :month, :day,
|
44
|
+
:hours, :minutes, :seconds,
|
45
|
+
:abbreviation, :zulu, :offset
|
46
|
+
].each do |name|
|
21
47
|
define_method(name) { values[name] }
|
22
48
|
define_method("#{name}?") { public_send(name).present? }
|
23
49
|
end
|
24
50
|
|
51
|
+
# @return [String, nil]
|
25
52
|
def normalized_calendar_date
|
26
53
|
@normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
|
27
54
|
end
|
28
55
|
|
56
|
+
# @return [String, nil]
|
29
57
|
def normalized_date
|
30
58
|
@normalized_date ||= normalized_calendar_date || normalized_ordinal_date
|
31
59
|
end
|
32
60
|
|
61
|
+
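# @return [String, nil]
# NOTE(review): 12 is added to every "pm" hour, so "12pm" normalizes to "24", and "12am" stays "12"; confirm against the Value Class Pattern time parsing rules.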
|
33
62
|
def normalized_hours
|
34
|
-
@normalized_hours ||=
|
35
|
-
|
36
|
-
|
63
|
+
@normalized_hours ||=
|
64
|
+
if hours?
|
65
|
+
return (hours.to_i + 12).to_s if abbreviation&.tr('.', '')&.downcase == 'pm'
|
37
66
|
|
38
|
-
|
39
|
-
|
67
|
+
format('%<hours>02d', hours: hours)
|
68
|
+
end
|
40
69
|
end
|
41
70
|
|
71
|
+
# @return [String]
|
42
72
|
def normalized_minutes
|
43
73
|
@normalized_minutes ||= minutes || '00'
|
44
74
|
end
|
45
75
|
|
76
|
+
# @return [String, nil]
|
46
77
|
def normalized_ordinal_date
|
47
78
|
@normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
|
48
79
|
end
|
49
80
|
|
81
|
+
# @return [String, nil]
|
50
82
|
def normalized_time
|
51
83
|
@normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
|
52
84
|
end
|
53
85
|
|
86
|
+
# @return [String, nil]
|
54
87
|
def normalized_timezone
|
55
88
|
@normalized_timezone ||= zulu || offset&.tr(':', '')
|
56
89
|
end
|
57
90
|
|
58
|
-
# @return [String]
|
91
|
+
# @return [String, nil]
|
59
92
|
def value
|
60
|
-
@value ||=
|
93
|
+
@value ||=
|
94
|
+
if normalized_date || normalized_time || normalized_timezone
|
95
|
+
"#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
96
|
+
end
|
61
97
|
end
|
62
98
|
|
63
99
|
# @return [Hash{Symbol => String, nil}]
|
64
100
|
def values
|
65
|
-
@values ||=
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
string&.match(/^(?:#{DATE_REGEXP_PATTERN})?(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?$/)&.named_captures.to_h.symbolize_keys
|
101
|
+
@values ||=
|
102
|
+
if string.match?(DATE_TIME_TIMEZONE_REGEXP)
|
103
|
+
string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
|
104
|
+
else
|
105
|
+
{}
|
106
|
+
end
|
72
107
|
end
|
73
108
|
|
74
109
|
private
|
75
110
|
|
111
|
+
# @return [String]
|
76
112
|
attr_reader :string
|
77
113
|
end
|
78
114
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimePropertyParser < BasePropertyParser
|
@@ -8,6 +10,7 @@ module MicroMicro
|
|
8
10
|
}.freeze
|
9
11
|
|
10
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing a +dt-+ property
|
11
14
|
#
|
12
15
|
# @return [String]
|
13
16
|
def value
|
@@ -17,19 +20,19 @@ module MicroMicro
|
|
17
20
|
private
|
18
21
|
|
19
22
|
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
23
|
+
# microformats.org: Value Class Pattern § microformats2 parsers implied date
|
20
24
|
#
|
21
25
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
22
26
|
def adopted_date_time_parser
|
23
|
-
@adopted_date_time_parser ||=
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
end
|
27
|
+
@adopted_date_time_parser ||=
|
28
|
+
(property.prev_all.reverse + property.next_all).filter_map do |prop|
|
29
|
+
DateTimeParser.new(prop.value) if prop.date_time_property?
|
30
|
+
end.find(&:normalized_date)
|
28
31
|
end
|
29
32
|
|
30
33
|
# @return [String, nil]
|
31
34
|
def attribute_value
|
32
|
-
|
35
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
33
36
|
end
|
34
37
|
|
35
38
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
@@ -38,6 +41,7 @@ module MicroMicro
|
|
38
41
|
end
|
39
42
|
|
40
43
|
# @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
|
44
|
+
# microformats.org: Value Class Pattern § microformats2 parsers implied date
|
41
45
|
#
|
42
46
|
# @return [Boolean]
|
43
47
|
def imply_date?
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
4
6
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
7
|
+
# microformats.org: microformats2 parsing specification § Parsing an +e-+ property
|
5
8
|
#
|
6
9
|
# @return [Hash{Symbol => String}]
|
7
10
|
def value
|
8
|
-
@value ||=
|
11
|
+
@value ||=
|
9
12
|
{
|
10
13
|
html: node.inner_html.strip,
|
11
14
|
value: super
|
12
15
|
}
|
13
|
-
end
|
14
16
|
end
|
15
17
|
end
|
16
18
|
end
|
@@ -1,12 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedNamePropertyParser <
|
4
|
-
|
5
|
-
'
|
6
|
-
'
|
5
|
+
class ImpliedNamePropertyParser < BaseImpliedPropertyParser
|
6
|
+
HTML_ELEMENTS_MAP = {
|
7
|
+
'img' => 'alt',
|
8
|
+
'area' => 'alt',
|
9
|
+
'abbr' => 'title'
|
7
10
|
}.freeze
|
8
11
|
|
9
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
10
14
|
#
|
11
15
|
# @return [String]
|
12
16
|
def value
|
@@ -15,24 +19,19 @@ module MicroMicro
|
|
15
19
|
|
16
20
|
private
|
17
21
|
|
18
|
-
# @return [Nokogiri::XML::NodeSet]
|
19
|
-
def candidate_nodes
|
20
|
-
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
21
|
-
end
|
22
|
-
|
23
22
|
# @return [Array]
|
24
23
|
def child_nodes
|
25
|
-
[
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def attribute_value
|
30
|
-
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
24
|
+
[
|
25
|
+
node.at_css('> :only-child'),
|
26
|
+
node.at_css('> :only-child > :only-child')
|
27
|
+
].compact.reject { |child_node| Helpers.item_node?(child_node) }
|
31
28
|
end
|
32
29
|
|
33
30
|
# @return [String]
|
34
31
|
def text_content
|
35
|
-
|
32
|
+
Helpers.text_content_from(node) do |context|
|
33
|
+
context.css('img').each { |img| img.content = img['alt'] }
|
34
|
+
end
|
36
35
|
end
|
37
36
|
end
|
38
37
|
end
|
@@ -1,64 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedPhotoPropertyParser <
|
5
|
+
class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'img' => 'src',
|
6
10
|
'object' => 'data'
|
7
11
|
}.freeze
|
8
12
|
|
9
13
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
14
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
10
15
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
16
|
+
# microformats.org: microformats2 parsing specification § Parse an img element for src and alt
|
11
17
|
#
|
12
18
|
# @return [String, Hash{Symbol => String}, nil]
|
13
19
|
def value
|
14
|
-
@value ||=
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
@value ||=
|
21
|
+
if attribute_value
|
22
|
+
return attribute_value unless candidate_node.matches?('img[alt]')
|
23
|
+
|
24
|
+
{
|
25
|
+
value: attribute_value,
|
26
|
+
alt: candidate_node['alt'].strip
|
27
|
+
}
|
28
|
+
end
|
23
29
|
end
|
24
30
|
|
25
31
|
private
|
26
32
|
|
27
|
-
# @return [Array
|
28
|
-
def
|
29
|
-
|
30
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
31
|
-
node if node.matches?("#{element}[#{attribute}]")
|
32
|
-
end.compact
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [String, nil]
|
37
|
-
def resolved_value
|
38
|
-
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
39
|
-
end
|
33
|
+
# @return [Array]
|
34
|
+
def child_nodes
|
35
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
40
36
|
|
41
|
-
|
42
|
-
def value_node
|
43
|
-
@value_node ||= begin
|
44
|
-
return attribute_values.first if attribute_values.any?
|
45
|
-
|
46
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
47
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
48
|
-
|
49
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
50
|
-
end
|
51
|
-
|
52
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
53
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
54
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
55
|
-
|
56
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
57
|
-
end
|
58
|
-
end
|
37
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
59
38
|
|
60
|
-
|
61
|
-
end
|
39
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
62
40
|
end
|
63
41
|
end
|
64
42
|
end
|
@@ -1,50 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedUrlPropertyParser <
|
5
|
+
class ImpliedUrlPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> a[href]:only-of-type', '> area[href]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'a' => 'href',
|
6
10
|
'area' => 'href'
|
7
11
|
}.freeze
|
8
12
|
|
9
13
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
14
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
10
15
|
#
|
11
16
|
# @return [String, nil]
|
12
17
|
def value
|
13
|
-
@value ||=
|
18
|
+
@value ||= attribute_value
|
14
19
|
end
|
15
20
|
|
16
21
|
private
|
17
22
|
|
18
|
-
# @return [Array
|
19
|
-
def
|
20
|
-
|
21
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
22
|
-
node if node.matches?("#{element}[#{attribute}]")
|
23
|
-
end.compact
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# @return [Nokogiri::XML::Element, nil]
|
28
|
-
def value_node
|
29
|
-
@value_node ||= begin
|
30
|
-
return attribute_values.first if attribute_values.any?
|
31
|
-
|
32
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
33
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
34
|
-
|
35
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
36
|
-
end
|
37
|
-
|
38
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
39
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
40
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
23
|
+
# @return [Array]
|
24
|
+
def child_nodes
|
25
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
41
26
|
|
42
|
-
|
43
|
-
end
|
44
|
-
end
|
27
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
45
28
|
|
46
|
-
|
47
|
-
end
|
29
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
48
30
|
end
|
49
31
|
end
|
50
32
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class PlainTextPropertyParser < BasePropertyParser
|
@@ -8,6 +10,7 @@ module MicroMicro
|
|
8
10
|
}.freeze
|
9
11
|
|
10
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing a +p-+ property
|
11
14
|
#
|
12
15
|
# @return [String]
|
13
16
|
def value
|
@@ -18,7 +21,7 @@ module MicroMicro
|
|
18
21
|
|
19
22
|
# @return [String, nil]
|
20
23
|
def attribute_value
|
21
|
-
|
24
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
22
25
|
end
|
23
26
|
|
24
27
|
# @return [String, nil]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class UrlPropertyParser < BasePropertyParser
|
@@ -14,40 +16,48 @@ module MicroMicro
|
|
14
16
|
}.freeze
|
15
17
|
|
16
18
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
19
|
+
# microformats.org: microformats2 parsing specification § Parsing a +u-+ property
|
17
20
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
21
|
+
# microformats.org: microformats2 parsing specification § Parse an img element for src and alt
|
18
22
|
#
|
19
23
|
# @return [String, Hash{Symbol => String}]
|
20
24
|
def value
|
21
|
-
@value ||=
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
@value ||=
|
26
|
+
if node.matches?('img[alt]')
|
27
|
+
{
|
28
|
+
value: resolved_value,
|
29
|
+
alt: node['alt'].strip
|
30
|
+
}
|
31
|
+
else
|
32
|
+
resolved_value
|
33
|
+
end
|
29
34
|
end
|
30
35
|
|
31
36
|
private
|
32
37
|
|
33
38
|
# @return [String, nil]
|
34
39
|
def attribute_value
|
35
|
-
|
40
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
36
41
|
end
|
37
42
|
|
38
43
|
# @return [String, nil]
|
39
44
|
def extended_attribute_value
|
40
|
-
|
45
|
+
Helpers.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
|
41
46
|
end
|
42
47
|
|
43
48
|
# @return [String]
|
44
49
|
def resolved_value
|
45
|
-
@resolved_value ||=
|
50
|
+
@resolved_value ||= node.document.resolve_relative_url(unresolved_value.strip)
|
51
|
+
end
|
52
|
+
|
53
|
+
# @return [String]
|
54
|
+
def text_content
|
55
|
+
Helpers.text_content_from(node)
|
46
56
|
end
|
47
57
|
|
48
58
|
# @return [String]
|
49
59
|
def unresolved_value
|
50
|
-
attribute_value || value_class_pattern_value || extended_attribute_value ||
|
60
|
+
attribute_value || value_class_pattern_value || extended_attribute_value || text_content
|
51
61
|
end
|
52
62
|
|
53
63
|
# @return [String, nil]
|
@@ -1,8 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class ValueClassPatternParser
|
4
6
|
# @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
|
7
|
+
# microformats.org: Value Class Pattern § Basic Parsing
|
5
8
|
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_values
|
9
|
+
# microformats.org: Value Class Pattern § Date and time values
|
6
10
|
HTML_ATTRIBUTES_MAP = {
|
7
11
|
'alt' => %w[area img],
|
8
12
|
'value' => %w[data],
|
@@ -10,72 +14,55 @@ module MicroMicro
|
|
10
14
|
'datetime' => %w[del ins time]
|
11
15
|
}.freeze
|
12
16
|
|
13
|
-
# @param context [Nokogiri::XML::Element]
|
14
|
-
# @param separator [String]
|
15
|
-
def initialize(node, separator = '')
|
16
|
-
@node = node
|
17
|
-
@separator = separator
|
18
|
-
end
|
19
|
-
|
20
|
-
# @return [String, nil]
|
21
|
-
def value
|
22
|
-
@value ||= values.join(separator).strip if values.any?
|
23
|
-
end
|
24
|
-
|
25
|
-
# @return [Array<String>]
|
26
|
-
def values
|
27
|
-
@values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
|
28
|
-
end
|
29
|
-
|
30
17
|
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
31
18
|
# @param node_set [Nokogiri::XML::NodeSet]
|
32
19
|
# @return [Nokogiri::XML::NodeSet]
|
33
|
-
def self.
|
34
|
-
context.each { |node|
|
20
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
21
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
35
22
|
|
36
|
-
if context.is_a?(Nokogiri::XML::Element) && !
|
37
|
-
if value_class_node?(context) || value_title_node?(context)
|
23
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
24
|
+
if Helpers.value_class_node?(context) || Helpers.value_title_node?(context)
|
38
25
|
node_set << context
|
39
26
|
else
|
40
|
-
|
27
|
+
node_set_from(context.element_children, node_set)
|
41
28
|
end
|
42
29
|
end
|
43
30
|
|
44
31
|
node_set
|
45
32
|
end
|
46
33
|
|
47
|
-
# @param node [Nokogiri::XML::Element]
|
48
|
-
# @return [Boolean]
|
49
|
-
def self.value_class_node?(node)
|
50
|
-
node.classes.include?('value')
|
51
|
-
end
|
52
|
-
|
53
34
|
# @param node [Nokogiri::XML::Element]
|
54
35
|
# @return [String, nil]
|
55
36
|
def self.value_from(node)
|
56
|
-
return node['title'] if value_title_node?(node)
|
37
|
+
return node['title'] if Helpers.value_title_node?(node)
|
57
38
|
|
58
|
-
|
59
|
-
return node[attribute] if names.include?(node.name) && node[attribute]
|
60
|
-
end
|
61
|
-
|
62
|
-
node.text
|
39
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
|
63
40
|
end
|
64
41
|
|
65
42
|
# @param node [Nokogiri::XML::Element]
|
66
|
-
# @
|
67
|
-
def
|
68
|
-
node
|
43
|
+
# @param separator [String]
|
44
|
+
def initialize(node, separator = '')
|
45
|
+
@node = node
|
46
|
+
@separator = separator
|
47
|
+
end
|
48
|
+
|
49
|
+
# @return [String, nil]
|
50
|
+
def value
|
51
|
+
@value ||= values.join(separator).strip if values.any?
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [Array<String>]
|
55
|
+
def values
|
56
|
+
@values ||=
|
57
|
+
self.class
|
58
|
+
.node_set_from(node)
|
59
|
+
.map { |value_node| self.class.value_from(value_node) }
|
60
|
+
.select(&:present?)
|
69
61
|
end
|
70
62
|
|
71
63
|
private
|
72
64
|
|
73
65
|
attr_reader :node, :separator
|
74
|
-
|
75
|
-
# @return [Nokogiri::XML::NodeSet]
|
76
|
-
def value_nodes
|
77
|
-
@value_nodes ||= self.class.nodes_from(node)
|
78
|
-
end
|
79
66
|
end
|
80
67
|
end
|
81
68
|
end
|