micromicro 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -9
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +7 -1
- data/lib/micro_micro/collections/items_collection.rb +3 -1
- data/lib/micro_micro/collections/properties_collection.rb +12 -0
- data/lib/micro_micro/collections/relationships_collection.rb +10 -9
- data/lib/micro_micro/document.rb +10 -98
- data/lib/micro_micro/helpers.rb +82 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +53 -60
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +4 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +7 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +3 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +14 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +19 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +11 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +3 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +20 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +27 -42
- data/lib/micro_micro/property.rb +68 -56
- data/lib/micro_micro/relationship.rb +15 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +31 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Parsers
|
5
|
+
class BaseImpliedPropertyParser < BasePropertyParser
|
6
|
+
private
|
7
|
+
|
8
|
+
# @return [String, nil]
|
9
|
+
def attribute_value
|
10
|
+
candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]] if candidate_node
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Nokogiri::XML::Element, nil]
|
14
|
+
def candidate_node
|
15
|
+
@candidate_node ||=
|
16
|
+
candidate_nodes.find do |node|
|
17
|
+
self.class::HTML_ELEMENTS_MAP.filter_map do |name, attribute|
|
18
|
+
node if name == node.name && node[attribute]
|
19
|
+
end.any?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# @return [Nokogiri::XML::NodeSet]
|
24
|
+
def candidate_nodes
|
25
|
+
Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class BasePropertyParser
|
@@ -12,20 +14,10 @@ module MicroMicro
|
|
12
14
|
#
|
13
15
|
# @return [String]
|
14
16
|
def value
|
15
|
-
@value ||=
|
16
|
-
|
17
|
+
@value ||=
|
18
|
+
Helpers.text_content_from(node) do |context|
|
17
19
|
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
20
|
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
# @param node [Nokogiri::XML::Element]
|
23
|
-
# @param attributes_map [Hash{String => Array}]
|
24
|
-
# @return [Array]
|
25
|
-
def self.attribute_value_from(node, attributes_map)
|
26
|
-
attributes_map.map do |attribute, names|
|
27
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
-
end.compact.first
|
29
21
|
end
|
30
22
|
|
31
23
|
private
|
@@ -1,78 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimeParser
|
4
|
-
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
5
|
-
#
|
6
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
7
|
-
DATE_REGEXP_PATTERN = '(?<year>\d{4})-
|
8
|
-
|
9
|
-
|
10
|
-
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
11
|
-
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
|
7
|
+
DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
|
8
|
+
'((?<ordinal>3[0-6]{2}|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
|
9
|
+
'(?<day>3[0-1]|[0-2]\d))'
|
12
10
|
|
13
|
-
|
11
|
+
# Regexp pattern matching HH:MM and HH:MM:SS
|
12
|
+
TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
|
13
|
+
'(?::(?<minutes>[0-5]\d))?' \
|
14
|
+
'(?::(?<seconds>[0-5]\d))?' \
|
15
|
+
'(?:\s*?(?<abbreviation>[apPP]\.?[mM]\.?))?'
|
14
16
|
|
15
|
-
#
|
17
|
+
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
18
|
+
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
|
19
|
+
|
20
|
+
# Regexp for extracting named captures from a datetime-esque String.
|
21
|
+
DATE_TIME_TIMEZONE_REGEXP = /
|
22
|
+
\A
|
23
|
+
(?=.)
|
24
|
+
(?:#{DATE_REGEXP_PATTERN})?
|
25
|
+
(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
|
26
|
+
\z
|
27
|
+
/x.freeze
|
28
|
+
|
29
|
+
# Parse a string for date and/or time values according to the Microformats
|
30
|
+
# Value Class Pattern date and time parsing specification.
|
31
|
+
#
|
32
|
+
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
33
|
+
#
|
34
|
+
# @param string [String, #to_s]
|
16
35
|
def initialize(string)
|
17
|
-
@string = string
|
36
|
+
@string = string.to_s
|
18
37
|
end
|
19
38
|
|
20
|
-
|
39
|
+
# Define getter and predicate methods for all possible named captures
|
40
|
+
# returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
|
41
|
+
[
|
42
|
+
:year, :ordinal, :month, :day,
|
43
|
+
:hours, :minutes, :seconds,
|
44
|
+
:abbreviation, :zulu, :offset
|
45
|
+
].each do |name|
|
21
46
|
define_method(name) { values[name] }
|
22
47
|
define_method("#{name}?") { public_send(name).present? }
|
23
48
|
end
|
24
49
|
|
50
|
+
# @return [String, nil]
|
25
51
|
def normalized_calendar_date
|
26
52
|
@normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
|
27
53
|
end
|
28
54
|
|
55
|
+
# @return [String, nil]
|
29
56
|
def normalized_date
|
30
57
|
@normalized_date ||= normalized_calendar_date || normalized_ordinal_date
|
31
58
|
end
|
32
59
|
|
60
|
+
# @return [String, nil]
|
33
61
|
def normalized_hours
|
34
|
-
@normalized_hours ||=
|
35
|
-
|
36
|
-
|
62
|
+
@normalized_hours ||=
|
63
|
+
if hours?
|
64
|
+
return (hours.to_i + 12).to_s if abbreviation&.tr('.', '')&.downcase == 'pm'
|
37
65
|
|
38
|
-
|
39
|
-
|
66
|
+
format('%<hours>02d', hours: hours)
|
67
|
+
end
|
40
68
|
end
|
41
69
|
|
70
|
+
# @return [String]
|
42
71
|
def normalized_minutes
|
43
72
|
@normalized_minutes ||= minutes || '00'
|
44
73
|
end
|
45
74
|
|
75
|
+
# @return [String, nil]
|
46
76
|
def normalized_ordinal_date
|
47
77
|
@normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
|
48
78
|
end
|
49
79
|
|
80
|
+
# @return [String, nil]
|
50
81
|
def normalized_time
|
51
82
|
@normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
|
52
83
|
end
|
53
84
|
|
85
|
+
# @return [String, nil]
|
54
86
|
def normalized_timezone
|
55
87
|
@normalized_timezone ||= zulu || offset&.tr(':', '')
|
56
88
|
end
|
57
89
|
|
58
|
-
# @return [String]
|
90
|
+
# @return [String, nil]
|
59
91
|
def value
|
60
|
-
@value ||=
|
92
|
+
@value ||=
|
93
|
+
if normalized_date || normalized_time || normalized_timezone
|
94
|
+
"#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
95
|
+
end
|
61
96
|
end
|
62
97
|
|
63
98
|
# @return [Hash{Symbol => String, nil}]
|
64
99
|
def values
|
65
|
-
@values ||=
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
string&.match(/^(?:#{DATE_REGEXP_PATTERN})?(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?$/)&.named_captures.to_h.symbolize_keys
|
100
|
+
@values ||=
|
101
|
+
if string.match?(DATE_TIME_TIMEZONE_REGEXP)
|
102
|
+
string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
|
103
|
+
else
|
104
|
+
{}
|
105
|
+
end
|
72
106
|
end
|
73
107
|
|
74
108
|
private
|
75
109
|
|
110
|
+
# @return [String]
|
76
111
|
attr_reader :string
|
77
112
|
end
|
78
113
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimePropertyParser < BasePropertyParser
|
@@ -20,16 +22,15 @@ module MicroMicro
|
|
20
22
|
#
|
21
23
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
22
24
|
def adopted_date_time_parser
|
23
|
-
@adopted_date_time_parser ||=
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
end
|
25
|
+
@adopted_date_time_parser ||=
|
26
|
+
(property.prev_all.reverse + property.next_all).filter_map do |prop|
|
27
|
+
DateTimeParser.new(prop.value) if prop.date_time_property?
|
28
|
+
end.find(&:normalized_date)
|
28
29
|
end
|
29
30
|
|
30
31
|
# @return [String, nil]
|
31
32
|
def attribute_value
|
32
|
-
|
33
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
33
34
|
end
|
34
35
|
|
35
36
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
@@ -5,12 +7,11 @@ module MicroMicro
|
|
5
7
|
#
|
6
8
|
# @return [Hash{Symbol => String}]
|
7
9
|
def value
|
8
|
-
@value ||=
|
10
|
+
@value ||=
|
9
11
|
{
|
10
12
|
html: node.inner_html.strip,
|
11
13
|
value: super
|
12
14
|
}
|
13
|
-
end
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
@@ -1,9 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedNamePropertyParser <
|
4
|
-
|
5
|
-
'
|
6
|
-
'
|
5
|
+
class ImpliedNamePropertyParser < BaseImpliedPropertyParser
|
6
|
+
HTML_ELEMENTS_MAP = {
|
7
|
+
'img' => 'alt',
|
8
|
+
'area' => 'alt',
|
9
|
+
'abbr' => 'title'
|
7
10
|
}.freeze
|
8
11
|
|
9
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
@@ -15,24 +18,19 @@ module MicroMicro
|
|
15
18
|
|
16
19
|
private
|
17
20
|
|
18
|
-
# @return [Nokogiri::XML::NodeSet]
|
19
|
-
def candidate_nodes
|
20
|
-
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
21
|
-
end
|
22
|
-
|
23
21
|
# @return [Array]
|
24
22
|
def child_nodes
|
25
|
-
[
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def attribute_value
|
30
|
-
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
23
|
+
[
|
24
|
+
node.at_css('> :only-child'),
|
25
|
+
node.at_css('> :only-child > :only-child')
|
26
|
+
].compact.reject { |child_node| Helpers.item_node?(child_node) }
|
31
27
|
end
|
32
28
|
|
33
29
|
# @return [String]
|
34
30
|
def text_content
|
35
|
-
|
31
|
+
Helpers.text_content_from(node) do |context|
|
32
|
+
context.css('img').each { |img| img.content = img['alt'] }
|
33
|
+
end
|
36
34
|
end
|
37
35
|
end
|
38
36
|
end
|
@@ -1,6 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedPhotoPropertyParser <
|
5
|
+
class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'img' => 'src',
|
6
10
|
'object' => 'data'
|
@@ -11,54 +15,26 @@ module MicroMicro
|
|
11
15
|
#
|
12
16
|
# @return [String, Hash{Symbol => String}, nil]
|
13
17
|
def value
|
14
|
-
@value ||=
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
@value ||=
|
19
|
+
if attribute_value
|
20
|
+
return attribute_value unless candidate_node.matches?('img[alt]')
|
21
|
+
|
22
|
+
{
|
23
|
+
value: attribute_value,
|
24
|
+
alt: candidate_node['alt'].strip
|
25
|
+
}
|
26
|
+
end
|
23
27
|
end
|
24
28
|
|
25
29
|
private
|
26
30
|
|
27
|
-
# @return [Array
|
28
|
-
def
|
29
|
-
|
30
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
31
|
-
node if node.matches?("#{element}[#{attribute}]")
|
32
|
-
end.compact
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [String, nil]
|
37
|
-
def resolved_value
|
38
|
-
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
39
|
-
end
|
31
|
+
# @return [Array]
|
32
|
+
def child_nodes
|
33
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
40
34
|
|
41
|
-
|
42
|
-
def value_node
|
43
|
-
@value_node ||= begin
|
44
|
-
return attribute_values.first if attribute_values.any?
|
45
|
-
|
46
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
47
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
48
|
-
|
49
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
50
|
-
end
|
51
|
-
|
52
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
53
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
54
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
55
|
-
|
56
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
57
|
-
end
|
58
|
-
end
|
35
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
59
36
|
|
60
|
-
|
61
|
-
end
|
37
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
62
38
|
end
|
63
39
|
end
|
64
40
|
end
|
@@ -1,6 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedUrlPropertyParser <
|
5
|
+
class ImpliedUrlPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> a[href]:only-of-type', '> area[href]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'a' => 'href',
|
6
10
|
'area' => 'href'
|
@@ -10,41 +14,18 @@ module MicroMicro
|
|
10
14
|
#
|
11
15
|
# @return [String, nil]
|
12
16
|
def value
|
13
|
-
@value ||=
|
17
|
+
@value ||= attribute_value
|
14
18
|
end
|
15
19
|
|
16
20
|
private
|
17
21
|
|
18
|
-
# @return [Array
|
19
|
-
def
|
20
|
-
|
21
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
22
|
-
node if node.matches?("#{element}[#{attribute}]")
|
23
|
-
end.compact
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# @return [Nokogiri::XML::Element, nil]
|
28
|
-
def value_node
|
29
|
-
@value_node ||= begin
|
30
|
-
return attribute_values.first if attribute_values.any?
|
31
|
-
|
32
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
33
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
34
|
-
|
35
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
36
|
-
end
|
37
|
-
|
38
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
39
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
40
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
22
|
+
# @return [Array]
|
23
|
+
def child_nodes
|
24
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
41
25
|
|
42
|
-
|
43
|
-
end
|
44
|
-
end
|
26
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
45
27
|
|
46
|
-
|
47
|
-
end
|
28
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
48
29
|
end
|
49
30
|
end
|
50
31
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class PlainTextPropertyParser < BasePropertyParser
|
@@ -18,7 +20,7 @@ module MicroMicro
|
|
18
20
|
|
19
21
|
# @return [String, nil]
|
20
22
|
def attribute_value
|
21
|
-
|
23
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
22
24
|
end
|
23
25
|
|
24
26
|
# @return [String, nil]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class UrlPropertyParser < BasePropertyParser
|
@@ -18,36 +20,42 @@ module MicroMicro
|
|
18
20
|
#
|
19
21
|
# @return [String, Hash{Symbol => String}]
|
20
22
|
def value
|
21
|
-
@value ||=
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
23
|
+
@value ||=
|
24
|
+
if node.matches?('img[alt]')
|
25
|
+
{
|
26
|
+
value: resolved_value,
|
27
|
+
alt: node['alt'].strip
|
28
|
+
}
|
29
|
+
else
|
30
|
+
resolved_value
|
31
|
+
end
|
29
32
|
end
|
30
33
|
|
31
34
|
private
|
32
35
|
|
33
36
|
# @return [String, nil]
|
34
37
|
def attribute_value
|
35
|
-
|
38
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
36
39
|
end
|
37
40
|
|
38
41
|
# @return [String, nil]
|
39
42
|
def extended_attribute_value
|
40
|
-
|
43
|
+
Helpers.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
|
41
44
|
end
|
42
45
|
|
43
46
|
# @return [String]
|
44
47
|
def resolved_value
|
45
|
-
@resolved_value ||=
|
48
|
+
@resolved_value ||= node.document.resolve_relative_url(unresolved_value.strip)
|
49
|
+
end
|
50
|
+
|
51
|
+
# @return [String]
|
52
|
+
def text_content
|
53
|
+
Helpers.text_content_from(node)
|
46
54
|
end
|
47
55
|
|
48
56
|
# @return [String]
|
49
57
|
def unresolved_value
|
50
|
-
attribute_value || value_class_pattern_value || extended_attribute_value ||
|
58
|
+
attribute_value || value_class_pattern_value || extended_attribute_value || text_content
|
51
59
|
end
|
52
60
|
|
53
61
|
# @return [String, nil]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class ValueClassPatternParser
|
@@ -10,72 +12,55 @@ module MicroMicro
|
|
10
12
|
'datetime' => %w[del ins time]
|
11
13
|
}.freeze
|
12
14
|
|
13
|
-
# @param context [Nokogiri::XML::Element]
|
14
|
-
# @param separator [String]
|
15
|
-
def initialize(node, separator = '')
|
16
|
-
@node = node
|
17
|
-
@separator = separator
|
18
|
-
end
|
19
|
-
|
20
|
-
# @return [String, nil]
|
21
|
-
def value
|
22
|
-
@value ||= values.join(separator).strip if values.any?
|
23
|
-
end
|
24
|
-
|
25
|
-
# @return [Array<String>]
|
26
|
-
def values
|
27
|
-
@values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
|
28
|
-
end
|
29
|
-
|
30
15
|
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
31
16
|
# @param node_set [Nokogiri::XML::NodeSet]
|
32
17
|
# @return [Nokogiri::XML::NodeSet]
|
33
|
-
def self.
|
34
|
-
context.each { |node|
|
18
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
19
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
35
20
|
|
36
|
-
if context.is_a?(Nokogiri::XML::Element) && !
|
37
|
-
if value_class_node?(context) || value_title_node?(context)
|
21
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
22
|
+
if Helpers.value_class_node?(context) || Helpers.value_title_node?(context)
|
38
23
|
node_set << context
|
39
24
|
else
|
40
|
-
|
25
|
+
node_set_from(context.element_children, node_set)
|
41
26
|
end
|
42
27
|
end
|
43
28
|
|
44
29
|
node_set
|
45
30
|
end
|
46
31
|
|
47
|
-
# @param node [Nokogiri::XML::Element]
|
48
|
-
# @return [Boolean]
|
49
|
-
def self.value_class_node?(node)
|
50
|
-
node.classes.include?('value')
|
51
|
-
end
|
52
|
-
|
53
32
|
# @param node [Nokogiri::XML::Element]
|
54
33
|
# @return [String, nil]
|
55
34
|
def self.value_from(node)
|
56
|
-
return node['title'] if value_title_node?(node)
|
35
|
+
return node['title'] if Helpers.value_title_node?(node)
|
57
36
|
|
58
|
-
|
59
|
-
|
60
|
-
end
|
37
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
|
38
|
+
end
|
61
39
|
|
62
|
-
|
40
|
+
# @param node [Nokogiri::XML::Element]
|
41
|
+
# @param separator [String]
|
42
|
+
def initialize(node, separator = '')
|
43
|
+
@node = node
|
44
|
+
@separator = separator
|
63
45
|
end
|
64
46
|
|
65
|
-
# @
|
66
|
-
|
67
|
-
|
68
|
-
|
47
|
+
# @return [String, nil]
|
48
|
+
def value
|
49
|
+
@value ||= values.join(separator).strip if values.any?
|
50
|
+
end
|
51
|
+
|
52
|
+
# @return [Array<String>]
|
53
|
+
def values
|
54
|
+
@values ||=
|
55
|
+
self.class
|
56
|
+
.node_set_from(node)
|
57
|
+
.map { |value_node| self.class.value_from(value_node) }
|
58
|
+
.select(&:present?)
|
69
59
|
end
|
70
60
|
|
71
61
|
private
|
72
62
|
|
73
63
|
attr_reader :node, :separator
|
74
|
-
|
75
|
-
# @return [Nokogiri::XML::NodeSet]
|
76
|
-
def value_nodes
|
77
|
-
@value_nodes ||= self.class.nodes_from(node)
|
78
|
-
end
|
79
64
|
end
|
80
65
|
end
|
81
66
|
end
|