micromicro 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -9
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +7 -1
- data/lib/micro_micro/collections/items_collection.rb +3 -1
- data/lib/micro_micro/collections/properties_collection.rb +12 -0
- data/lib/micro_micro/collections/relationships_collection.rb +10 -9
- data/lib/micro_micro/document.rb +10 -98
- data/lib/micro_micro/helpers.rb +82 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +53 -60
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +4 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +7 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +3 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +14 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +19 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +11 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +3 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +20 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +27 -42
- data/lib/micro_micro/property.rb +68 -56
- data/lib/micro_micro/relationship.rb +15 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +31 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Parsers
|
5
|
+
class BaseImpliedPropertyParser < BasePropertyParser
|
6
|
+
private
|
7
|
+
|
8
|
+
# @return [String, nil]
|
9
|
+
def attribute_value
|
10
|
+
candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]] if candidate_node
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Nokogiri::XML::Element, nil]
|
14
|
+
def candidate_node
|
15
|
+
@candidate_node ||=
|
16
|
+
candidate_nodes.find do |node|
|
17
|
+
self.class::HTML_ELEMENTS_MAP.filter_map do |name, attribute|
|
18
|
+
node if name == node.name && node[attribute]
|
19
|
+
end.any?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# @return [Nokogiri::XML::NodeSet]
|
24
|
+
def candidate_nodes
|
25
|
+
Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class BasePropertyParser
|
@@ -12,20 +14,10 @@ module MicroMicro
|
|
12
14
|
#
|
13
15
|
# @return [String]
|
14
16
|
def value
|
15
|
-
@value ||=
|
16
|
-
|
17
|
+
@value ||=
|
18
|
+
Helpers.text_content_from(node) do |context|
|
17
19
|
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
20
|
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
# @param node [Nokogiri::XML::Element]
|
23
|
-
# @param attributes_map [Hash{String => Array}]
|
24
|
-
# @return [Array]
|
25
|
-
def self.attribute_value_from(node, attributes_map)
|
26
|
-
attributes_map.map do |attribute, names|
|
27
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
-
end.compact.first
|
29
21
|
end
|
30
22
|
|
31
23
|
private
|
@@ -1,78 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimeParser
|
4
|
-
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
5
|
-
#
|
6
6
|
# Regexp pattern matching YYYY-MM-DD and YYYY-DDD
|
7
|
-
DATE_REGEXP_PATTERN = '(?<year>\d{4})-
|
8
|
-
|
9
|
-
|
10
|
-
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
11
|
-
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
|
7
|
+
DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
|
8
|
+
'((?<ordinal>3[0-6]{2}|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
|
9
|
+
'(?<day>3[0-1]|[0-2]\d))'
|
12
10
|
|
13
|
-
|
11
|
+
# Regexp pattern matching HH:MM and HH:MM:SS
|
12
|
+
TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
|
13
|
+
'(?::(?<minutes>[0-5]\d))?' \
|
14
|
+
'(?::(?<seconds>[0-5]\d))?' \
|
15
|
+
'(?:\s*?(?<abbreviation>[apPP]\.?[mM]\.?))?'
|
14
16
|
|
15
|
-
#
|
17
|
+
# Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
|
18
|
+
TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
|
19
|
+
|
20
|
+
# Regexp for extracting named captures from a datetime-esque String.
|
21
|
+
DATE_TIME_TIMEZONE_REGEXP = /
|
22
|
+
\A
|
23
|
+
(?=.)
|
24
|
+
(?:#{DATE_REGEXP_PATTERN})?
|
25
|
+
(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
|
26
|
+
\z
|
27
|
+
/x.freeze
|
28
|
+
|
29
|
+
# Parse a string for date and/or time values according to the Microformats
|
30
|
+
# Value Class Pattern date and time parsing specification.
|
31
|
+
#
|
32
|
+
# @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
33
|
+
#
|
34
|
+
# @param string [String, #to_s]
|
16
35
|
def initialize(string)
|
17
|
-
@string = string
|
36
|
+
@string = string.to_s
|
18
37
|
end
|
19
38
|
|
20
|
-
|
39
|
+
# Define getter and predicate methods for all possible named captures
|
40
|
+
# returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
|
41
|
+
[
|
42
|
+
:year, :ordinal, :month, :day,
|
43
|
+
:hours, :minutes, :seconds,
|
44
|
+
:abbreviation, :zulu, :offset
|
45
|
+
].each do |name|
|
21
46
|
define_method(name) { values[name] }
|
22
47
|
define_method("#{name}?") { public_send(name).present? }
|
23
48
|
end
|
24
49
|
|
50
|
+
# @return [String, nil]
|
25
51
|
def normalized_calendar_date
|
26
52
|
@normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
|
27
53
|
end
|
28
54
|
|
55
|
+
# @return [String, nil]
|
29
56
|
def normalized_date
|
30
57
|
@normalized_date ||= normalized_calendar_date || normalized_ordinal_date
|
31
58
|
end
|
32
59
|
|
60
|
+
# @return [String, nil]
|
33
61
|
def normalized_hours
|
34
|
-
@normalized_hours ||=
|
35
|
-
|
36
|
-
|
62
|
+
@normalized_hours ||=
|
63
|
+
if hours?
|
64
|
+
return (hours.to_i + 12).to_s if abbreviation&.tr('.', '')&.downcase == 'pm'
|
37
65
|
|
38
|
-
|
39
|
-
|
66
|
+
format('%<hours>02d', hours: hours)
|
67
|
+
end
|
40
68
|
end
|
41
69
|
|
70
|
+
# @return [String]
|
42
71
|
def normalized_minutes
|
43
72
|
@normalized_minutes ||= minutes || '00'
|
44
73
|
end
|
45
74
|
|
75
|
+
# @return [String, nil]
|
46
76
|
def normalized_ordinal_date
|
47
77
|
@normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
|
48
78
|
end
|
49
79
|
|
80
|
+
# @return [String, nil]
|
50
81
|
def normalized_time
|
51
82
|
@normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
|
52
83
|
end
|
53
84
|
|
85
|
+
# @return [String, nil]
|
54
86
|
def normalized_timezone
|
55
87
|
@normalized_timezone ||= zulu || offset&.tr(':', '')
|
56
88
|
end
|
57
89
|
|
58
|
-
# @return [String]
|
90
|
+
# @return [String, nil]
|
59
91
|
def value
|
60
|
-
@value ||=
|
92
|
+
@value ||=
|
93
|
+
if normalized_date || normalized_time || normalized_timezone
|
94
|
+
"#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
|
95
|
+
end
|
61
96
|
end
|
62
97
|
|
63
98
|
# @return [Hash{Symbol => String, nil}]
|
64
99
|
def values
|
65
|
-
@values ||=
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
string&.match(/^(?:#{DATE_REGEXP_PATTERN})?(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?$/)&.named_captures.to_h.symbolize_keys
|
100
|
+
@values ||=
|
101
|
+
if string.match?(DATE_TIME_TIMEZONE_REGEXP)
|
102
|
+
string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
|
103
|
+
else
|
104
|
+
{}
|
105
|
+
end
|
72
106
|
end
|
73
107
|
|
74
108
|
private
|
75
109
|
|
110
|
+
# @return [String]
|
76
111
|
attr_reader :string
|
77
112
|
end
|
78
113
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class DateTimePropertyParser < BasePropertyParser
|
@@ -20,16 +22,15 @@ module MicroMicro
|
|
20
22
|
#
|
21
23
|
# @return [MicroMicro::Parsers::DateTimeParser, nil]
|
22
24
|
def adopted_date_time_parser
|
23
|
-
@adopted_date_time_parser ||=
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
end
|
25
|
+
@adopted_date_time_parser ||=
|
26
|
+
(property.prev_all.reverse + property.next_all).filter_map do |prop|
|
27
|
+
DateTimeParser.new(prop.value) if prop.date_time_property?
|
28
|
+
end.find(&:normalized_date)
|
28
29
|
end
|
29
30
|
|
30
31
|
# @return [String, nil]
|
31
32
|
def attribute_value
|
32
|
-
|
33
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
33
34
|
end
|
34
35
|
|
35
36
|
# @return [MicroMicro::Parsers::DateTimeParser]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class EmbeddedMarkupPropertyParser < BasePropertyParser
|
@@ -5,12 +7,11 @@ module MicroMicro
|
|
5
7
|
#
|
6
8
|
# @return [Hash{Symbol => String}]
|
7
9
|
def value
|
8
|
-
@value ||=
|
10
|
+
@value ||=
|
9
11
|
{
|
10
12
|
html: node.inner_html.strip,
|
11
13
|
value: super
|
12
14
|
}
|
13
|
-
end
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
@@ -1,9 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedNamePropertyParser <
|
4
|
-
|
5
|
-
'
|
6
|
-
'
|
5
|
+
class ImpliedNamePropertyParser < BaseImpliedPropertyParser
|
6
|
+
HTML_ELEMENTS_MAP = {
|
7
|
+
'img' => 'alt',
|
8
|
+
'area' => 'alt',
|
9
|
+
'abbr' => 'title'
|
7
10
|
}.freeze
|
8
11
|
|
9
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
@@ -15,24 +18,19 @@ module MicroMicro
|
|
15
18
|
|
16
19
|
private
|
17
20
|
|
18
|
-
# @return [Nokogiri::XML::NodeSet]
|
19
|
-
def candidate_nodes
|
20
|
-
@candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
21
|
-
end
|
22
|
-
|
23
21
|
# @return [Array]
|
24
22
|
def child_nodes
|
25
|
-
[
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def attribute_value
|
30
|
-
candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
|
23
|
+
[
|
24
|
+
node.at_css('> :only-child'),
|
25
|
+
node.at_css('> :only-child > :only-child')
|
26
|
+
].compact.reject { |child_node| Helpers.item_node?(child_node) }
|
31
27
|
end
|
32
28
|
|
33
29
|
# @return [String]
|
34
30
|
def text_content
|
35
|
-
|
31
|
+
Helpers.text_content_from(node) do |context|
|
32
|
+
context.css('img').each { |img| img.content = img['alt'] }
|
33
|
+
end
|
36
34
|
end
|
37
35
|
end
|
38
36
|
end
|
@@ -1,6 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedPhotoPropertyParser <
|
5
|
+
class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'img' => 'src',
|
6
10
|
'object' => 'data'
|
@@ -11,54 +15,26 @@ module MicroMicro
|
|
11
15
|
#
|
12
16
|
# @return [String, Hash{Symbol => String}, nil]
|
13
17
|
def value
|
14
|
-
@value ||=
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
@value ||=
|
19
|
+
if attribute_value
|
20
|
+
return attribute_value unless candidate_node.matches?('img[alt]')
|
21
|
+
|
22
|
+
{
|
23
|
+
value: attribute_value,
|
24
|
+
alt: candidate_node['alt'].strip
|
25
|
+
}
|
26
|
+
end
|
23
27
|
end
|
24
28
|
|
25
29
|
private
|
26
30
|
|
27
|
-
# @return [Array
|
28
|
-
def
|
29
|
-
|
30
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
31
|
-
node if node.matches?("#{element}[#{attribute}]")
|
32
|
-
end.compact
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [String, nil]
|
37
|
-
def resolved_value
|
38
|
-
@resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
|
39
|
-
end
|
31
|
+
# @return [Array]
|
32
|
+
def child_nodes
|
33
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
40
34
|
|
41
|
-
|
42
|
-
def value_node
|
43
|
-
@value_node ||= begin
|
44
|
-
return attribute_values.first if attribute_values.any?
|
45
|
-
|
46
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
47
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
48
|
-
|
49
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
50
|
-
end
|
51
|
-
|
52
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
53
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
54
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
55
|
-
|
56
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
57
|
-
end
|
58
|
-
end
|
35
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
59
36
|
|
60
|
-
|
61
|
-
end
|
37
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
62
38
|
end
|
63
39
|
end
|
64
40
|
end
|
@@ -1,6 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
|
-
class ImpliedUrlPropertyParser <
|
5
|
+
class ImpliedUrlPropertyParser < BaseImpliedPropertyParser
|
6
|
+
CSS_SELECTORS_ARRAY = ['> a[href]:only-of-type', '> area[href]:only-of-type'].freeze
|
7
|
+
|
4
8
|
HTML_ELEMENTS_MAP = {
|
5
9
|
'a' => 'href',
|
6
10
|
'area' => 'href'
|
@@ -10,41 +14,18 @@ module MicroMicro
|
|
10
14
|
#
|
11
15
|
# @return [String, nil]
|
12
16
|
def value
|
13
|
-
@value ||=
|
17
|
+
@value ||= attribute_value
|
14
18
|
end
|
15
19
|
|
16
20
|
private
|
17
21
|
|
18
|
-
# @return [Array
|
19
|
-
def
|
20
|
-
|
21
|
-
HTML_ELEMENTS_MAP.map do |element, attribute|
|
22
|
-
node if node.matches?("#{element}[#{attribute}]")
|
23
|
-
end.compact
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# @return [Nokogiri::XML::Element, nil]
|
28
|
-
def value_node
|
29
|
-
@value_node ||= begin
|
30
|
-
return attribute_values.first if attribute_values.any?
|
31
|
-
|
32
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
33
|
-
child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
|
34
|
-
|
35
|
-
return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
|
36
|
-
end
|
37
|
-
|
38
|
-
if node.element_children.one? && !Item.item_node?(node.first_element_child)
|
39
|
-
HTML_ELEMENTS_MAP.each do |element, attribute|
|
40
|
-
child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
|
22
|
+
# @return [Array]
|
23
|
+
def child_nodes
|
24
|
+
nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
|
41
25
|
|
42
|
-
|
43
|
-
end
|
44
|
-
end
|
26
|
+
nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
|
45
27
|
|
46
|
-
|
47
|
-
end
|
28
|
+
nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
|
48
29
|
end
|
49
30
|
end
|
50
31
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class PlainTextPropertyParser < BasePropertyParser
|
@@ -18,7 +20,7 @@ module MicroMicro
|
|
18
20
|
|
19
21
|
# @return [String, nil]
|
20
22
|
def attribute_value
|
21
|
-
|
23
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
22
24
|
end
|
23
25
|
|
24
26
|
# @return [String, nil]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class UrlPropertyParser < BasePropertyParser
|
@@ -18,36 +20,42 @@ module MicroMicro
|
|
18
20
|
#
|
19
21
|
# @return [String, Hash{Symbol => String}]
|
20
22
|
def value
|
21
|
-
@value ||=
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
23
|
+
@value ||=
|
24
|
+
if node.matches?('img[alt]')
|
25
|
+
{
|
26
|
+
value: resolved_value,
|
27
|
+
alt: node['alt'].strip
|
28
|
+
}
|
29
|
+
else
|
30
|
+
resolved_value
|
31
|
+
end
|
29
32
|
end
|
30
33
|
|
31
34
|
private
|
32
35
|
|
33
36
|
# @return [String, nil]
|
34
37
|
def attribute_value
|
35
|
-
|
38
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
36
39
|
end
|
37
40
|
|
38
41
|
# @return [String, nil]
|
39
42
|
def extended_attribute_value
|
40
|
-
|
43
|
+
Helpers.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
|
41
44
|
end
|
42
45
|
|
43
46
|
# @return [String]
|
44
47
|
def resolved_value
|
45
|
-
@resolved_value ||=
|
48
|
+
@resolved_value ||= node.document.resolve_relative_url(unresolved_value.strip)
|
49
|
+
end
|
50
|
+
|
51
|
+
# @return [String]
|
52
|
+
def text_content
|
53
|
+
Helpers.text_content_from(node)
|
46
54
|
end
|
47
55
|
|
48
56
|
# @return [String]
|
49
57
|
def unresolved_value
|
50
|
-
attribute_value || value_class_pattern_value || extended_attribute_value ||
|
58
|
+
attribute_value || value_class_pattern_value || extended_attribute_value || text_content
|
51
59
|
end
|
52
60
|
|
53
61
|
# @return [String, nil]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class ValueClassPatternParser
|
@@ -10,72 +12,55 @@ module MicroMicro
|
|
10
12
|
'datetime' => %w[del ins time]
|
11
13
|
}.freeze
|
12
14
|
|
13
|
-
# @param context [Nokogiri::XML::Element]
|
14
|
-
# @param separator [String]
|
15
|
-
def initialize(node, separator = '')
|
16
|
-
@node = node
|
17
|
-
@separator = separator
|
18
|
-
end
|
19
|
-
|
20
|
-
# @return [String, nil]
|
21
|
-
def value
|
22
|
-
@value ||= values.join(separator).strip if values.any?
|
23
|
-
end
|
24
|
-
|
25
|
-
# @return [Array<String>]
|
26
|
-
def values
|
27
|
-
@values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
|
28
|
-
end
|
29
|
-
|
30
15
|
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
31
16
|
# @param node_set [Nokogiri::XML::NodeSet]
|
32
17
|
# @return [Nokogiri::XML::NodeSet]
|
33
|
-
def self.
|
34
|
-
context.each { |node|
|
18
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
19
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
35
20
|
|
36
|
-
if context.is_a?(Nokogiri::XML::Element) && !
|
37
|
-
if value_class_node?(context) || value_title_node?(context)
|
21
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
22
|
+
if Helpers.value_class_node?(context) || Helpers.value_title_node?(context)
|
38
23
|
node_set << context
|
39
24
|
else
|
40
|
-
|
25
|
+
node_set_from(context.element_children, node_set)
|
41
26
|
end
|
42
27
|
end
|
43
28
|
|
44
29
|
node_set
|
45
30
|
end
|
46
31
|
|
47
|
-
# @param node [Nokogiri::XML::Element]
|
48
|
-
# @return [Boolean]
|
49
|
-
def self.value_class_node?(node)
|
50
|
-
node.classes.include?('value')
|
51
|
-
end
|
52
|
-
|
53
32
|
# @param node [Nokogiri::XML::Element]
|
54
33
|
# @return [String, nil]
|
55
34
|
def self.value_from(node)
|
56
|
-
return node['title'] if value_title_node?(node)
|
35
|
+
return node['title'] if Helpers.value_title_node?(node)
|
57
36
|
|
58
|
-
|
59
|
-
|
60
|
-
end
|
37
|
+
Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
|
38
|
+
end
|
61
39
|
|
62
|
-
|
40
|
+
# @param node [Nokogiri::XML::Element]
|
41
|
+
# @param separator [String]
|
42
|
+
def initialize(node, separator = '')
|
43
|
+
@node = node
|
44
|
+
@separator = separator
|
63
45
|
end
|
64
46
|
|
65
|
-
# @
|
66
|
-
|
67
|
-
|
68
|
-
|
47
|
+
# @return [String, nil]
|
48
|
+
def value
|
49
|
+
@value ||= values.join(separator).strip if values.any?
|
50
|
+
end
|
51
|
+
|
52
|
+
# @return [Array<String>]
|
53
|
+
def values
|
54
|
+
@values ||=
|
55
|
+
self.class
|
56
|
+
.node_set_from(node)
|
57
|
+
.map { |value_node| self.class.value_from(value_node) }
|
58
|
+
.select(&:present?)
|
69
59
|
end
|
70
60
|
|
71
61
|
private
|
72
62
|
|
73
63
|
attr_reader :node, :separator
|
74
|
-
|
75
|
-
# @return [Nokogiri::XML::NodeSet]
|
76
|
-
def value_nodes
|
77
|
-
@value_nodes ||= self.class.nodes_from(node)
|
78
|
-
end
|
79
64
|
end
|
80
65
|
end
|
81
66
|
end
|