micromicro 1.1.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -102
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +8 -1
- data/lib/micro_micro/collections/items_collection.rb +84 -1
- data/lib/micro_micro/collections/properties_collection.rb +111 -0
- data/lib/micro_micro/collections/relationships_collection.rb +85 -6
- data/lib/micro_micro/document.rb +21 -103
- data/lib/micro_micro/helpers.rb +94 -0
- data/lib/micro_micro/implied_property.rb +15 -0
- data/lib/micro_micro/item.rb +93 -79
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +6 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +61 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +10 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +15 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +21 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +12 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +4 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +22 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -42
- data/lib/micro_micro/property.rb +126 -56
- data/lib/micro_micro/relationship.rb +38 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +32 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
data/lib/micro_micro/document.rb
CHANGED
@@ -1,35 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Document
|
3
|
-
# A map of HTML `srcset` attributes and their associated element names
|
4
|
-
#
|
5
|
-
# @see https://html.spec.whatwg.org/#srcset-attributes
|
6
|
-
# @see https://html.spec.whatwg.org/#attributes-3
|
7
|
-
HTML_IMAGE_CANDIDATE_STRINGS_ATTRIBUTES_MAP = {
|
8
|
-
'imagesrcset' => %w[link],
|
9
|
-
'srcset' => %w[img source]
|
10
|
-
}.freeze
|
11
|
-
|
12
|
-
# A map of HTML URL attributes and their associated element names
|
13
|
-
#
|
14
|
-
# @see https://html.spec.whatwg.org/#attributes-3
|
15
|
-
HTML_URL_ATTRIBUTES_MAP = {
|
16
|
-
'action' => %w[form],
|
17
|
-
'cite' => %w[blockquote del ins q],
|
18
|
-
'data' => %w[object],
|
19
|
-
'formaction' => %w[button input],
|
20
|
-
'href' => %w[a area base link],
|
21
|
-
'manifest' => %w[html],
|
22
|
-
'ping' => %w[a area],
|
23
|
-
'poster' => %w[video],
|
24
|
-
'src' => %w[audio embed iframe img input script source track video]
|
25
|
-
}.freeze
|
26
|
-
|
27
5
|
# Parse a string of HTML for microformats2-encoded data.
|
28
6
|
#
|
7
|
+
# @example Parse a String of markup
|
29
8
|
# MicroMicro::Document.new('<a href="/" class="h-card" rel="me">Jason Garber</a>', 'https://sixtwothree.org')
|
30
9
|
#
|
31
|
-
#
|
32
|
-
#
|
10
|
+
# @example Parse a String of markup from a URL
|
33
11
|
# url = 'https://tantek.com'
|
34
12
|
# markup = Net::HTTP.get(URI.parse(url))
|
35
13
|
#
|
@@ -38,34 +16,41 @@ module MicroMicro
|
|
38
16
|
# @param markup [String] The HTML to parse for microformats2-encoded data.
|
39
17
|
# @param base_url [String] The URL associated with markup. Used for relative URL resolution.
|
40
18
|
def initialize(markup, base_url)
|
41
|
-
@
|
42
|
-
@base_url = base_url
|
43
|
-
|
44
|
-
resolve_relative_urls
|
19
|
+
@document = Nokogiri::HTML(markup, base_url).resolve_relative_urls!
|
45
20
|
end
|
46
21
|
|
47
22
|
# @return [String]
|
23
|
+
#
|
24
|
+
# :nocov:
|
48
25
|
def inspect
|
49
|
-
|
26
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
27
|
+
"items: #{items.inspect}, " \
|
28
|
+
"relationships: #{relationships.inspect}>"
|
50
29
|
end
|
30
|
+
# :nocov:
|
51
31
|
|
52
|
-
# A collection of
|
32
|
+
# A collection of {MicroMicro::Item}s parsed from the provided markup.
|
53
33
|
#
|
54
34
|
# @return [MicroMicro::Collections::ItemsCollection]
|
55
35
|
def items
|
56
|
-
@items ||= Collections::ItemsCollection.new(Item.
|
36
|
+
@items ||= Collections::ItemsCollection.new(Item.from_context(document.element_children))
|
57
37
|
end
|
58
38
|
|
59
|
-
# A collection of
|
39
|
+
# A collection of {MicroMicro::Relationship}s parsed from the provided markup.
|
60
40
|
#
|
61
41
|
# @return [MicroMicro::Collections::RelationshipsCollection]
|
62
42
|
def relationships
|
63
|
-
@relationships ||= Collections::RelationshipsCollection.new(Relationship.
|
43
|
+
@relationships ||= Collections::RelationshipsCollection.new(Relationship.from_context(document))
|
64
44
|
end
|
65
45
|
|
66
46
|
# Return the parsed document as a Hash.
|
67
47
|
#
|
68
48
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_a_document_for_microformats
|
49
|
+
# microformats.org: Parse a document for microformats
|
50
|
+
#
|
51
|
+
# @see MicroMicro::Collections::ItemsCollection#to_a
|
52
|
+
# @see MicroMicro::Collections::RelationshipsCollection#group_by_rel
|
53
|
+
# @see MicroMicro::Collections::RelationshipsCollection#group_by_url
|
69
54
|
#
|
70
55
|
# @return [Hash{Symbol => Array, Hash}]
|
71
56
|
def to_h
|
@@ -76,76 +61,9 @@ module MicroMicro
|
|
76
61
|
}
|
77
62
|
end
|
78
63
|
|
79
|
-
# Ignore this node?
|
80
|
-
#
|
81
|
-
# @param node [Nokogiri::XML::Element]
|
82
|
-
# @return [Boolean]
|
83
|
-
def self.ignore_node?(node)
|
84
|
-
ignored_node_names.include?(node.name)
|
85
|
-
end
|
86
|
-
|
87
|
-
# A list of HTML element names the parser should ignore.
|
88
|
-
#
|
89
|
-
# @return [Array<String>]
|
90
|
-
def self.ignored_node_names
|
91
|
-
%w[script style template]
|
92
|
-
end
|
93
|
-
|
94
|
-
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_properties
|
95
|
-
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
96
|
-
#
|
97
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
98
|
-
# @yield [context]
|
99
|
-
# @return [String]
|
100
|
-
def self.text_content_from(context)
|
101
|
-
context.css(*ignored_node_names).unlink
|
102
|
-
|
103
|
-
yield(context) if block_given?
|
104
|
-
|
105
|
-
context.text.strip
|
106
|
-
end
|
107
|
-
|
108
64
|
private
|
109
65
|
|
110
|
-
attr_reader :base_url, :markup
|
111
|
-
|
112
|
-
# @return [Nokogiri::XML::Element, nil]
|
113
|
-
def base_element
|
114
|
-
@base_element ||= Nokogiri::HTML(markup).at('//base[@href]')
|
115
|
-
end
|
116
|
-
|
117
66
|
# @return [Nokogiri::HTML::Document]
|
118
|
-
|
119
|
-
@document ||= Nokogiri::HTML(markup, resolved_base_url)
|
120
|
-
end
|
121
|
-
|
122
|
-
def resolve_relative_urls
|
123
|
-
HTML_URL_ATTRIBUTES_MAP.each do |attribute, names|
|
124
|
-
document.xpath(*names.map { |name| "//#{name}[@#{attribute}]" }).each do |node|
|
125
|
-
node[attribute] = Addressable::URI.join(resolved_base_url, node[attribute].strip).normalize.to_s
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
HTML_IMAGE_CANDIDATE_STRINGS_ATTRIBUTES_MAP.each do |attribute, names|
|
130
|
-
document.xpath(*names.map { |name| "//#{name}[@#{attribute}]" }).each do |node|
|
131
|
-
candidates = node[attribute].split(',').map(&:strip).map { |candidate| candidate.match(/^(?<url>.+?)(?<descriptor>\s+.+)?$/) }
|
132
|
-
|
133
|
-
node[attribute] = candidates.map { |candidate| "#{Addressable::URI.join(resolved_base_url, candidate[:url]).normalize}#{candidate[:descriptor]}" }.join(', ')
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
self
|
138
|
-
end
|
139
|
-
|
140
|
-
# @return [String]
|
141
|
-
def resolved_base_url
|
142
|
-
@resolved_base_url ||= begin
|
143
|
-
if base_element
|
144
|
-
Addressable::URI.join(base_url, base_element['href'].strip).normalize.to_s
|
145
|
-
else
|
146
|
-
base_url
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
67
|
+
attr_reader :document
|
150
68
|
end
|
151
69
|
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Helpers
|
5
|
+
IGNORED_NODE_NAMES = %w[script style template].freeze
|
6
|
+
|
7
|
+
private_constant :IGNORED_NODE_NAMES
|
8
|
+
|
9
|
+
# @param node [Nokogiri::XML::Element]
|
10
|
+
# @param attributes_map [Hash{String => Array}]
|
11
|
+
# @return [String, nil]
|
12
|
+
def self.attribute_value_from(node, attributes_map)
|
13
|
+
attributes_map.filter_map do |attribute, names|
|
14
|
+
node[attribute] if names.include?(node.name) && node[attribute]
|
15
|
+
end.first
|
16
|
+
end
|
17
|
+
|
18
|
+
# @param node [Nokogiri::XML::Element]
|
19
|
+
# @return [Boolean]
|
20
|
+
def self.ignore_node?(node)
|
21
|
+
IGNORED_NODE_NAMES.include?(node.name)
|
22
|
+
end
|
23
|
+
|
24
|
+
# @param nodes [Nokogiri::XML::NodeSet]
|
25
|
+
# @return [Boolean]
|
26
|
+
def self.ignore_nodes?(nodes)
|
27
|
+
(nodes.map(&:name) & IGNORED_NODE_NAMES).any?
|
28
|
+
end
|
29
|
+
|
30
|
+
# @param node [Nokogiri::XML::Element]
|
31
|
+
# @return [Boolean]
|
32
|
+
def self.item_node?(node)
|
33
|
+
root_class_names_from(node).any?
|
34
|
+
end
|
35
|
+
|
36
|
+
# @param nodes [Nokogiri::XML::NodeSet]
|
37
|
+
# @return [Boolean]
|
38
|
+
def self.item_nodes?(nodes)
|
39
|
+
nodes.filter_map { |node| item_node?(node) }.any?
|
40
|
+
end
|
41
|
+
|
42
|
+
# @param node [Nokogiri::XML::Element]
|
43
|
+
# @return [Array<String>]
|
44
|
+
def self.property_class_names_from(node)
|
45
|
+
node.classes.grep(/^(?:dt|e|p|u)(?:-[0-9a-z]+)?(?:-[a-z]+)+$/).uniq
|
46
|
+
end
|
47
|
+
|
48
|
+
# @param node [Nokogiri::XML::Element]
|
49
|
+
# @return [Boolean]
|
50
|
+
def self.property_node?(node)
|
51
|
+
property_class_names_from(node).any?
|
52
|
+
end
|
53
|
+
|
54
|
+
# @param node [Nokogiri::XML::Element]
|
55
|
+
# @return [Array<String>]
|
56
|
+
def self.root_class_names_from(node)
|
57
|
+
node.classes.grep(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/).uniq.sort
|
58
|
+
end
|
59
|
+
|
60
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_properties
|
61
|
+
# microformats.org: microformats2 parsing specification § Parse an element for properties
|
62
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
63
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
64
|
+
#
|
65
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
66
|
+
# @yield [context]
|
67
|
+
# @return [String]
|
68
|
+
def self.text_content_from(context)
|
69
|
+
context.css(*IGNORED_NODE_NAMES).unlink
|
70
|
+
|
71
|
+
yield(context) if block_given?
|
72
|
+
|
73
|
+
context.text.strip
|
74
|
+
end
|
75
|
+
|
76
|
+
# @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
|
77
|
+
# microformats.org: Value Class Pattern § Basic Parsing
|
78
|
+
#
|
79
|
+
# @param node [Nokogiri::XML::Element]
|
80
|
+
# @return [Boolean]
|
81
|
+
def self.value_class_node?(node)
|
82
|
+
node.classes.include?('value')
|
83
|
+
end
|
84
|
+
|
85
|
+
# @see https://microformats.org/wiki/value-class-pattern#Parsing_value_from_a_title_attribute
|
86
|
+
# microformats.org: Value Class Pattern § Parsing value from a title attribute
|
87
|
+
#
|
88
|
+
# @param node [Nokogiri::XML::Element]
|
89
|
+
# @return [Boolean]
|
90
|
+
def self.value_title_node?(node)
|
91
|
+
node.classes.include?('value-title')
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class ImpliedProperty < Property
|
3
5
|
IMPLIED_PROPERTY_PARSERS_MAP = {
|
@@ -6,11 +8,24 @@ module MicroMicro
|
|
6
8
|
'url' => Parsers::ImpliedUrlPropertyParser
|
7
9
|
}.freeze
|
8
10
|
|
11
|
+
private_constant :IMPLIED_PROPERTY_PARSERS_MAP
|
12
|
+
|
13
|
+
# Always return +true+ when asked if this {MicroMicro::ImpliedProperty} is
|
14
|
+
# an implied property.
|
15
|
+
#
|
16
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
17
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
18
|
+
#
|
19
|
+
# @see MicroMicro::Property#implied?
|
20
|
+
#
|
9
21
|
# @return [Boolean]
|
10
22
|
def implied?
|
11
23
|
true
|
12
24
|
end
|
13
25
|
|
26
|
+
# Always return +false+ when asked if this {MicroMicro::ImpliedProperty} is
|
27
|
+
# a {MicroMicro::Item} node.
|
28
|
+
#
|
14
29
|
# @return [Boolean]
|
15
30
|
def item_node?
|
16
31
|
false
|
data/lib/micro_micro/item.rb
CHANGED
@@ -1,10 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Item
|
3
5
|
include Collectible
|
4
6
|
|
7
|
+
class ItemNodeSearch
|
8
|
+
attr_reader :node_set
|
9
|
+
|
10
|
+
def initialize(document)
|
11
|
+
@node_set = Nokogiri::XML::NodeSet.new(document, [])
|
12
|
+
end
|
13
|
+
|
14
|
+
# rubocop:disable Metrics
|
15
|
+
def search(context)
|
16
|
+
context.each { |node| search(node) } if context.is_a?(Nokogiri::XML::NodeSet)
|
17
|
+
|
18
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
19
|
+
if Helpers.item_node?(context)
|
20
|
+
node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
|
21
|
+
else
|
22
|
+
search(context.element_children)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
node_set
|
27
|
+
end
|
28
|
+
# rubocop:enable Metrics
|
29
|
+
end
|
30
|
+
|
31
|
+
private_constant :ItemNodeSearch
|
32
|
+
|
33
|
+
# Extract {MicroMicro::Item}s from a context.
|
34
|
+
#
|
35
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
36
|
+
# @return [Array<MicroMicro::Item>]
|
37
|
+
def self.from_context(context)
|
38
|
+
ItemNodeSearch
|
39
|
+
.new(context.document)
|
40
|
+
.search(context)
|
41
|
+
.map { |node| new(node) }
|
42
|
+
end
|
43
|
+
|
5
44
|
# Parse a node for microformats2-encoded data.
|
6
45
|
#
|
7
46
|
# @param node [Nokogiri::XML::Element]
|
47
|
+
# @return [MicroMicro::Item]
|
8
48
|
def initialize(node)
|
9
49
|
@node = node
|
10
50
|
|
@@ -13,44 +53,65 @@ module MicroMicro
|
|
13
53
|
properties << implied_url if implied_url?
|
14
54
|
end
|
15
55
|
|
16
|
-
# A collection of child
|
56
|
+
# A collection of child {MicroMicro::Item}s parsed from the node.
|
17
57
|
#
|
18
58
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
59
|
+
# microformats.org: microformats2 parsing specification § Parse an element for class microformats
|
19
60
|
#
|
20
61
|
# @return [MicroMicro::Collections::ItemsCollection]
|
21
62
|
def children
|
22
|
-
@children ||= Collections::ItemsCollection.new(
|
63
|
+
@children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
|
23
64
|
end
|
24
65
|
|
25
|
-
#
|
66
|
+
# Does this {MicroMicro::Item} contain any child {MicroMicro::Item}s?
|
67
|
+
#
|
68
|
+
# @return [Boolean]
|
69
|
+
def children?
|
70
|
+
children.any?
|
71
|
+
end
|
72
|
+
|
73
|
+
# The value of the node's +id+ attribute, if present.
|
26
74
|
#
|
27
75
|
# @return [String, nil]
|
28
76
|
def id
|
29
77
|
@id ||= node['id']&.strip
|
30
78
|
end
|
31
79
|
|
32
|
-
#
|
33
|
-
|
34
|
-
|
80
|
+
# Does this {MicroMicro::Item} have an +id+ attribute value?
|
81
|
+
#
|
82
|
+
# @return [Boolean]
|
83
|
+
def id?
|
84
|
+
id.present?
|
35
85
|
end
|
36
86
|
|
37
|
-
#
|
87
|
+
# @return [String]
|
38
88
|
#
|
39
|
-
#
|
40
|
-
def
|
41
|
-
|
89
|
+
# :nocov:
|
90
|
+
def inspect
|
91
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
92
|
+
"types: #{types.inspect}, " \
|
93
|
+
"properties: #{properties.count}, " \
|
94
|
+
"children: #{children.count}>"
|
42
95
|
end
|
96
|
+
# :nocov:
|
43
97
|
|
44
|
-
# A collection of
|
98
|
+
# A collection of {MicroMicro::Property}s parsed from the node.
|
45
99
|
#
|
46
100
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
47
101
|
def properties
|
48
|
-
@properties ||= Collections::PropertiesCollection.new(Property.
|
102
|
+
@properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
|
49
103
|
end
|
50
104
|
|
51
|
-
# Return the parsed
|
105
|
+
# Return the parsed {MicroMicro::Item} as a Hash.
|
52
106
|
#
|
53
107
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
108
|
+
# microformats.org: microformats2 parsing specification § Parse an element for class microformats
|
109
|
+
#
|
110
|
+
# @see MicroMicro::Item#children
|
111
|
+
# @see MicroMicro::Item#id
|
112
|
+
# @see MicroMicro::Item#properties
|
113
|
+
# @see MicroMicro::Item#types
|
114
|
+
# @see MicroMicro::Collections::PropertiesCollection#to_h
|
54
115
|
#
|
55
116
|
# @return [Hash]
|
56
117
|
def to_h
|
@@ -59,81 +120,27 @@ module MicroMicro
|
|
59
120
|
properties: properties.to_h
|
60
121
|
}
|
61
122
|
|
62
|
-
hash[:id] = id if id
|
63
|
-
hash[:children] = children.to_a if children
|
123
|
+
hash[:id] = id if id?
|
124
|
+
hash[:children] = children.to_a if children?
|
64
125
|
|
65
126
|
hash
|
66
127
|
end
|
67
128
|
|
68
|
-
# An
|
129
|
+
# An Array of root class names parsed from the node's +class+ attribute.
|
69
130
|
#
|
70
131
|
# @return [Array<String>]
|
71
132
|
def types
|
72
|
-
@types ||=
|
73
|
-
end
|
74
|
-
|
75
|
-
# A collection of url properties parsed from the node.
|
76
|
-
#
|
77
|
-
# @return [MicroMicro::Collections::PropertiesCollection]
|
78
|
-
def url_properties
|
79
|
-
@url_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'u' })
|
80
|
-
end
|
81
|
-
|
82
|
-
# Does this node's `class` attribute contain root class names?
|
83
|
-
#
|
84
|
-
# @param node [Nokogiri::XML::Element]
|
85
|
-
# @return [Boolean]
|
86
|
-
def self.item_node?(node)
|
87
|
-
types_from(node).any?
|
88
|
-
end
|
89
|
-
|
90
|
-
# Extract items from a context.
|
91
|
-
#
|
92
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
93
|
-
# @return [Array<MicroMicro::Item>]
|
94
|
-
def self.items_from(context)
|
95
|
-
nodes_from(context).map { |node| new(node) }
|
96
|
-
end
|
97
|
-
|
98
|
-
# Extract item nodes from a context.
|
99
|
-
#
|
100
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
101
|
-
# @param node_set [Nokogiri::XML::NodeSet]
|
102
|
-
# @return [Nokogiri::XML::NodeSet]
|
103
|
-
def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
104
|
-
return nodes_from(context.element_children, node_set) if context.is_a?(Nokogiri::HTML::Document)
|
105
|
-
|
106
|
-
context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
107
|
-
|
108
|
-
if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
|
109
|
-
if item_node?(context)
|
110
|
-
node_set << context unless Property.property_node?(context)
|
111
|
-
else
|
112
|
-
nodes_from(context.element_children, node_set)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
node_set
|
117
|
-
end
|
118
|
-
|
119
|
-
# Extract root class names from a node.
|
120
|
-
#
|
121
|
-
# node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
|
122
|
-
# MicroMicro::Item.types_from(node) #=> ['h-card']
|
123
|
-
#
|
124
|
-
# @param node [Nokogiri::XML::Element]
|
125
|
-
# @return [Array<String>]
|
126
|
-
def self.types_from(node)
|
127
|
-
node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
|
133
|
+
@types ||= Helpers.root_class_names_from(node)
|
128
134
|
end
|
129
135
|
|
130
136
|
private
|
131
137
|
|
138
|
+
# @return [Nokogiri::XML::Element]
|
132
139
|
attr_reader :node
|
133
140
|
|
134
141
|
# @return [MicroMicro::ImpliedProperty]
|
135
142
|
def implied_name
|
136
|
-
@implied_name ||= ImpliedProperty.new(node,
|
143
|
+
@implied_name ||= ImpliedProperty.new(node, 'p-name')
|
137
144
|
end
|
138
145
|
|
139
146
|
# @return [Boolean]
|
@@ -143,7 +150,7 @@ module MicroMicro
|
|
143
150
|
|
144
151
|
# @return [MicroMicro::ImpliedProperty]
|
145
152
|
def implied_photo
|
146
|
-
@implied_photo ||= ImpliedProperty.new(node,
|
153
|
+
@implied_photo ||= ImpliedProperty.new(node, 'u-photo')
|
147
154
|
end
|
148
155
|
|
149
156
|
# @return [Boolean]
|
@@ -153,7 +160,7 @@ module MicroMicro
|
|
153
160
|
|
154
161
|
# @return [MicroMicro::ImpliedProperty]
|
155
162
|
def implied_url
|
156
|
-
@implied_url ||= ImpliedProperty.new(node,
|
163
|
+
@implied_url ||= ImpliedProperty.new(node, 'u-url')
|
157
164
|
end
|
158
165
|
|
159
166
|
# @return [Boolean]
|
@@ -163,22 +170,29 @@ module MicroMicro
|
|
163
170
|
|
164
171
|
# @return [Boolean]
|
165
172
|
def imply_name?
|
166
|
-
properties.none?
|
173
|
+
properties.names.none?('name') &&
|
174
|
+
properties.none?(&:embedded_markup_property?) &&
|
175
|
+
properties.none?(&:plain_text_property?) &&
|
176
|
+
!nested_items?
|
167
177
|
end
|
168
178
|
|
169
179
|
# @return [Boolean]
|
170
180
|
def imply_photo?
|
171
|
-
properties.none?
|
181
|
+
properties.names.none?('photo') &&
|
182
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
183
|
+
!nested_items?
|
172
184
|
end
|
173
185
|
|
174
186
|
# @return [Boolean]
|
175
187
|
def imply_url?
|
176
|
-
properties.none?
|
188
|
+
properties.names.none?('url') &&
|
189
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
190
|
+
!nested_items?
|
177
191
|
end
|
178
192
|
|
179
193
|
# @return [Boolean]
|
180
194
|
def nested_items?
|
181
|
-
@nested_items ||= properties.find(&:item_node?) || children
|
195
|
+
@nested_items ||= properties.find(&:item_node?) || children?
|
182
196
|
end
|
183
197
|
end
|
184
198
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Parsers
|
5
|
+
class BaseImpliedPropertyParser < BasePropertyParser
|
6
|
+
private
|
7
|
+
|
8
|
+
# @return [String, nil]
|
9
|
+
def attribute_value
|
10
|
+
candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]] if candidate_node
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Nokogiri::XML::Element, nil]
|
14
|
+
def candidate_node
|
15
|
+
@candidate_node ||=
|
16
|
+
candidate_nodes.find do |node|
|
17
|
+
self.class::HTML_ELEMENTS_MAP.filter_map do |name, attribute|
|
18
|
+
node if name == node.name && node[attribute]
|
19
|
+
end.any?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# @return [Nokogiri::XML::NodeSet]
|
24
|
+
def candidate_nodes
|
25
|
+
Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class BasePropertyParser
|
@@ -8,24 +10,16 @@ module MicroMicro
|
|
8
10
|
end
|
9
11
|
|
10
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing a +p-+ property
|
11
14
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
15
|
+
# microformats.org: microformats2 parsing specification § Parsing an +e-+ property
|
12
16
|
#
|
13
17
|
# @return [String]
|
14
18
|
def value
|
15
|
-
@value ||=
|
16
|
-
|
19
|
+
@value ||=
|
20
|
+
Helpers.text_content_from(node) do |context|
|
17
21
|
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
22
|
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
# @param node [Nokogiri::XML::Element]
|
23
|
-
# @param attributes_map [Hash{String => Array}]
|
24
|
-
# @return [Array]
|
25
|
-
def self.attribute_value_from(node, attributes_map)
|
26
|
-
attributes_map.map do |attribute, names|
|
27
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
-
end.compact.first
|
29
23
|
end
|
30
24
|
|
31
25
|
private
|