micromicro 1.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -102
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +8 -1
- data/lib/micro_micro/collections/items_collection.rb +84 -1
- data/lib/micro_micro/collections/properties_collection.rb +111 -0
- data/lib/micro_micro/collections/relationships_collection.rb +85 -6
- data/lib/micro_micro/document.rb +21 -103
- data/lib/micro_micro/helpers.rb +94 -0
- data/lib/micro_micro/implied_property.rb +15 -0
- data/lib/micro_micro/item.rb +93 -79
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +6 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +61 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +10 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +15 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +21 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +12 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +4 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +22 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -42
- data/lib/micro_micro/property.rb +126 -56
- data/lib/micro_micro/relationship.rb +38 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +32 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
data/lib/micro_micro/document.rb
CHANGED
@@ -1,35 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Document
|
3
|
-
# A map of HTML `srcset` attributes and their associated element names
|
4
|
-
#
|
5
|
-
# @see https://html.spec.whatwg.org/#srcset-attributes
|
6
|
-
# @see https://html.spec.whatwg.org/#attributes-3
|
7
|
-
HTML_IMAGE_CANDIDATE_STRINGS_ATTRIBUTES_MAP = {
|
8
|
-
'imagesrcset' => %w[link],
|
9
|
-
'srcset' => %w[img source]
|
10
|
-
}.freeze
|
11
|
-
|
12
|
-
# A map of HTML URL attributes and their associated element names
|
13
|
-
#
|
14
|
-
# @see https://html.spec.whatwg.org/#attributes-3
|
15
|
-
HTML_URL_ATTRIBUTES_MAP = {
|
16
|
-
'action' => %w[form],
|
17
|
-
'cite' => %w[blockquote del ins q],
|
18
|
-
'data' => %w[object],
|
19
|
-
'formaction' => %w[button input],
|
20
|
-
'href' => %w[a area base link],
|
21
|
-
'manifest' => %w[html],
|
22
|
-
'ping' => %w[a area],
|
23
|
-
'poster' => %w[video],
|
24
|
-
'src' => %w[audio embed iframe img input script source track video]
|
25
|
-
}.freeze
|
26
|
-
|
27
5
|
# Parse a string of HTML for microformats2-encoded data.
|
28
6
|
#
|
7
|
+
# @example Parse a String of markup
|
29
8
|
# MicroMicro::Document.new('<a href="/" class="h-card" rel="me">Jason Garber</a>', 'https://sixtwothree.org')
|
30
9
|
#
|
31
|
-
#
|
32
|
-
#
|
10
|
+
# @example Parse a String of markup from a URL
|
33
11
|
# url = 'https://tantek.com'
|
34
12
|
# markup = Net::HTTP.get(URI.parse(url))
|
35
13
|
#
|
@@ -38,34 +16,41 @@ module MicroMicro
|
|
38
16
|
# @param markup [String] The HTML to parse for microformats2-encoded data.
|
39
17
|
# @param base_url [String] The URL associated with markup. Used for relative URL resolution.
|
40
18
|
def initialize(markup, base_url)
|
41
|
-
@markup = markup
|
42
|
-
@base_url = base_url
|
43
|
-
|
44
|
-
resolve_relative_urls
|
19
|
+
@document = Nokogiri::HTML(markup, base_url).resolve_relative_urls!
|
45
20
|
end
|
46
21
|
|
47
22
|
# @return [String]
|
23
|
+
#
|
24
|
+
# :nocov:
|
48
25
|
def inspect
|
49
|
-
|
26
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
27
|
+
"items: #{items.inspect}, " \
|
28
|
+
"relationships: #{relationships.inspect}>"
|
50
29
|
end
|
30
|
+
# :nocov:
|
51
31
|
|
52
|
-
# A collection of
|
32
|
+
# A collection of {MicroMicro::Item}s parsed from the provided markup.
|
53
33
|
#
|
54
34
|
# @return [MicroMicro::Collections::ItemsCollection]
|
55
35
|
def items
|
56
|
-
@items ||= Collections::ItemsCollection.new(Item.items_from(document))
|
36
|
+
@items ||= Collections::ItemsCollection.new(Item.from_context(document.element_children))
|
57
37
|
end
|
58
38
|
|
59
|
-
# A collection of
|
39
|
+
# A collection of {MicroMicro::Relationship}s parsed from the provided markup.
|
60
40
|
#
|
61
41
|
# @return [MicroMicro::Collections::RelationshipsCollection]
|
62
42
|
def relationships
|
63
|
-
@relationships ||= Collections::RelationshipsCollection.new(Relationship.
|
43
|
+
@relationships ||= Collections::RelationshipsCollection.new(Relationship.from_context(document))
|
64
44
|
end
|
65
45
|
|
66
46
|
# Return the parsed document as a Hash.
|
67
47
|
#
|
68
48
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_a_document_for_microformats
|
49
|
+
# microformats.org: Parse a document for microformats
|
50
|
+
#
|
51
|
+
# @see MicroMicro::Collections::ItemsCollection#to_a
|
52
|
+
# @see MicroMicro::Collections::RelationshipsCollection#group_by_rel
|
53
|
+
# @see MicroMicro::Collections::RelationshipsCollection#group_by_url
|
69
54
|
#
|
70
55
|
# @return [Hash{Symbol => Array, Hash}]
|
71
56
|
def to_h
|
@@ -76,76 +61,9 @@ module MicroMicro
|
|
76
61
|
}
|
77
62
|
end
|
78
63
|
|
79
|
-
# Ignore this node?
|
80
|
-
#
|
81
|
-
# @param node [Nokogiri::XML::Element]
|
82
|
-
# @return [Boolean]
|
83
|
-
def self.ignore_node?(node)
|
84
|
-
ignored_node_names.include?(node.name)
|
85
|
-
end
|
86
|
-
|
87
|
-
# A list of HTML element names the parser should ignore.
|
88
|
-
#
|
89
|
-
# @return [Array<String>]
|
90
|
-
def self.ignored_node_names
|
91
|
-
%w[script style template]
|
92
|
-
end
|
93
|
-
|
94
|
-
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_properties
|
95
|
-
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
96
|
-
#
|
97
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
98
|
-
# @yield [context]
|
99
|
-
# @return [String]
|
100
|
-
def self.text_content_from(context)
|
101
|
-
context.css(*ignored_node_names).unlink
|
102
|
-
|
103
|
-
yield(context) if block_given?
|
104
|
-
|
105
|
-
context.text.strip
|
106
|
-
end
|
107
|
-
|
108
64
|
private
|
109
65
|
|
110
|
-
attr_reader :base_url, :markup
|
111
|
-
|
112
|
-
# @return [Nokogiri::XML::Element, nil]
|
113
|
-
def base_element
|
114
|
-
@base_element ||= Nokogiri::HTML(markup).at('//base[@href]')
|
115
|
-
end
|
116
|
-
|
117
66
|
# @return [Nokogiri::HTML::Document]
|
118
|
-
def document
|
119
|
-
@document ||= Nokogiri::HTML(markup, resolved_base_url)
|
120
|
-
end
|
121
|
-
|
122
|
-
def resolve_relative_urls
|
123
|
-
HTML_URL_ATTRIBUTES_MAP.each do |attribute, names|
|
124
|
-
document.xpath(*names.map { |name| "//#{name}[@#{attribute}]" }).each do |node|
|
125
|
-
node[attribute] = Addressable::URI.join(resolved_base_url, node[attribute].strip).normalize.to_s
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
HTML_IMAGE_CANDIDATE_STRINGS_ATTRIBUTES_MAP.each do |attribute, names|
|
130
|
-
document.xpath(*names.map { |name| "//#{name}[@#{attribute}]" }).each do |node|
|
131
|
-
candidates = node[attribute].split(',').map(&:strip).map { |candidate| candidate.match(/^(?<url>.+?)(?<descriptor>\s+.+)?$/) }
|
132
|
-
|
133
|
-
node[attribute] = candidates.map { |candidate| "#{Addressable::URI.join(resolved_base_url, candidate[:url]).normalize}#{candidate[:descriptor]}" }.join(', ')
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
self
|
138
|
-
end
|
139
|
-
|
140
|
-
# @return [String]
|
141
|
-
def resolved_base_url
|
142
|
-
@resolved_base_url ||= begin
|
143
|
-
if base_element
|
144
|
-
Addressable::URI.join(base_url, base_element['href'].strip).normalize.to_s
|
145
|
-
else
|
146
|
-
base_url
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
67
|
+
attr_reader :document
|
150
68
|
end
|
151
69
|
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Helpers
|
5
|
+
IGNORED_NODE_NAMES = %w[script style template].freeze
|
6
|
+
|
7
|
+
private_constant :IGNORED_NODE_NAMES
|
8
|
+
|
9
|
+
# @param node [Nokogiri::XML::Element]
|
10
|
+
# @param attributes_map [Hash{String => Array}]
|
11
|
+
# @return [String, nil]
|
12
|
+
def self.attribute_value_from(node, attributes_map)
|
13
|
+
attributes_map.filter_map do |attribute, names|
|
14
|
+
node[attribute] if names.include?(node.name) && node[attribute]
|
15
|
+
end.first
|
16
|
+
end
|
17
|
+
|
18
|
+
# @param node [Nokogiri::XML::Element]
|
19
|
+
# @return [Boolean]
|
20
|
+
def self.ignore_node?(node)
|
21
|
+
IGNORED_NODE_NAMES.include?(node.name)
|
22
|
+
end
|
23
|
+
|
24
|
+
# @param nodes [Nokogiri::XML::NodeSet]
|
25
|
+
# @return [Boolean]
|
26
|
+
def self.ignore_nodes?(nodes)
|
27
|
+
(nodes.map(&:name) & IGNORED_NODE_NAMES).any?
|
28
|
+
end
|
29
|
+
|
30
|
+
# @param node [Nokogiri::XML::Element]
|
31
|
+
# @return [Boolean]
|
32
|
+
def self.item_node?(node)
|
33
|
+
root_class_names_from(node).any?
|
34
|
+
end
|
35
|
+
|
36
|
+
# @param nodes [Nokogiri::XML::NodeSet]
|
37
|
+
# @return [Boolean]
|
38
|
+
def self.item_nodes?(nodes)
|
39
|
+
nodes.filter_map { |node| item_node?(node) }.any?
|
40
|
+
end
|
41
|
+
|
42
|
+
# @param node [Nokogiri::XML::Element]
|
43
|
+
# @return [Array<String>]
|
44
|
+
def self.property_class_names_from(node)
|
45
|
+
node.classes.grep(/^(?:dt|e|p|u)(?:-[0-9a-z]+)?(?:-[a-z]+)+$/).uniq
|
46
|
+
end
|
47
|
+
|
48
|
+
# @param node [Nokogiri::XML::Element]
|
49
|
+
# @return [Boolean]
|
50
|
+
def self.property_node?(node)
|
51
|
+
property_class_names_from(node).any?
|
52
|
+
end
|
53
|
+
|
54
|
+
# @param node [Nokogiri::XML::Element]
|
55
|
+
# @return [Array<String>]
|
56
|
+
def self.root_class_names_from(node)
|
57
|
+
node.classes.grep(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/).uniq.sort
|
58
|
+
end
|
59
|
+
|
60
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_properties
|
61
|
+
# microformats.org: microformats2 parsing specification § Parse an element for properties
|
62
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
63
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
64
|
+
#
|
65
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
66
|
+
# @yield [context]
|
67
|
+
# @return [String]
|
68
|
+
def self.text_content_from(context)
|
69
|
+
context.css(*IGNORED_NODE_NAMES).unlink
|
70
|
+
|
71
|
+
yield(context) if block_given?
|
72
|
+
|
73
|
+
context.text.strip
|
74
|
+
end
|
75
|
+
|
76
|
+
# @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
|
77
|
+
# microformats.org: Value Class Pattern § Basic Parsing
|
78
|
+
#
|
79
|
+
# @param node [Nokogiri::XML::Element]
|
80
|
+
# @return [Boolean]
|
81
|
+
def self.value_class_node?(node)
|
82
|
+
node.classes.include?('value')
|
83
|
+
end
|
84
|
+
|
85
|
+
# @see https://microformats.org/wiki/value-class-pattern#Parsing_value_from_a_title_attribute
|
86
|
+
# microformats.org: Value Class Pattern § Parsing value from a title attribute
|
87
|
+
#
|
88
|
+
# @param node [Nokogiri::XML::Element]
|
89
|
+
# @return [Boolean]
|
90
|
+
def self.value_title_node?(node)
|
91
|
+
node.classes.include?('value-title')
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class ImpliedProperty < Property
|
3
5
|
IMPLIED_PROPERTY_PARSERS_MAP = {
|
@@ -6,11 +8,24 @@ module MicroMicro
|
|
6
8
|
'url' => Parsers::ImpliedUrlPropertyParser
|
7
9
|
}.freeze
|
8
10
|
|
11
|
+
private_constant :IMPLIED_PROPERTY_PARSERS_MAP
|
12
|
+
|
13
|
+
# Always return +true+ when asked if this {MicroMicro::ImpliedProperty} is
|
14
|
+
# an implied property.
|
15
|
+
#
|
16
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
17
|
+
# microformats.org: microformats2 parsing specification § Parsing for implied properties
|
18
|
+
#
|
19
|
+
# @see MicroMicro::Property#implied?
|
20
|
+
#
|
9
21
|
# @return [Boolean]
|
10
22
|
def implied?
|
11
23
|
true
|
12
24
|
end
|
13
25
|
|
26
|
+
# Always return +false+ when asked if this {MicroMicro::ImpliedProperty} is
|
27
|
+
# a {MicroMicro::Item} node.
|
28
|
+
#
|
14
29
|
# @return [Boolean]
|
15
30
|
def item_node?
|
16
31
|
false
|
data/lib/micro_micro/item.rb
CHANGED
@@ -1,10 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Item
|
3
5
|
include Collectible
|
4
6
|
|
7
|
+
class ItemNodeSearch
|
8
|
+
attr_reader :node_set
|
9
|
+
|
10
|
+
def initialize(document)
|
11
|
+
@node_set = Nokogiri::XML::NodeSet.new(document, [])
|
12
|
+
end
|
13
|
+
|
14
|
+
# rubocop:disable Metrics
|
15
|
+
def search(context)
|
16
|
+
context.each { |node| search(node) } if context.is_a?(Nokogiri::XML::NodeSet)
|
17
|
+
|
18
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
19
|
+
if Helpers.item_node?(context)
|
20
|
+
node_set << context unless Helpers.item_nodes?(context.ancestors) && Helpers.property_node?(context)
|
21
|
+
else
|
22
|
+
search(context.element_children)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
node_set
|
27
|
+
end
|
28
|
+
# rubocop:enable Metrics
|
29
|
+
end
|
30
|
+
|
31
|
+
private_constant :ItemNodeSearch
|
32
|
+
|
33
|
+
# Extract {MicroMicro::Item}s from a context.
|
34
|
+
#
|
35
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
36
|
+
# @return [Array<MicroMicro::Item>]
|
37
|
+
def self.from_context(context)
|
38
|
+
ItemNodeSearch
|
39
|
+
.new(context.document)
|
40
|
+
.search(context)
|
41
|
+
.map { |node| new(node) }
|
42
|
+
end
|
43
|
+
|
5
44
|
# Parse a node for microformats2-encoded data.
|
6
45
|
#
|
7
46
|
# @param node [Nokogiri::XML::Element]
|
47
|
+
# @return [MicroMicro::Item]
|
8
48
|
def initialize(node)
|
9
49
|
@node = node
|
10
50
|
|
@@ -13,44 +53,65 @@ module MicroMicro
|
|
13
53
|
properties << implied_url if implied_url?
|
14
54
|
end
|
15
55
|
|
16
|
-
# A collection of child
|
56
|
+
# A collection of child {MicroMicro::Item}s parsed from the node.
|
17
57
|
#
|
18
58
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
59
|
+
# microformats.org: microformats2 parsing specification § Parse an element for class microformats
|
19
60
|
#
|
20
61
|
# @return [MicroMicro::Collections::ItemsCollection]
|
21
62
|
def children
|
22
|
-
@children ||= Collections::ItemsCollection.new(
|
63
|
+
@children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
|
23
64
|
end
|
24
65
|
|
25
|
-
#
|
66
|
+
# Does this {MicroMicro::Item} contain any child {MicroMicro::Item}s?
|
67
|
+
#
|
68
|
+
# @return [Boolean]
|
69
|
+
def children?
|
70
|
+
children.any?
|
71
|
+
end
|
72
|
+
|
73
|
+
# The value of the node's +id+ attribute, if present.
|
26
74
|
#
|
27
75
|
# @return [String, nil]
|
28
76
|
def id
|
29
77
|
@id ||= node['id']&.strip
|
30
78
|
end
|
31
79
|
|
32
|
-
#
|
33
|
-
|
34
|
-
|
80
|
+
# Does this {MicroMicro::Item} have an +id+ attribute value?
|
81
|
+
#
|
82
|
+
# @return [Boolean]
|
83
|
+
def id?
|
84
|
+
id.present?
|
35
85
|
end
|
36
86
|
|
37
|
-
#
|
87
|
+
# @return [String]
|
38
88
|
#
|
39
|
-
#
|
40
|
-
def
|
41
|
-
|
89
|
+
# :nocov:
|
90
|
+
def inspect
|
91
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
92
|
+
"types: #{types.inspect}, " \
|
93
|
+
"properties: #{properties.count}, " \
|
94
|
+
"children: #{children.count}>"
|
42
95
|
end
|
96
|
+
# :nocov:
|
43
97
|
|
44
|
-
# A collection of
|
98
|
+
# A collection of {MicroMicro::Property}s parsed from the node.
|
45
99
|
#
|
46
100
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
47
101
|
def properties
|
48
|
-
@properties ||= Collections::PropertiesCollection.new(Property.
|
102
|
+
@properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
|
49
103
|
end
|
50
104
|
|
51
|
-
# Return the parsed
|
105
|
+
# Return the parsed {MicroMicro::Item} as a Hash.
|
52
106
|
#
|
53
107
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_class_microformats
|
108
|
+
# microformats.org: microformats2 parsing specification § Parse an element for class microformats
|
109
|
+
#
|
110
|
+
# @see MicroMicro::Item#children
|
111
|
+
# @see MicroMicro::Item#id
|
112
|
+
# @see MicroMicro::Item#properties
|
113
|
+
# @see MicroMicro::Item#types
|
114
|
+
# @see MicroMicro::Collections::PropertiesCollection#to_h
|
54
115
|
#
|
55
116
|
# @return [Hash]
|
56
117
|
def to_h
|
@@ -59,81 +120,27 @@ module MicroMicro
|
|
59
120
|
properties: properties.to_h
|
60
121
|
}
|
61
122
|
|
62
|
-
hash[:id] = id if id
|
63
|
-
hash[:children] = children.to_a if children
|
123
|
+
hash[:id] = id if id?
|
124
|
+
hash[:children] = children.to_a if children?
|
64
125
|
|
65
126
|
hash
|
66
127
|
end
|
67
128
|
|
68
|
-
# An
|
129
|
+
# An Array of root class names parsed from the node's +class+ attribute.
|
69
130
|
#
|
70
131
|
# @return [Array<String>]
|
71
132
|
def types
|
72
|
-
@types ||=
|
73
|
-
end
|
74
|
-
|
75
|
-
# A collection of url properties parsed from the node.
|
76
|
-
#
|
77
|
-
# @return [MicroMicro::Collections::PropertiesCollection]
|
78
|
-
def url_properties
|
79
|
-
@url_properties ||= Collections::PropertiesCollection.new(properties.select { |property| property.prefix == 'u' })
|
80
|
-
end
|
81
|
-
|
82
|
-
# Does this node's `class` attribute contain root class names?
|
83
|
-
#
|
84
|
-
# @param node [Nokogiri::XML::Element]
|
85
|
-
# @return [Boolean]
|
86
|
-
def self.item_node?(node)
|
87
|
-
types_from(node).any?
|
88
|
-
end
|
89
|
-
|
90
|
-
# Extract items from a context.
|
91
|
-
#
|
92
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
93
|
-
# @return [Array<MicroMicro::Item>]
|
94
|
-
def self.items_from(context)
|
95
|
-
nodes_from(context).map { |node| new(node) }
|
96
|
-
end
|
97
|
-
|
98
|
-
# Extract item nodes from a context.
|
99
|
-
#
|
100
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
101
|
-
# @param node_set [Nokogiri::XML::NodeSet]
|
102
|
-
# @return [Nokogiri::XML::NodeSet]
|
103
|
-
def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
104
|
-
return nodes_from(context.element_children, node_set) if context.is_a?(Nokogiri::HTML::Document)
|
105
|
-
|
106
|
-
context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
107
|
-
|
108
|
-
if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
|
109
|
-
if item_node?(context)
|
110
|
-
node_set << context unless Property.property_node?(context)
|
111
|
-
else
|
112
|
-
nodes_from(context.element_children, node_set)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
node_set
|
117
|
-
end
|
118
|
-
|
119
|
-
# Extract root class names from a node.
|
120
|
-
#
|
121
|
-
# node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
|
122
|
-
# MicroMicro::Item.types_from(node) #=> ['h-card']
|
123
|
-
#
|
124
|
-
# @param node [Nokogiri::XML::Element]
|
125
|
-
# @return [Array<String>]
|
126
|
-
def self.types_from(node)
|
127
|
-
node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
|
133
|
+
@types ||= Helpers.root_class_names_from(node)
|
128
134
|
end
|
129
135
|
|
130
136
|
private
|
131
137
|
|
138
|
+
# @return [Nokogiri::XML::Element]
|
132
139
|
attr_reader :node
|
133
140
|
|
134
141
|
# @return [MicroMicro::ImpliedProperty]
|
135
142
|
def implied_name
|
136
|
-
@implied_name ||= ImpliedProperty.new(node,
|
143
|
+
@implied_name ||= ImpliedProperty.new(node, 'p-name')
|
137
144
|
end
|
138
145
|
|
139
146
|
# @return [Boolean]
|
@@ -143,7 +150,7 @@ module MicroMicro
|
|
143
150
|
|
144
151
|
# @return [MicroMicro::ImpliedProperty]
|
145
152
|
def implied_photo
|
146
|
-
@implied_photo ||= ImpliedProperty.new(node,
|
153
|
+
@implied_photo ||= ImpliedProperty.new(node, 'u-photo')
|
147
154
|
end
|
148
155
|
|
149
156
|
# @return [Boolean]
|
@@ -153,7 +160,7 @@ module MicroMicro
|
|
153
160
|
|
154
161
|
# @return [MicroMicro::ImpliedProperty]
|
155
162
|
def implied_url
|
156
|
-
@implied_url ||= ImpliedProperty.new(node,
|
163
|
+
@implied_url ||= ImpliedProperty.new(node, 'u-url')
|
157
164
|
end
|
158
165
|
|
159
166
|
# @return [Boolean]
|
@@ -163,22 +170,29 @@ module MicroMicro
|
|
163
170
|
|
164
171
|
# @return [Boolean]
|
165
172
|
def imply_name?
|
166
|
-
properties.none?
|
173
|
+
properties.names.none?('name') &&
|
174
|
+
properties.none?(&:embedded_markup_property?) &&
|
175
|
+
properties.none?(&:plain_text_property?) &&
|
176
|
+
!nested_items?
|
167
177
|
end
|
168
178
|
|
169
179
|
# @return [Boolean]
|
170
180
|
def imply_photo?
|
171
|
-
properties.none?
|
181
|
+
properties.names.none?('photo') &&
|
182
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
183
|
+
!nested_items?
|
172
184
|
end
|
173
185
|
|
174
186
|
# @return [Boolean]
|
175
187
|
def imply_url?
|
176
|
-
properties.none?
|
188
|
+
properties.names.none?('url') &&
|
189
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
190
|
+
!nested_items?
|
177
191
|
end
|
178
192
|
|
179
193
|
# @return [Boolean]
|
180
194
|
def nested_items?
|
181
|
-
@nested_items ||= properties.find(&:item_node?) || children
|
195
|
+
@nested_items ||= properties.find(&:item_node?) || children?
|
182
196
|
end
|
183
197
|
end
|
184
198
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Parsers
|
5
|
+
class BaseImpliedPropertyParser < BasePropertyParser
|
6
|
+
private
|
7
|
+
|
8
|
+
# @return [String, nil]
|
9
|
+
def attribute_value
|
10
|
+
candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]] if candidate_node
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Nokogiri::XML::Element, nil]
|
14
|
+
def candidate_node
|
15
|
+
@candidate_node ||=
|
16
|
+
candidate_nodes.find do |node|
|
17
|
+
self.class::HTML_ELEMENTS_MAP.filter_map do |name, attribute|
|
18
|
+
node if name == node.name && node[attribute]
|
19
|
+
end.any?
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# @return [Nokogiri::XML::NodeSet]
|
24
|
+
def candidate_nodes
|
25
|
+
Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Parsers
|
3
5
|
class BasePropertyParser
|
@@ -8,24 +10,16 @@ module MicroMicro
|
|
8
10
|
end
|
9
11
|
|
10
12
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
13
|
+
# microformats.org: microformats2 parsing specification § Parsing a +p-+ property
|
11
14
|
# @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
|
15
|
+
# microformats.org: microformats2 parsing specification § Parsing an +e-+ property
|
12
16
|
#
|
13
17
|
# @return [String]
|
14
18
|
def value
|
15
|
-
@value ||=
|
16
|
-
|
19
|
+
@value ||=
|
20
|
+
Helpers.text_content_from(node) do |context|
|
17
21
|
context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
|
18
22
|
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
# @param node [Nokogiri::XML::Element]
|
23
|
-
# @param attributes_map [Hash{String => Array}]
|
24
|
-
# @return [Array]
|
25
|
-
def self.attribute_value_from(node, attributes_map)
|
26
|
-
attributes_map.map do |attribute, names|
|
27
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
28
|
-
end.compact.first
|
29
23
|
end
|
30
24
|
|
31
25
|
private
|