micromicro 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.simplecov +2 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +5 -5
- data/README.md +44 -9
- data/lib/micro_micro/collectible.rb +13 -0
- data/lib/micro_micro/collections/base_collection.rb +11 -12
- data/lib/micro_micro/collections/items_collection.rb +5 -0
- data/lib/micro_micro/collections/properties_collection.rb +9 -7
- data/lib/micro_micro/collections/{relations_collection.rb → relationships_collection.rb} +14 -5
- data/lib/micro_micro/document.rb +94 -14
- data/lib/micro_micro/item.rb +40 -7
- data/lib/micro_micro/parsers/base_property_parser.rb +17 -14
- data/lib/micro_micro/parsers/date_time_parser.rb +1 -7
- data/lib/micro_micro/parsers/date_time_property_parser.rb +17 -27
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -15
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +17 -56
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +5 -9
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +3 -13
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +9 -18
- data/lib/micro_micro/parsers/url_property_parser.rb +11 -27
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +1 -12
- data/lib/micro_micro/property.rb +16 -23
- data/lib/micro_micro/{relation.rb → relationship.rb} +6 -5
- data/lib/micro_micro/version.rb +1 -1
- data/lib/micromicro.rb +13 -3
- data/micromicro.gemspec +1 -1
- metadata +11 -10
@@ -1,38 +1,29 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class PlainTextPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.1
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
6
4
|
HTML_ATTRIBUTES_MAP = {
|
7
5
|
'title' => %w[abbr link],
|
8
6
|
'value' => %w[data input],
|
9
7
|
'alt' => %w[area img]
|
10
8
|
}.freeze
|
11
9
|
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
11
|
+
#
|
12
12
|
# @return [String]
|
13
13
|
def value
|
14
|
-
@value ||=
|
15
|
-
return value_class_pattern_parser.value if value_class_pattern_parser.value?
|
16
|
-
return attribute_values.first if attribute_values.any?
|
17
|
-
|
18
|
-
super
|
19
|
-
end
|
14
|
+
@value ||= value_class_pattern_value || attribute_value || super
|
20
15
|
end
|
21
16
|
|
22
17
|
private
|
23
18
|
|
24
|
-
# @return [
|
25
|
-
def
|
26
|
-
|
27
|
-
HTML_ATTRIBUTES_MAP.map do |attribute, names|
|
28
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
29
|
-
end.compact
|
30
|
-
end
|
19
|
+
# @return [String, nil]
|
20
|
+
def attribute_value
|
21
|
+
self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
31
22
|
end
|
32
23
|
|
33
|
-
# @return [
|
34
|
-
def
|
35
|
-
|
24
|
+
# @return [String, nil]
|
25
|
+
def value_class_pattern_value
|
26
|
+
ValueClassPatternParser.new(node).value
|
36
27
|
end
|
37
28
|
end
|
38
29
|
end
|
@@ -1,8 +1,6 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class UrlPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.2
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
6
4
|
HTML_ATTRIBUTES_MAP = {
|
7
5
|
'href' => %w[a area link],
|
8
6
|
'src' => %w[audio iframe img source video],
|
@@ -10,14 +8,12 @@ module MicroMicro
|
|
10
8
|
'data' => %w[object]
|
11
9
|
}.freeze
|
12
10
|
|
13
|
-
# @see microformats2 Parsing Specification section 1.3.2
|
14
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
15
11
|
EXTENDED_HTML_ATTRIBUTES_MAP = {
|
16
12
|
'title' => %w[abbr],
|
17
13
|
'value' => %w[data input]
|
18
14
|
}.freeze
|
19
15
|
|
20
|
-
# @see microformats2
|
16
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
21
17
|
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
22
18
|
#
|
23
19
|
# @return [String, Hash{Symbol => String}]
|
@@ -34,20 +30,14 @@ module MicroMicro
|
|
34
30
|
|
35
31
|
private
|
36
32
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
41
|
-
end.compact
|
42
|
-
end
|
33
|
+
# @return [String, nil]
|
34
|
+
def attribute_value
|
35
|
+
self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
43
36
|
end
|
44
37
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
49
|
-
end
|
50
|
-
end.compact
|
38
|
+
# @return [String, nil]
|
39
|
+
def extended_attribute_value
|
40
|
+
self.class.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
|
51
41
|
end
|
52
42
|
|
53
43
|
# @return [String]
|
@@ -57,18 +47,12 @@ module MicroMicro
|
|
57
47
|
|
58
48
|
# @return [String]
|
59
49
|
def unresolved_value
|
60
|
-
|
61
|
-
return attribute_values.first if attribute_values.any?
|
62
|
-
return value_class_pattern_parser.value if value_class_pattern_parser.value?
|
63
|
-
return extended_attribute_values.first if extended_attribute_values.any?
|
64
|
-
|
65
|
-
serialized_node.text
|
66
|
-
end
|
50
|
+
attribute_value || value_class_pattern_value || extended_attribute_value || Document.text_content_from(node)
|
67
51
|
end
|
68
52
|
|
69
|
-
# @return [
|
70
|
-
def
|
71
|
-
|
53
|
+
# @return [String, nil]
|
54
|
+
def value_class_pattern_value
|
55
|
+
ValueClassPatternParser.new(node).value
|
72
56
|
end
|
73
57
|
end
|
74
58
|
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ValueClassPatternParser
|
4
|
-
# @see Value Class Pattern sections 3 and 4
|
5
4
|
# @see http://microformats.org/wiki/value-class-pattern#Basic_Parsing
|
6
5
|
# @see http://microformats.org/wiki/value-class-pattern#Date_and_time_values
|
7
6
|
HTML_ATTRIBUTES_MAP = {
|
@@ -20,12 +19,7 @@ module MicroMicro
|
|
20
19
|
|
21
20
|
# @return [String, nil]
|
22
21
|
def value
|
23
|
-
@value ||= values.join(separator).strip if values?
|
24
|
-
end
|
25
|
-
|
26
|
-
# @return [Boolean]
|
27
|
-
def value?
|
28
|
-
value.present?
|
22
|
+
@value ||= values.join(separator).strip if values.any?
|
29
23
|
end
|
30
24
|
|
31
25
|
# @return [Array<String>]
|
@@ -33,11 +27,6 @@ module MicroMicro
|
|
33
27
|
@values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
|
34
28
|
end
|
35
29
|
|
36
|
-
# @return [Boolean]
|
37
|
-
def values?
|
38
|
-
values.any?
|
39
|
-
end
|
40
|
-
|
41
30
|
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
42
31
|
# @param node_set [Nokogiri::XML::NodeSet]
|
43
32
|
# @return [Nokogiri::XML::NodeSet]
|
data/lib/micro_micro/property.rb
CHANGED
@@ -7,7 +7,8 @@ module MicroMicro
|
|
7
7
|
'u' => Parsers::UrlPropertyParser
|
8
8
|
}.freeze
|
9
9
|
|
10
|
-
|
10
|
+
include Collectible
|
11
|
+
|
11
12
|
attr_reader :name, :node, :prefix
|
12
13
|
|
13
14
|
# @param node [Nokogiri::XML::Element]
|
@@ -29,19 +30,29 @@ module MicroMicro
|
|
29
30
|
format(%(#<#{self.class.name}:%#0x name: #{name.inspect}, prefix: #{prefix.inspect}, value: #{value.inspect}>), object_id)
|
30
31
|
end
|
31
32
|
|
33
|
+
# @return [MicroMicro::Item, nil]
|
34
|
+
def item
|
35
|
+
@item ||= Item.new(node) if item_node?
|
36
|
+
end
|
37
|
+
|
32
38
|
# @return [Boolean]
|
33
39
|
def item_node?
|
34
40
|
@item_node ||= Item.item_node?(node)
|
35
41
|
end
|
36
42
|
|
37
|
-
# @return [String, Hash
|
43
|
+
# @return [String, Hash]
|
38
44
|
def value
|
39
45
|
@value ||= begin
|
40
46
|
return parser.value unless item_node?
|
41
47
|
|
42
|
-
|
48
|
+
hash = item.to_h
|
49
|
+
|
50
|
+
return hash.merge(parser.value) if prefix == 'e'
|
43
51
|
|
44
|
-
item
|
52
|
+
p_property = item.properties.find { |property| property.name == 'name' } if prefix == 'p'
|
53
|
+
u_property = item.properties.find { |property| property.name == 'url' } if prefix == 'u'
|
54
|
+
|
55
|
+
hash.merge(value: (p_property || u_property || parser).value)
|
45
56
|
end
|
46
57
|
end
|
47
58
|
|
@@ -86,29 +97,11 @@ module MicroMicro
|
|
86
97
|
# node = Nokogiri::HTML('<a href="https://sixtwothree.org" class="p-name u-url">Jason Garber</a>').at_css('a')
|
87
98
|
# MicroMicro::Property.types_from(node) #=> [['p', 'name'], ['u', 'url']]
|
88
99
|
def self.types_from(node)
|
89
|
-
node.classes.select { |token| token.match?(/^(?:dt|e|p|u)(
|
100
|
+
node.classes.select { |token| token.match?(/^(?:dt|e|p|u)(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.map { |token| token.split(/-/, 2) }.uniq
|
90
101
|
end
|
91
102
|
|
92
103
|
private
|
93
104
|
|
94
|
-
# @return [MicroMicro::Item, nil]
|
95
|
-
def item
|
96
|
-
@item ||= Item.new(node) if item_node?
|
97
|
-
end
|
98
|
-
|
99
|
-
# @reutrn [String, nil]
|
100
|
-
def item_value
|
101
|
-
return unless item_node?
|
102
|
-
|
103
|
-
obj_by_prefix = case prefix
|
104
|
-
when 'e' then item
|
105
|
-
when 'p' then item.properties.find { |property| property.name == 'name' }
|
106
|
-
when 'u' then item.properties.find { |property| property.name == 'url' }
|
107
|
-
end
|
108
|
-
|
109
|
-
(obj_by_prefix || parser).value
|
110
|
-
end
|
111
|
-
|
112
105
|
def parser
|
113
106
|
@parser ||= PROPERTY_PARSERS_MAP[prefix].new(self)
|
114
107
|
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
module MicroMicro
|
2
|
-
class
|
2
|
+
class Relationship
|
3
|
+
include Collectible
|
4
|
+
|
3
5
|
# @param node [Nokogiri::XML::Element]
|
4
6
|
def initialize(node)
|
5
7
|
@node = node
|
@@ -7,7 +9,7 @@ module MicroMicro
|
|
7
9
|
|
8
10
|
# @return [String]
|
9
11
|
def href
|
10
|
-
@href ||=
|
12
|
+
@href ||= node['href']
|
11
13
|
end
|
12
14
|
|
13
15
|
# @return [String, nil]
|
@@ -18,7 +20,6 @@ module MicroMicro
|
|
18
20
|
# @return [String]
|
19
21
|
def inspect
|
20
22
|
format(%(#<#{self.class.name}:%#0x href: #{href.inspect}, rels: #{rels.inspect}>), object_id)
|
21
|
-
# format(%(#<#{self.class.name}:%#0x rels: #{rels}>), object_id)
|
22
23
|
end
|
23
24
|
|
24
25
|
# @return [String, nil]
|
@@ -66,8 +67,8 @@ module MicroMicro
|
|
66
67
|
end
|
67
68
|
|
68
69
|
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::Element]
|
69
|
-
# @return [Array<MicroMicro::
|
70
|
-
def self.
|
70
|
+
# @return [Array<MicroMicro::Relationship>]
|
71
|
+
def self.relationships_from(context)
|
71
72
|
nodes_from(context).map { |node| new(node) }
|
72
73
|
end
|
73
74
|
|
data/lib/micro_micro/version.rb
CHANGED
data/lib/micromicro.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
1
3
|
require 'absolutely'
|
2
4
|
require 'active_support/core_ext/array/grouping'
|
3
5
|
require 'active_support/core_ext/hash/deep_transform_values'
|
4
6
|
require 'active_support/core_ext/hash/keys'
|
5
7
|
require 'active_support/core_ext/hash/slice'
|
6
|
-
require 'active_support/core_ext/module/delegation'
|
7
8
|
require 'active_support/core_ext/object/blank'
|
8
9
|
require 'nokogiri'
|
9
10
|
|
10
11
|
require 'micro_micro/version'
|
12
|
+
require 'micro_micro/collectible'
|
11
13
|
|
12
14
|
require 'micro_micro/parsers/date_time_parser'
|
13
15
|
require 'micro_micro/parsers/value_class_pattern_parser'
|
@@ -25,14 +27,22 @@ require 'micro_micro/document'
|
|
25
27
|
require 'micro_micro/item'
|
26
28
|
require 'micro_micro/property'
|
27
29
|
require 'micro_micro/implied_property'
|
28
|
-
require 'micro_micro/
|
30
|
+
require 'micro_micro/relationship'
|
29
31
|
|
30
32
|
require 'micro_micro/collections/base_collection'
|
31
33
|
require 'micro_micro/collections/items_collection'
|
32
34
|
require 'micro_micro/collections/properties_collection'
|
33
|
-
require 'micro_micro/collections/
|
35
|
+
require 'micro_micro/collections/relationships_collection'
|
34
36
|
|
35
37
|
module MicroMicro
|
38
|
+
# Parse a string of HTML for microformats2-encoded data.
|
39
|
+
# Convenience method for MicroMicro::Document.new.
|
40
|
+
#
|
41
|
+
# MicroMicro.parse('<a href="/" class="h-card" rel="me">Jason Garber</a>', 'https://sixtwothree.org')
|
42
|
+
#
|
43
|
+
# @param markup [String] The HTML to parse for microformats2-encoded data.
|
44
|
+
# @param base_url [String] The URL associated with markup. Used for relative URL resolution.
|
45
|
+
# @return [MicroMicro::Document]
|
36
46
|
def self.parse(markup, base_url)
|
37
47
|
Document.new(markup, base_url)
|
38
48
|
end
|
data/micromicro.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.metadata['bug_tracker_uri'] = "#{spec.homepage}/issues"
|
23
23
|
spec.metadata['changelog_uri'] = "#{spec.homepage}/blob/v#{spec.version}/CHANGELOG.md"
|
24
24
|
|
25
|
-
spec.add_runtime_dependency 'absolutely', '~>
|
25
|
+
spec.add_runtime_dependency 'absolutely', '~> 5.0'
|
26
26
|
spec.add_runtime_dependency 'activesupport', '~> 6.0'
|
27
27
|
spec.add_runtime_dependency 'nokogiri', '~> 1.10'
|
28
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: micromicro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: absolutely
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '5.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '5.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: activesupport
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,10 +75,11 @@ files:
|
|
75
75
|
- LICENSE
|
76
76
|
- README.md
|
77
77
|
- Rakefile
|
78
|
+
- lib/micro_micro/collectible.rb
|
78
79
|
- lib/micro_micro/collections/base_collection.rb
|
79
80
|
- lib/micro_micro/collections/items_collection.rb
|
80
81
|
- lib/micro_micro/collections/properties_collection.rb
|
81
|
-
- lib/micro_micro/collections/
|
82
|
+
- lib/micro_micro/collections/relationships_collection.rb
|
82
83
|
- lib/micro_micro/document.rb
|
83
84
|
- lib/micro_micro/implied_property.rb
|
84
85
|
- lib/micro_micro/item.rb
|
@@ -93,7 +94,7 @@ files:
|
|
93
94
|
- lib/micro_micro/parsers/url_property_parser.rb
|
94
95
|
- lib/micro_micro/parsers/value_class_pattern_parser.rb
|
95
96
|
- lib/micro_micro/property.rb
|
96
|
-
- lib/micro_micro/
|
97
|
+
- lib/micro_micro/relationship.rb
|
97
98
|
- lib/micro_micro/version.rb
|
98
99
|
- lib/micromicro.rb
|
99
100
|
- micromicro.gemspec
|
@@ -102,8 +103,8 @@ licenses:
|
|
102
103
|
- MIT
|
103
104
|
metadata:
|
104
105
|
bug_tracker_uri: https://github.com/jgarber623/micromicro/issues
|
105
|
-
changelog_uri: https://github.com/jgarber623/micromicro/blob/
|
106
|
-
post_install_message:
|
106
|
+
changelog_uri: https://github.com/jgarber623/micromicro/blob/v1.0.0/CHANGELOG.md
|
107
|
+
post_install_message:
|
107
108
|
rdoc_options: []
|
108
109
|
require_paths:
|
109
110
|
- lib
|
@@ -122,7 +123,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
123
|
version: '0'
|
123
124
|
requirements: []
|
124
125
|
rubygems_version: 3.1.2
|
125
|
-
signing_key:
|
126
|
+
signing_key:
|
126
127
|
specification_version: 4
|
127
128
|
summary: Extract microformats2-encoded data from HTML documents.
|
128
129
|
test_files: []
|