micromicro 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.simplecov +2 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +5 -5
- data/README.md +44 -9
- data/lib/micro_micro/collectible.rb +13 -0
- data/lib/micro_micro/collections/base_collection.rb +11 -12
- data/lib/micro_micro/collections/items_collection.rb +5 -0
- data/lib/micro_micro/collections/properties_collection.rb +9 -7
- data/lib/micro_micro/collections/{relations_collection.rb → relationships_collection.rb} +14 -5
- data/lib/micro_micro/document.rb +94 -14
- data/lib/micro_micro/item.rb +40 -7
- data/lib/micro_micro/parsers/base_property_parser.rb +17 -14
- data/lib/micro_micro/parsers/date_time_parser.rb +1 -7
- data/lib/micro_micro/parsers/date_time_property_parser.rb +17 -27
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -15
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +17 -56
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +5 -9
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +3 -13
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +9 -18
- data/lib/micro_micro/parsers/url_property_parser.rb +11 -27
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +1 -12
- data/lib/micro_micro/property.rb +16 -23
- data/lib/micro_micro/{relation.rb → relationship.rb} +6 -5
- data/lib/micro_micro/version.rb +1 -1
- data/lib/micromicro.rb +13 -3
- data/micromicro.gemspec +1 -1
- metadata +11 -10
@@ -1,38 +1,29 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class PlainTextPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.1
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
6
4
|
HTML_ATTRIBUTES_MAP = {
|
7
5
|
'title' => %w[abbr link],
|
8
6
|
'value' => %w[data input],
|
9
7
|
'alt' => %w[area img]
|
10
8
|
}.freeze
|
11
9
|
|
10
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
|
11
|
+
#
|
12
12
|
# @return [String]
|
13
13
|
def value
|
14
|
-
@value ||=
|
15
|
-
return value_class_pattern_parser.value if value_class_pattern_parser.value?
|
16
|
-
return attribute_values.first if attribute_values.any?
|
17
|
-
|
18
|
-
super
|
19
|
-
end
|
14
|
+
@value ||= value_class_pattern_value || attribute_value || super
|
20
15
|
end
|
21
16
|
|
22
17
|
private
|
23
18
|
|
24
|
-
# @return [
|
25
|
-
def
|
26
|
-
|
27
|
-
HTML_ATTRIBUTES_MAP.map do |attribute, names|
|
28
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
29
|
-
end.compact
|
30
|
-
end
|
19
|
+
# @return [String, nil]
|
20
|
+
def attribute_value
|
21
|
+
self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
31
22
|
end
|
32
23
|
|
33
|
-
# @return [
|
34
|
-
def
|
35
|
-
|
24
|
+
# @return [String, nil]
|
25
|
+
def value_class_pattern_value
|
26
|
+
ValueClassPatternParser.new(node).value
|
36
27
|
end
|
37
28
|
end
|
38
29
|
end
|
@@ -1,8 +1,6 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class UrlPropertyParser < BasePropertyParser
|
4
|
-
# @see microformats2 Parsing Specification section 1.3.2
|
5
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
6
4
|
HTML_ATTRIBUTES_MAP = {
|
7
5
|
'href' => %w[a area link],
|
8
6
|
'src' => %w[audio iframe img source video],
|
@@ -10,14 +8,12 @@ module MicroMicro
|
|
10
8
|
'data' => %w[object]
|
11
9
|
}.freeze
|
12
10
|
|
13
|
-
# @see microformats2 Parsing Specification section 1.3.2
|
14
|
-
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
15
11
|
EXTENDED_HTML_ATTRIBUTES_MAP = {
|
16
12
|
'title' => %w[abbr],
|
17
13
|
'value' => %w[data input]
|
18
14
|
}.freeze
|
19
15
|
|
20
|
-
# @see microformats2
|
16
|
+
# @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
|
21
17
|
# @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
|
22
18
|
#
|
23
19
|
# @return [String, Hash{Symbol => String}]
|
@@ -34,20 +30,14 @@ module MicroMicro
|
|
34
30
|
|
35
31
|
private
|
36
32
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
41
|
-
end.compact
|
42
|
-
end
|
33
|
+
# @return [String, nil]
|
34
|
+
def attribute_value
|
35
|
+
self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
|
43
36
|
end
|
44
37
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
node[attribute] if names.include?(node.name) && node[attribute]
|
49
|
-
end
|
50
|
-
end.compact
|
38
|
+
# @return [String, nil]
|
39
|
+
def extended_attribute_value
|
40
|
+
self.class.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
|
51
41
|
end
|
52
42
|
|
53
43
|
# @return [String]
|
@@ -57,18 +47,12 @@ module MicroMicro
|
|
57
47
|
|
58
48
|
# @return [String]
|
59
49
|
def unresolved_value
|
60
|
-
|
61
|
-
return attribute_values.first if attribute_values.any?
|
62
|
-
return value_class_pattern_parser.value if value_class_pattern_parser.value?
|
63
|
-
return extended_attribute_values.first if extended_attribute_values.any?
|
64
|
-
|
65
|
-
serialized_node.text
|
66
|
-
end
|
50
|
+
attribute_value || value_class_pattern_value || extended_attribute_value || Document.text_content_from(node)
|
67
51
|
end
|
68
52
|
|
69
|
-
# @return [
|
70
|
-
def
|
71
|
-
|
53
|
+
# @return [String, nil]
|
54
|
+
def value_class_pattern_value
|
55
|
+
ValueClassPatternParser.new(node).value
|
72
56
|
end
|
73
57
|
end
|
74
58
|
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module MicroMicro
|
2
2
|
module Parsers
|
3
3
|
class ValueClassPatternParser
|
4
|
-
# @see Value Class Pattern sections 3 and 4
|
5
4
|
# @see http://microformats.org/wiki/value-class-pattern#Basic_Parsing
|
6
5
|
# @see http://microformats.org/wiki/value-class-pattern#Date_and_time_values
|
7
6
|
HTML_ATTRIBUTES_MAP = {
|
@@ -20,12 +19,7 @@ module MicroMicro
|
|
20
19
|
|
21
20
|
# @return [String, nil]
|
22
21
|
def value
|
23
|
-
@value ||= values.join(separator).strip if values?
|
24
|
-
end
|
25
|
-
|
26
|
-
# @return [Boolean]
|
27
|
-
def value?
|
28
|
-
value.present?
|
22
|
+
@value ||= values.join(separator).strip if values.any?
|
29
23
|
end
|
30
24
|
|
31
25
|
# @return [Array<String>]
|
@@ -33,11 +27,6 @@ module MicroMicro
|
|
33
27
|
@values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
|
34
28
|
end
|
35
29
|
|
36
|
-
# @return [Boolean]
|
37
|
-
def values?
|
38
|
-
values.any?
|
39
|
-
end
|
40
|
-
|
41
30
|
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
42
31
|
# @param node_set [Nokogiri::XML::NodeSet]
|
43
32
|
# @return [Nokogiri::XML::NodeSet]
|
data/lib/micro_micro/property.rb
CHANGED
@@ -7,7 +7,8 @@ module MicroMicro
|
|
7
7
|
'u' => Parsers::UrlPropertyParser
|
8
8
|
}.freeze
|
9
9
|
|
10
|
-
|
10
|
+
include Collectible
|
11
|
+
|
11
12
|
attr_reader :name, :node, :prefix
|
12
13
|
|
13
14
|
# @param node [Nokogiri::XML::Element]
|
@@ -29,19 +30,29 @@ module MicroMicro
|
|
29
30
|
format(%(#<#{self.class.name}:%#0x name: #{name.inspect}, prefix: #{prefix.inspect}, value: #{value.inspect}>), object_id)
|
30
31
|
end
|
31
32
|
|
33
|
+
# @return [MicroMicro::Item, nil]
|
34
|
+
def item
|
35
|
+
@item ||= Item.new(node) if item_node?
|
36
|
+
end
|
37
|
+
|
32
38
|
# @return [Boolean]
|
33
39
|
def item_node?
|
34
40
|
@item_node ||= Item.item_node?(node)
|
35
41
|
end
|
36
42
|
|
37
|
-
# @return [String, Hash
|
43
|
+
# @return [String, Hash]
|
38
44
|
def value
|
39
45
|
@value ||= begin
|
40
46
|
return parser.value unless item_node?
|
41
47
|
|
42
|
-
|
48
|
+
hash = item.to_h
|
49
|
+
|
50
|
+
return hash.merge(parser.value) if prefix == 'e'
|
43
51
|
|
44
|
-
item
|
52
|
+
p_property = item.properties.find { |property| property.name == 'name' } if prefix == 'p'
|
53
|
+
u_property = item.properties.find { |property| property.name == 'url' } if prefix == 'u'
|
54
|
+
|
55
|
+
hash.merge(value: (p_property || u_property || parser).value)
|
45
56
|
end
|
46
57
|
end
|
47
58
|
|
@@ -86,29 +97,11 @@ module MicroMicro
|
|
86
97
|
# node = Nokogiri::HTML('<a href="https://sixtwothree.org" class="p-name u-url">Jason Garber</a>').at_css('a')
|
87
98
|
# MicroMicro::Property.types_from(node) #=> [['p', 'name'], ['u', 'url']]
|
88
99
|
def self.types_from(node)
|
89
|
-
node.classes.select { |token| token.match?(/^(?:dt|e|p|u)(
|
100
|
+
node.classes.select { |token| token.match?(/^(?:dt|e|p|u)(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.map { |token| token.split(/-/, 2) }.uniq
|
90
101
|
end
|
91
102
|
|
92
103
|
private
|
93
104
|
|
94
|
-
# @return [MicroMicro::Item, nil]
|
95
|
-
def item
|
96
|
-
@item ||= Item.new(node) if item_node?
|
97
|
-
end
|
98
|
-
|
99
|
-
# @reutrn [String, nil]
|
100
|
-
def item_value
|
101
|
-
return unless item_node?
|
102
|
-
|
103
|
-
obj_by_prefix = case prefix
|
104
|
-
when 'e' then item
|
105
|
-
when 'p' then item.properties.find { |property| property.name == 'name' }
|
106
|
-
when 'u' then item.properties.find { |property| property.name == 'url' }
|
107
|
-
end
|
108
|
-
|
109
|
-
(obj_by_prefix || parser).value
|
110
|
-
end
|
111
|
-
|
112
105
|
def parser
|
113
106
|
@parser ||= PROPERTY_PARSERS_MAP[prefix].new(self)
|
114
107
|
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
module MicroMicro
|
2
|
-
class
|
2
|
+
class Relationship
|
3
|
+
include Collectible
|
4
|
+
|
3
5
|
# @param node [Nokogiri::XML::Element]
|
4
6
|
def initialize(node)
|
5
7
|
@node = node
|
@@ -7,7 +9,7 @@ module MicroMicro
|
|
7
9
|
|
8
10
|
# @return [String]
|
9
11
|
def href
|
10
|
-
@href ||=
|
12
|
+
@href ||= node['href']
|
11
13
|
end
|
12
14
|
|
13
15
|
# @return [String, nil]
|
@@ -18,7 +20,6 @@ module MicroMicro
|
|
18
20
|
# @return [String]
|
19
21
|
def inspect
|
20
22
|
format(%(#<#{self.class.name}:%#0x href: #{href.inspect}, rels: #{rels.inspect}>), object_id)
|
21
|
-
# format(%(#<#{self.class.name}:%#0x rels: #{rels}>), object_id)
|
22
23
|
end
|
23
24
|
|
24
25
|
# @return [String, nil]
|
@@ -66,8 +67,8 @@ module MicroMicro
|
|
66
67
|
end
|
67
68
|
|
68
69
|
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::Element]
|
69
|
-
# @return [Array<MicroMicro::
|
70
|
-
def self.
|
70
|
+
# @return [Array<MicroMicro::Relationship>]
|
71
|
+
def self.relationships_from(context)
|
71
72
|
nodes_from(context).map { |node| new(node) }
|
72
73
|
end
|
73
74
|
|
data/lib/micro_micro/version.rb
CHANGED
data/lib/micromicro.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
1
3
|
require 'absolutely'
|
2
4
|
require 'active_support/core_ext/array/grouping'
|
3
5
|
require 'active_support/core_ext/hash/deep_transform_values'
|
4
6
|
require 'active_support/core_ext/hash/keys'
|
5
7
|
require 'active_support/core_ext/hash/slice'
|
6
|
-
require 'active_support/core_ext/module/delegation'
|
7
8
|
require 'active_support/core_ext/object/blank'
|
8
9
|
require 'nokogiri'
|
9
10
|
|
10
11
|
require 'micro_micro/version'
|
12
|
+
require 'micro_micro/collectible'
|
11
13
|
|
12
14
|
require 'micro_micro/parsers/date_time_parser'
|
13
15
|
require 'micro_micro/parsers/value_class_pattern_parser'
|
@@ -25,14 +27,22 @@ require 'micro_micro/document'
|
|
25
27
|
require 'micro_micro/item'
|
26
28
|
require 'micro_micro/property'
|
27
29
|
require 'micro_micro/implied_property'
|
28
|
-
require 'micro_micro/
|
30
|
+
require 'micro_micro/relationship'
|
29
31
|
|
30
32
|
require 'micro_micro/collections/base_collection'
|
31
33
|
require 'micro_micro/collections/items_collection'
|
32
34
|
require 'micro_micro/collections/properties_collection'
|
33
|
-
require 'micro_micro/collections/
|
35
|
+
require 'micro_micro/collections/relationships_collection'
|
34
36
|
|
35
37
|
module MicroMicro
|
38
|
+
# Parse a string of HTML for microformats2-encoded data.
|
39
|
+
# Convenience method for MicroMicro::Document.new.
|
40
|
+
#
|
41
|
+
# MicroMicro.parse('<a href="/" class="h-card" rel="me">Jason Garber</a>', 'https://sixtwothree.org')
|
42
|
+
#
|
43
|
+
# @param markup [String] The HTML to parse for microformats2-encoded data.
|
44
|
+
# @param base_url [String] The URL associated with markup. Used for relative URL resolution.
|
45
|
+
# @return [MicroMicro::Document]
|
36
46
|
def self.parse(markup, base_url)
|
37
47
|
Document.new(markup, base_url)
|
38
48
|
end
|
data/micromicro.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.metadata['bug_tracker_uri'] = "#{spec.homepage}/issues"
|
23
23
|
spec.metadata['changelog_uri'] = "#{spec.homepage}/blob/v#{spec.version}/CHANGELOG.md"
|
24
24
|
|
25
|
-
spec.add_runtime_dependency 'absolutely', '~>
|
25
|
+
spec.add_runtime_dependency 'absolutely', '~> 5.0'
|
26
26
|
spec.add_runtime_dependency 'activesupport', '~> 6.0'
|
27
27
|
spec.add_runtime_dependency 'nokogiri', '~> 1.10'
|
28
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: micromicro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: absolutely
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '5.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '5.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: activesupport
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,10 +75,11 @@ files:
|
|
75
75
|
- LICENSE
|
76
76
|
- README.md
|
77
77
|
- Rakefile
|
78
|
+
- lib/micro_micro/collectible.rb
|
78
79
|
- lib/micro_micro/collections/base_collection.rb
|
79
80
|
- lib/micro_micro/collections/items_collection.rb
|
80
81
|
- lib/micro_micro/collections/properties_collection.rb
|
81
|
-
- lib/micro_micro/collections/
|
82
|
+
- lib/micro_micro/collections/relationships_collection.rb
|
82
83
|
- lib/micro_micro/document.rb
|
83
84
|
- lib/micro_micro/implied_property.rb
|
84
85
|
- lib/micro_micro/item.rb
|
@@ -93,7 +94,7 @@ files:
|
|
93
94
|
- lib/micro_micro/parsers/url_property_parser.rb
|
94
95
|
- lib/micro_micro/parsers/value_class_pattern_parser.rb
|
95
96
|
- lib/micro_micro/property.rb
|
96
|
-
- lib/micro_micro/
|
97
|
+
- lib/micro_micro/relationship.rb
|
97
98
|
- lib/micro_micro/version.rb
|
98
99
|
- lib/micromicro.rb
|
99
100
|
- micromicro.gemspec
|
@@ -102,8 +103,8 @@ licenses:
|
|
102
103
|
- MIT
|
103
104
|
metadata:
|
104
105
|
bug_tracker_uri: https://github.com/jgarber623/micromicro/issues
|
105
|
-
changelog_uri: https://github.com/jgarber623/micromicro/blob/
|
106
|
-
post_install_message:
|
106
|
+
changelog_uri: https://github.com/jgarber623/micromicro/blob/v1.0.0/CHANGELOG.md
|
107
|
+
post_install_message:
|
107
108
|
rdoc_options: []
|
108
109
|
require_paths:
|
109
110
|
- lib
|
@@ -122,7 +123,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
123
|
version: '0'
|
123
124
|
requirements: []
|
124
125
|
rubygems_version: 3.1.2
|
125
|
-
signing_key:
|
126
|
+
signing_key:
|
126
127
|
specification_version: 4
|
127
128
|
summary: Extract microformats2-encoded data from HTML documents.
|
128
129
|
test_files: []
|