micromicro 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -9
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +7 -1
- data/lib/micro_micro/collections/items_collection.rb +3 -1
- data/lib/micro_micro/collections/properties_collection.rb +12 -0
- data/lib/micro_micro/collections/relationships_collection.rb +10 -9
- data/lib/micro_micro/document.rb +10 -98
- data/lib/micro_micro/helpers.rb +82 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +53 -60
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +4 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +7 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +3 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +14 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +19 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +11 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +3 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +20 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +27 -42
- data/lib/micro_micro/property.rb +68 -56
- data/lib/micro_micro/relationship.rb +15 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +31 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
data/lib/micro_micro/property.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Property
|
5
|
+
include Collectible
|
6
|
+
|
3
7
|
PROPERTY_PARSERS_MAP = {
|
4
8
|
'dt' => Parsers::DateTimePropertyParser,
|
5
9
|
'e' => Parsers::EmbeddedMarkupPropertyParser,
|
@@ -7,17 +11,46 @@ module MicroMicro
|
|
7
11
|
'u' => Parsers::UrlPropertyParser
|
8
12
|
}.freeze
|
9
13
|
|
10
|
-
include Collectible
|
11
|
-
|
12
14
|
attr_reader :name, :node, :prefix
|
13
15
|
|
16
|
+
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
17
|
+
# @return [Array<MicroMicro::Property>]
|
18
|
+
def self.from_context(context)
|
19
|
+
node_set_from(context).flat_map do |node|
|
20
|
+
Helpers.property_class_names_from(node).map { |token| new(node, token) }
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
25
|
+
# @param node_set [Nokogiri::XML::NodeSet]
|
26
|
+
# @return [Nokogiri::XML::NodeSet]
|
27
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
28
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
29
|
+
|
30
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
31
|
+
node_set << context if Helpers.property_node?(context)
|
32
|
+
|
33
|
+
node_set_from(context.element_children, node_set) unless Helpers.item_node?(context)
|
34
|
+
end
|
35
|
+
|
36
|
+
node_set
|
37
|
+
end
|
38
|
+
|
14
39
|
# @param node [Nokogiri::XML::Element]
|
15
|
-
# @param
|
16
|
-
|
17
|
-
def initialize(node, name:, prefix:)
|
40
|
+
# @param token [String]
|
41
|
+
def initialize(node, token)
|
18
42
|
@node = node
|
19
|
-
@name =
|
20
|
-
|
43
|
+
@prefix, @name = token.split(/-/, 2)
|
44
|
+
end
|
45
|
+
|
46
|
+
# @return [Boolean]
|
47
|
+
def date_time_property?
|
48
|
+
prefix == 'dt'
|
49
|
+
end
|
50
|
+
|
51
|
+
# @return [Boolean]
|
52
|
+
def embedded_markup_property?
|
53
|
+
prefix == 'e'
|
21
54
|
end
|
22
55
|
|
23
56
|
# @return [Boolean]
|
@@ -25,10 +58,15 @@ module MicroMicro
|
|
25
58
|
false
|
26
59
|
end
|
27
60
|
|
61
|
+
# :nocov:
|
28
62
|
# @return [String]
|
29
63
|
def inspect
|
30
|
-
|
64
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
65
|
+
"name: #{name.inspect}, " \
|
66
|
+
"prefix: #{prefix.inspect}, " \
|
67
|
+
"value: #{value.inspect}>"
|
31
68
|
end
|
69
|
+
# :nocov:
|
32
70
|
|
33
71
|
# @return [MicroMicro::Item, nil]
|
34
72
|
def item
|
@@ -37,67 +75,41 @@ module MicroMicro
|
|
37
75
|
|
38
76
|
# @return [Boolean]
|
39
77
|
def item_node?
|
40
|
-
@item_node ||=
|
78
|
+
@item_node ||= Helpers.item_node?(node)
|
41
79
|
end
|
42
80
|
|
43
|
-
# @return [
|
44
|
-
def
|
45
|
-
|
46
|
-
return parser.value unless item_node?
|
47
|
-
|
48
|
-
hash = item.to_h
|
49
|
-
|
50
|
-
return hash.merge(parser.value) if prefix == 'e'
|
51
|
-
|
52
|
-
p_property = item.properties.find { |property| property.name == 'name' } if prefix == 'p'
|
53
|
-
u_property = item.properties.find { |property| property.name == 'url' } if prefix == 'u'
|
54
|
-
|
55
|
-
hash.merge(value: (p_property || u_property || parser).value)
|
56
|
-
end
|
81
|
+
# @return [Boolean]
|
82
|
+
def plain_text_property?
|
83
|
+
prefix == 'p'
|
57
84
|
end
|
58
85
|
|
59
86
|
# @return [Boolean]
|
60
|
-
def
|
61
|
-
|
87
|
+
def url_property?
|
88
|
+
prefix == 'u'
|
62
89
|
end
|
63
90
|
|
64
|
-
# @
|
65
|
-
#
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
|
71
|
-
node_set << context if property_node?(context)
|
91
|
+
# @return [String, Hash]
|
92
|
+
# rubocop:disable Metrics
|
93
|
+
def value
|
94
|
+
@value ||=
|
95
|
+
if item_node?
|
96
|
+
hash = item.to_h
|
72
97
|
|
73
|
-
|
74
|
-
end
|
98
|
+
return hash.merge(parser.value) if embedded_markup_property?
|
75
99
|
|
76
|
-
|
77
|
-
|
100
|
+
p_property = item.properties.find { |property| property.name == 'name' } if plain_text_property?
|
101
|
+
u_property = item.properties.find { |property| property.name == 'url' } if url_property?
|
78
102
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
types_from(node).map { |prefix, name| new(node, name: name, prefix: prefix) }
|
84
|
-
end.flatten
|
103
|
+
hash.merge(value: (p_property || u_property || parser).value)
|
104
|
+
else
|
105
|
+
parser.value
|
106
|
+
end
|
85
107
|
end
|
108
|
+
# rubocop:enable Metrics
|
86
109
|
|
87
|
-
# @param node [Nokogiri::XML::Element]
|
88
110
|
# @return [Boolean]
|
89
|
-
def
|
90
|
-
|
91
|
-
end
|
92
|
-
|
93
|
-
# @param node [Nokogiri::XML::Element]
|
94
|
-
# @return [Array<Array(String, String)>]
|
95
|
-
#
|
96
|
-
# @example
|
97
|
-
# node = Nokogiri::HTML('<a href="https://sixtwothree.org" class="p-name u-url">Jason Garber</a>').at_css('a')
|
98
|
-
# MicroMicro::Property.types_from(node) #=> [['p', 'name'], ['u', 'url']]
|
99
|
-
def self.types_from(node)
|
100
|
-
node.classes.select { |token| token.match?(/^(?:dt|e|p|u)(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.map { |token| token.split(/-/, 2) }.uniq
|
111
|
+
def value?
|
112
|
+
value.present?
|
101
113
|
end
|
102
114
|
|
103
115
|
private
|
@@ -1,7 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Relationship
|
3
5
|
include Collectible
|
4
6
|
|
7
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::Element]
|
8
|
+
# @return [Array<MicroMicro::Relationship>]
|
9
|
+
def self.relationships_from(context)
|
10
|
+
context.css('[href][rel]:not([rel=""])')
|
11
|
+
.reject { |node| Helpers.ignore_nodes?(node.ancestors) }
|
12
|
+
.map { |node| new(node) }
|
13
|
+
end
|
14
|
+
|
5
15
|
# @param node [Nokogiri::XML::Element]
|
6
16
|
def initialize(node)
|
7
17
|
@node = node
|
@@ -17,10 +27,14 @@ module MicroMicro
|
|
17
27
|
@hreflang ||= node['hreflang']&.strip
|
18
28
|
end
|
19
29
|
|
30
|
+
# :nocov:
|
20
31
|
# @return [String]
|
21
32
|
def inspect
|
22
|
-
|
33
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
34
|
+
"href: #{href.inspect}, " \
|
35
|
+
"rels: #{rels.inspect}>"
|
23
36
|
end
|
37
|
+
# :nocov:
|
24
38
|
|
25
39
|
# @return [String, nil]
|
26
40
|
def media
|
@@ -60,18 +74,6 @@ module MicroMicro
|
|
60
74
|
@type ||= node['type']&.strip
|
61
75
|
end
|
62
76
|
|
63
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::Element]
|
64
|
-
# @return [Nokogiri::XML::NodeSet]
|
65
|
-
def self.nodes_from(context)
|
66
|
-
context.css('[href][rel]:not([rel=""])').reject { |node| (node.ancestors.map(&:name) & Document.ignored_node_names).any? }
|
67
|
-
end
|
68
|
-
|
69
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::Element]
|
70
|
-
# @return [Array<MicroMicro::Relationship>]
|
71
|
-
def self.relationships_from(context)
|
72
|
-
nodes_from(context).map { |node| new(node) }
|
73
|
-
end
|
74
|
-
|
75
77
|
private
|
76
78
|
|
77
79
|
attr_reader :node
|
data/lib/micro_micro/version.rb
CHANGED
data/lib/micromicro.rb
CHANGED
@@ -1,38 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'forwardable'
|
2
4
|
|
3
|
-
require 'addressable/uri'
|
4
5
|
require 'active_support/core_ext/array/grouping'
|
5
6
|
require 'active_support/core_ext/hash/deep_transform_values'
|
6
7
|
require 'active_support/core_ext/hash/keys'
|
7
8
|
require 'active_support/core_ext/hash/slice'
|
8
9
|
require 'active_support/core_ext/object/blank'
|
9
10
|
require 'nokogiri'
|
11
|
+
require 'nokogiri/html-ext'
|
12
|
+
|
13
|
+
require_relative 'micro_micro/version'
|
14
|
+
require_relative 'micro_micro/collectible'
|
15
|
+
require_relative 'micro_micro/helpers'
|
16
|
+
|
17
|
+
require_relative 'micro_micro/parsers/date_time_parser'
|
18
|
+
require_relative 'micro_micro/parsers/value_class_pattern_parser'
|
19
|
+
|
20
|
+
require_relative 'micro_micro/parsers/base_property_parser'
|
21
|
+
require_relative 'micro_micro/parsers/date_time_property_parser'
|
22
|
+
require_relative 'micro_micro/parsers/embedded_markup_property_parser'
|
23
|
+
require_relative 'micro_micro/parsers/plain_text_property_parser'
|
24
|
+
require_relative 'micro_micro/parsers/url_property_parser'
|
25
|
+
|
26
|
+
require_relative 'micro_micro/parsers/base_implied_property_parser'
|
27
|
+
require_relative 'micro_micro/parsers/implied_name_property_parser'
|
28
|
+
require_relative 'micro_micro/parsers/implied_photo_property_parser'
|
29
|
+
require_relative 'micro_micro/parsers/implied_url_property_parser'
|
30
|
+
|
31
|
+
require_relative 'micro_micro/document'
|
32
|
+
require_relative 'micro_micro/item'
|
33
|
+
require_relative 'micro_micro/property'
|
34
|
+
require_relative 'micro_micro/implied_property'
|
35
|
+
require_relative 'micro_micro/relationship'
|
10
36
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
require 'micro_micro/parsers/value_class_pattern_parser'
|
16
|
-
|
17
|
-
require 'micro_micro/parsers/base_property_parser'
|
18
|
-
require 'micro_micro/parsers/date_time_property_parser'
|
19
|
-
require 'micro_micro/parsers/embedded_markup_property_parser'
|
20
|
-
require 'micro_micro/parsers/implied_name_property_parser'
|
21
|
-
require 'micro_micro/parsers/implied_photo_property_parser'
|
22
|
-
require 'micro_micro/parsers/implied_url_property_parser'
|
23
|
-
require 'micro_micro/parsers/plain_text_property_parser'
|
24
|
-
require 'micro_micro/parsers/url_property_parser'
|
25
|
-
|
26
|
-
require 'micro_micro/document'
|
27
|
-
require 'micro_micro/item'
|
28
|
-
require 'micro_micro/property'
|
29
|
-
require 'micro_micro/implied_property'
|
30
|
-
require 'micro_micro/relationship'
|
31
|
-
|
32
|
-
require 'micro_micro/collections/base_collection'
|
33
|
-
require 'micro_micro/collections/items_collection'
|
34
|
-
require 'micro_micro/collections/properties_collection'
|
35
|
-
require 'micro_micro/collections/relationships_collection'
|
37
|
+
require_relative 'micro_micro/collections/base_collection'
|
38
|
+
require_relative 'micro_micro/collections/items_collection'
|
39
|
+
require_relative 'micro_micro/collections/properties_collection'
|
40
|
+
require_relative 'micro_micro/collections/relationships_collection'
|
36
41
|
|
37
42
|
module MicroMicro
|
38
43
|
# Parse a string of HTML for microformats2-encoded data.
|
data/micromicro.gemspec
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'lib/micro_micro/version'
|
2
4
|
|
3
5
|
Gem::Specification.new do |spec|
|
4
|
-
spec.required_ruby_version =
|
6
|
+
spec.required_ruby_version = '>= 2.7', '< 4'
|
5
7
|
|
6
8
|
spec.name = 'micromicro'
|
7
9
|
spec.version = MicroMicro::VERSION
|
@@ -19,10 +21,13 @@ Gem::Specification.new do |spec|
|
|
19
21
|
|
20
22
|
spec.require_paths = ['lib']
|
21
23
|
|
22
|
-
spec.metadata
|
23
|
-
|
24
|
+
spec.metadata = {
|
25
|
+
'bug_tracker_uri' => "#{spec.homepage}/issues",
|
26
|
+
'changelog_uri' => "#{spec.homepage}/blob/v#{spec.version}/CHANGELOG.md",
|
27
|
+
'rubygems_mfa_required' => 'true'
|
28
|
+
}
|
24
29
|
|
25
|
-
spec.add_runtime_dependency '
|
26
|
-
spec.add_runtime_dependency '
|
27
|
-
spec.add_runtime_dependency 'nokogiri', '~>
|
30
|
+
spec.add_runtime_dependency 'activesupport', '~> 7.0'
|
31
|
+
spec.add_runtime_dependency 'nokogiri', '>= 1.13'
|
32
|
+
spec.add_runtime_dependency 'nokogiri-html-ext', '~> 0.2.0'
|
28
33
|
end
|
metadata
CHANGED
@@ -1,57 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: micromicro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '7.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '7.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.13'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.13'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name: nokogiri
|
42
|
+
name: nokogiri-html-ext
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 0.2.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 0.2.0
|
55
55
|
description: Extract microformats2-encoded data from HTML documents.
|
56
56
|
email:
|
57
57
|
- jason@sixtwothree.org
|
@@ -69,8 +69,10 @@ files:
|
|
69
69
|
- lib/micro_micro/collections/properties_collection.rb
|
70
70
|
- lib/micro_micro/collections/relationships_collection.rb
|
71
71
|
- lib/micro_micro/document.rb
|
72
|
+
- lib/micro_micro/helpers.rb
|
72
73
|
- lib/micro_micro/implied_property.rb
|
73
74
|
- lib/micro_micro/item.rb
|
75
|
+
- lib/micro_micro/parsers/base_implied_property_parser.rb
|
74
76
|
- lib/micro_micro/parsers/base_property_parser.rb
|
75
77
|
- lib/micro_micro/parsers/date_time_parser.rb
|
76
78
|
- lib/micro_micro/parsers/date_time_property_parser.rb
|
@@ -91,8 +93,9 @@ licenses:
|
|
91
93
|
- MIT
|
92
94
|
metadata:
|
93
95
|
bug_tracker_uri: https://github.com/jgarber623/micromicro/issues
|
94
|
-
changelog_uri: https://github.com/jgarber623/micromicro/blob/
|
95
|
-
|
96
|
+
changelog_uri: https://github.com/jgarber623/micromicro/blob/v2.0.0/CHANGELOG.md
|
97
|
+
rubygems_mfa_required: 'true'
|
98
|
+
post_install_message:
|
96
99
|
rdoc_options: []
|
97
100
|
require_paths:
|
98
101
|
- lib
|
@@ -100,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
100
103
|
requirements:
|
101
104
|
- - ">="
|
102
105
|
- !ruby/object:Gem::Version
|
103
|
-
version: '2.
|
106
|
+
version: '2.7'
|
104
107
|
- - "<"
|
105
108
|
- !ruby/object:Gem::Version
|
106
109
|
version: '4'
|
@@ -110,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
113
|
- !ruby/object:Gem::Version
|
111
114
|
version: '0'
|
112
115
|
requirements: []
|
113
|
-
rubygems_version: 3.
|
114
|
-
signing_key:
|
116
|
+
rubygems_version: 3.3.16
|
117
|
+
signing_key:
|
115
118
|
specification_version: 4
|
116
119
|
summary: Extract microformats2-encoded data from HTML documents.
|
117
120
|
test_files: []
|