micromicro 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -9
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +7 -1
- data/lib/micro_micro/collections/items_collection.rb +3 -1
- data/lib/micro_micro/collections/properties_collection.rb +12 -0
- data/lib/micro_micro/collections/relationships_collection.rb +10 -9
- data/lib/micro_micro/document.rb +10 -98
- data/lib/micro_micro/helpers.rb +82 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +53 -60
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +4 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +7 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +3 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +14 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +19 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +11 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +3 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +20 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +27 -42
- data/lib/micro_micro/property.rb +68 -56
- data/lib/micro_micro/relationship.rb +15 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +31 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3037dad18fbd29d5487d985c388e288a8a09beab3f075488696ece006855f64
|
4
|
+
data.tar.gz: e0426d2ab7f6bfb762ff856f4bfb3afcca7af1c499ddedefdbfd9111c41c26eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8024a4c6de518aab23991e056554ea97d4a21c06b3ba27e38362840be82b5021bebe618d82698903d1e2ce7a5b49d22b131f8cfb7d460074198a89cef9873476
|
7
|
+
data.tar.gz: 6137ab273b1418e2e5f063eda6ef35f1b3bdec5841272e73ef3a9ac25c8e7995ac09aa2495c5f0ad9e88726e88c8fdba97ac50d8d5bdc0e16d271a00c5a42597
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,31 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.0.0 / 2022-08-12
|
4
|
+
|
5
|
+
- Refactor implied property parsers (203fec9)
|
6
|
+
- Add `Helpers` module (caa1c02)
|
7
|
+
- New `PropertiesCollection` and `Property` instance methods (e9bb38b):
|
8
|
+
- `PropertiesCollection#plain_text_properties`
|
9
|
+
- `PropertiesCollection#url_properties`
|
10
|
+
- `Property#date_time_property?`
|
11
|
+
- `Property#embedded_markup_property?`
|
12
|
+
- `Property#plain_text_property?`
|
13
|
+
- `Property#url_property?`
|
14
|
+
- Remove Addressable (66c2bb4)
|
15
|
+
- Refactor classes to use nokogiri-html-ext (33fdf4a)
|
16
|
+
- Update activesupport (563bf56)
|
17
|
+
- **Breaking change:** Set minimum supported Ruby to 2.7 (ba17d05)
|
18
|
+
- Update development Ruby to 2.7.6 (ba17d05)
|
19
|
+
- Remove Reek (c1e76c5)
|
20
|
+
- Update runtime dependency version constraints (f83f26a)
|
21
|
+
- ~~**Breaking change:** Set minimum supported Ruby to 2.6~~ (fc588cd)
|
22
|
+
- ~~Update development Ruby to 2.6.10~~ (d05a2ac)
|
23
|
+
|
3
24
|
## 1.1.0 / 2021-06-10
|
4
25
|
|
5
26
|
- Replace Absolutely dependency with Addressable (e93721b)
|
6
27
|
- Add support for Ruby 3.0 (d897c54)
|
7
|
-
- Update development Ruby version to 2.
|
28
|
+
- Update development Ruby version to 2.6.10 (051c9ad)
|
8
29
|
|
9
30
|
## 1.0.0 / 2020-11-08
|
10
31
|
|
data/CONTRIBUTING.md
CHANGED
@@ -8,9 +8,9 @@ There are a couple ways you can help improve MicroMicro:
|
|
8
8
|
|
9
9
|
## Getting Started
|
10
10
|
|
11
|
-
MicroMicro is developed using Ruby 2.
|
11
|
+
MicroMicro is developed using Ruby 2.7.6 and is additionally tested against Ruby 3.0 and 3.1 using [GitHub Actions](https://github.com/jgarber623/micromicro/actions).
|
12
12
|
|
13
|
-
Before making changes to MicroMicro, you'll want to install Ruby 2.
|
13
|
+
Before making changes to MicroMicro, you'll want to install Ruby 2.7.6. It's recommended that you use a Ruby version management tool like [rbenv](https://github.com/rbenv/rbenv), [chruby](https://github.com/postmodern/chruby), or [rvm](https://github.com/rvm/rvm). Once you've installed Ruby 2.7.6 using your method of choice, install the project's gems by running:
|
14
14
|
|
15
15
|
```sh
|
16
16
|
bundle install
|
@@ -22,7 +22,7 @@ bundle install
|
|
22
22
|
1. Install development dependencies as outlined above.
|
23
23
|
1. Create a feature branch for the code changes you're looking to make: `git checkout -b my-new-feature`.
|
24
24
|
1. _Write some code!_
|
25
|
-
1. If your changes would benefit from testing, add the necessary tests and verify everything passes by running `
|
25
|
+
1. If your changes would benefit from testing, add the necessary tests and verify everything passes by running `bundle exec rspec`.
|
26
26
|
1. Commit your changes: `git commit -am 'Add some new feature or fix some issue'`. _(See [this excellent article](https://chris.beams.io/posts/git-commit/) for tips on writing useful Git commit messages.)_
|
27
27
|
1. Push the branch to your fork: `git push -u origin my-new-feature`.
|
28
28
|
1. Create a new [pull request][pulls] and we'll review your changes.
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
[](https://rubygems.org/gems/micromicro)
|
6
6
|
[](https://rubygems.org/gems/micromicro)
|
7
|
-
[](https://github.com/jgarber623/micromicro/actions/workflows/ci.yml)
|
8
8
|
[](https://codeclimate.com/github/jgarber623/micromicro)
|
9
9
|
[](https://codeclimate.com/github/jgarber623/micromicro/code)
|
10
10
|
|
@@ -12,7 +12,7 @@
|
|
12
12
|
|
13
13
|
- Parses microformats2-encoded HTML documents according to the [microformats2 parsing specification](https://microformats.org/wiki/microformats2-parsing)
|
14
14
|
- Passes all microformats2 tests from [the official test suite](https://github.com/microformats/tests)¹
|
15
|
-
- Supports Ruby 2.
|
15
|
+
- Supports Ruby 2.7 and newer
|
16
16
|
|
17
17
|
**Note:** MicroMicro **does not** parse [Classic Microformats](https://microformats.org/wiki/Main_Page#Classic_Microformats) (referred to in [the parsing specification](https://microformats.org/wiki/microformats2-parsing#note_backward_compatibility_details) as "backcompat root classes" and "backcompat properties" and in vocabulary specifications in the "Parser Compatibility" sections [e.g. [h-entry](https://microformats.org/wiki/h-entry#Parser_Compatibility)]). To parse documents marked up with Classic Microformats, consider using [the official microformats-ruby parser](https://github.com/microformats/microformats-ruby).
|
18
18
|
|
@@ -20,24 +20,24 @@
|
|
20
20
|
|
21
21
|
## Getting Started
|
22
22
|
|
23
|
-
Before installing and using MicroMicro, you'll want to have [Ruby](https://www.ruby-lang.org) 2.
|
23
|
+
Before installing and using MicroMicro, you'll want to have [Ruby](https://www.ruby-lang.org) 2.7 (or newer) installed. It's recommended that you use a Ruby version management tool like [rbenv](https://github.com/rbenv/rbenv), [chruby](https://github.com/postmodern/chruby), or [rvm](https://github.com/rvm/rvm).
|
24
24
|
|
25
|
-
MicroMicro is developed using Ruby 2.
|
25
|
+
MicroMicro is developed using Ruby 2.7.6 and is additionally tested against Ruby 3.0 and 3.1 using [GitHub Actions](https://github.com/jgarber623/micromicro/actions).
|
26
26
|
|
27
27
|
## Installation
|
28
28
|
|
29
|
-
If you're using [Bundler](https://bundler.io), add MicroMicro to your project's
|
29
|
+
If you're using [Bundler](https://bundler.io) to manage gem dependencies, add MicroMicro to your project's Gemfile:
|
30
30
|
|
31
31
|
```ruby
|
32
|
-
source 'https://rubygems.org'
|
33
|
-
|
34
32
|
gem 'micromicro'
|
35
33
|
```
|
36
34
|
|
37
|
-
…and
|
35
|
+
…and run `bundle install` in your shell.
|
36
|
+
|
37
|
+
To install the gem manually, run the following in your shell:
|
38
38
|
|
39
39
|
```sh
|
40
|
-
|
40
|
+
gem install micromicro
|
41
41
|
```
|
42
42
|
|
43
43
|
## Usage
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Collections
|
3
5
|
class BaseCollection
|
@@ -12,10 +14,14 @@ module MicroMicro
|
|
12
14
|
members.each { |member| push(member) }
|
13
15
|
end
|
14
16
|
|
17
|
+
# :nocov:
|
15
18
|
# @return [String]
|
16
19
|
def inspect
|
17
|
-
|
20
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
21
|
+
"count: #{count}, " \
|
22
|
+
"members: #{members.inspect}>"
|
18
23
|
end
|
24
|
+
# :nocov:
|
19
25
|
|
20
26
|
# @param member [MicroMicro::Item, MicroMicro::Property, MicroMicro::Relationship]
|
21
27
|
def push(member)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Collections
|
3
5
|
class ItemsCollection < BaseCollection
|
@@ -8,7 +10,7 @@ module MicroMicro
|
|
8
10
|
|
9
11
|
# @return [Array<String>]
|
10
12
|
def types
|
11
|
-
@types ||=
|
13
|
+
@types ||= flat_map(&:types).uniq.sort
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Collections
|
3
5
|
class PropertiesCollection < BaseCollection
|
@@ -6,11 +8,21 @@ module MicroMicro
|
|
6
8
|
@names ||= map(&:name).uniq.sort
|
7
9
|
end
|
8
10
|
|
11
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
12
|
+
def plain_text_properties
|
13
|
+
self.class.new(select(&:plain_text_property?))
|
14
|
+
end
|
15
|
+
|
9
16
|
# @return [Hash{Symbol => Array<String, Hash>}]
|
10
17
|
def to_h
|
11
18
|
group_by(&:name).symbolize_keys.deep_transform_values(&:value)
|
12
19
|
end
|
13
20
|
|
21
|
+
# @return [MicroMicro::Collections::PropertiesCollection]
|
22
|
+
def url_properties
|
23
|
+
self.class.new(select(&:url_property?))
|
24
|
+
end
|
25
|
+
|
14
26
|
# @return [Array<String, Hash>]
|
15
27
|
def values
|
16
28
|
@values ||= map(&:value).uniq
|
@@ -1,26 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
module Collections
|
3
5
|
class RelationshipsCollection < BaseCollection
|
4
|
-
# @see https://microformats.org/wiki/microformats2-parsing#parse_a_hyperlink_element_for_rel_microformats
|
5
|
-
#
|
6
|
-
# @return [Hash{Symbol => Hash{Symbol => Array, String}}]
|
7
|
-
def group_by_url
|
8
|
-
group_by(&:href).symbolize_keys.transform_values { |relationships| relationships.first.to_h.slice!(:href) }
|
9
|
-
end
|
10
|
-
|
11
6
|
# @see https://microformats.org/wiki/microformats2-parsing#parse_a_hyperlink_element_for_rel_microformats
|
12
7
|
#
|
13
8
|
# @return [Hash{Symbol => Array<String>}]
|
14
9
|
def group_by_rel
|
15
|
-
# flat_map { |member| member.rels.map { |rel| [rel, member.href] } }.group_by(&:shift).symbolize_keys.transform_values(&:flatten).transform_values(&:uniq)
|
16
10
|
each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |member, hash|
|
17
11
|
member.rels.each { |rel| hash[rel] << member.href }
|
18
12
|
end.symbolize_keys.transform_values(&:uniq)
|
19
13
|
end
|
20
14
|
|
15
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_a_hyperlink_element_for_rel_microformats
|
16
|
+
#
|
17
|
+
# @return [Hash{Symbol => Hash{Symbol => Array, String}}]
|
18
|
+
def group_by_url
|
19
|
+
group_by(&:href).symbolize_keys.transform_values { |relationships| relationships.first.to_h.slice!(:href) }
|
20
|
+
end
|
21
|
+
|
21
22
|
# @return [Array<String>]
|
22
23
|
def rels
|
23
|
-
@rels ||=
|
24
|
+
@rels ||= flat_map(&:rels).uniq.sort
|
24
25
|
end
|
25
26
|
|
26
27
|
# @return [Array<String>]
|
data/lib/micro_micro/document.rb
CHANGED
@@ -1,29 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Document
|
3
|
-
# A map of HTML `srcset` attributes and their associated element names
|
4
|
-
#
|
5
|
-
# @see https://html.spec.whatwg.org/#srcset-attributes
|
6
|
-
# @see https://html.spec.whatwg.org/#attributes-3
|
7
|
-
HTML_IMAGE_CANDIDATE_STRINGS_ATTRIBUTES_MAP = {
|
8
|
-
'imagesrcset' => %w[link],
|
9
|
-
'srcset' => %w[img source]
|
10
|
-
}.freeze
|
11
|
-
|
12
|
-
# A map of HTML URL attributes and their associated element names
|
13
|
-
#
|
14
|
-
# @see https://html.spec.whatwg.org/#attributes-3
|
15
|
-
HTML_URL_ATTRIBUTES_MAP = {
|
16
|
-
'action' => %w[form],
|
17
|
-
'cite' => %w[blockquote del ins q],
|
18
|
-
'data' => %w[object],
|
19
|
-
'formaction' => %w[button input],
|
20
|
-
'href' => %w[a area base link],
|
21
|
-
'manifest' => %w[html],
|
22
|
-
'ping' => %w[a area],
|
23
|
-
'poster' => %w[video],
|
24
|
-
'src' => %w[audio embed iframe img input script source track video]
|
25
|
-
}.freeze
|
26
|
-
|
27
5
|
# Parse a string of HTML for microformats2-encoded data.
|
28
6
|
#
|
29
7
|
# MicroMicro::Document.new('<a href="/" class="h-card" rel="me">Jason Garber</a>', 'https://sixtwothree.org')
|
@@ -38,22 +16,23 @@ module MicroMicro
|
|
38
16
|
# @param markup [String] The HTML to parse for microformats2-encoded data.
|
39
17
|
# @param base_url [String] The URL associated with markup. Used for relative URL resolution.
|
40
18
|
def initialize(markup, base_url)
|
41
|
-
@
|
42
|
-
@base_url = base_url
|
43
|
-
|
44
|
-
resolve_relative_urls
|
19
|
+
@document = Nokogiri::HTML(markup, base_url).resolve_relative_urls!
|
45
20
|
end
|
46
21
|
|
22
|
+
# :nocov:
|
47
23
|
# @return [String]
|
48
24
|
def inspect
|
49
|
-
|
25
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
26
|
+
"items: #{items.inspect}, " \
|
27
|
+
"relationships: #{relationships.inspect}>"
|
50
28
|
end
|
29
|
+
# :nocov:
|
51
30
|
|
52
31
|
# A collection of items parsed from the provided markup.
|
53
32
|
#
|
54
33
|
# @return [MicroMicro::Collections::ItemsCollection]
|
55
34
|
def items
|
56
|
-
@items ||= Collections::ItemsCollection.new(Item.
|
35
|
+
@items ||= Collections::ItemsCollection.new(Item.from_context(document.element_children))
|
57
36
|
end
|
58
37
|
|
59
38
|
# A collection of relationships parsed from the provided markup.
|
@@ -76,76 +55,9 @@ module MicroMicro
|
|
76
55
|
}
|
77
56
|
end
|
78
57
|
|
79
|
-
# Ignore this node?
|
80
|
-
#
|
81
|
-
# @param node [Nokogiri::XML::Element]
|
82
|
-
# @return [Boolean]
|
83
|
-
def self.ignore_node?(node)
|
84
|
-
ignored_node_names.include?(node.name)
|
85
|
-
end
|
86
|
-
|
87
|
-
# A list of HTML element names the parser should ignore.
|
88
|
-
#
|
89
|
-
# @return [Array<String>]
|
90
|
-
def self.ignored_node_names
|
91
|
-
%w[script style template]
|
92
|
-
end
|
93
|
-
|
94
|
-
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_properties
|
95
|
-
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
96
|
-
#
|
97
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
98
|
-
# @yield [context]
|
99
|
-
# @return [String]
|
100
|
-
def self.text_content_from(context)
|
101
|
-
context.css(*ignored_node_names).unlink
|
102
|
-
|
103
|
-
yield(context) if block_given?
|
104
|
-
|
105
|
-
context.text.strip
|
106
|
-
end
|
107
|
-
|
108
58
|
private
|
109
59
|
|
110
|
-
attr_reader :base_url, :markup
|
111
|
-
|
112
|
-
# @return [Nokogiri::XML::Element, nil]
|
113
|
-
def base_element
|
114
|
-
@base_element ||= Nokogiri::HTML(markup).at('//base[@href]')
|
115
|
-
end
|
116
|
-
|
117
60
|
# @return [Nokogiri::HTML::Document]
|
118
|
-
|
119
|
-
@document ||= Nokogiri::HTML(markup, resolved_base_url)
|
120
|
-
end
|
121
|
-
|
122
|
-
def resolve_relative_urls
|
123
|
-
HTML_URL_ATTRIBUTES_MAP.each do |attribute, names|
|
124
|
-
document.xpath(*names.map { |name| "//#{name}[@#{attribute}]" }).each do |node|
|
125
|
-
node[attribute] = Addressable::URI.join(resolved_base_url, node[attribute].strip).normalize.to_s
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
HTML_IMAGE_CANDIDATE_STRINGS_ATTRIBUTES_MAP.each do |attribute, names|
|
130
|
-
document.xpath(*names.map { |name| "//#{name}[@#{attribute}]" }).each do |node|
|
131
|
-
candidates = node[attribute].split(',').map(&:strip).map { |candidate| candidate.match(/^(?<url>.+?)(?<descriptor>\s+.+)?$/) }
|
132
|
-
|
133
|
-
node[attribute] = candidates.map { |candidate| "#{Addressable::URI.join(resolved_base_url, candidate[:url]).normalize}#{candidate[:descriptor]}" }.join(', ')
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
self
|
138
|
-
end
|
139
|
-
|
140
|
-
# @return [String]
|
141
|
-
def resolved_base_url
|
142
|
-
@resolved_base_url ||= begin
|
143
|
-
if base_element
|
144
|
-
Addressable::URI.join(base_url, base_element['href'].strip).normalize.to_s
|
145
|
-
else
|
146
|
-
base_url
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
61
|
+
attr_reader :document
|
150
62
|
end
|
151
63
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MicroMicro
|
4
|
+
module Helpers
|
5
|
+
IGNORED_NODE_NAMES = %w[script style template].freeze
|
6
|
+
|
7
|
+
# @param node [Nokogiri::XML::Element]
|
8
|
+
# @param attributes_map [Hash{String => Array}]
|
9
|
+
# @return [String, nil]
|
10
|
+
def self.attribute_value_from(node, attributes_map)
|
11
|
+
attributes_map.filter_map do |attribute, names|
|
12
|
+
node[attribute] if names.include?(node.name) && node[attribute]
|
13
|
+
end.first
|
14
|
+
end
|
15
|
+
|
16
|
+
# @param node [Nokogiri::XML::Element]
|
17
|
+
# @return [Boolean]
|
18
|
+
def self.ignore_node?(node)
|
19
|
+
IGNORED_NODE_NAMES.include?(node.name)
|
20
|
+
end
|
21
|
+
|
22
|
+
# @param nodes [Nokogiri::XML::NodeSet]
|
23
|
+
# @return [Boolean]
|
24
|
+
def self.ignore_nodes?(nodes)
|
25
|
+
(nodes.map(&:name) & IGNORED_NODE_NAMES).any?
|
26
|
+
end
|
27
|
+
|
28
|
+
# @param node [Nokogiri::XML::Element]
|
29
|
+
# @return [Boolean]
|
30
|
+
def self.item_node?(node)
|
31
|
+
root_class_names_from(node).any?
|
32
|
+
end
|
33
|
+
|
34
|
+
# @param node [Nokogiri::XML::Element]
|
35
|
+
# @return [Array<String>]
|
36
|
+
def self.property_class_names_from(node)
|
37
|
+
node.classes.grep(/^(?:dt|e|p|u)(?:-[0-9a-z]+)?(?:-[a-z]+)+$/).uniq
|
38
|
+
end
|
39
|
+
|
40
|
+
# @param node [Nokogiri::XML::Element]
|
41
|
+
# @return [Boolean]
|
42
|
+
def self.property_node?(node)
|
43
|
+
property_class_names_from(node).any?
|
44
|
+
end
|
45
|
+
|
46
|
+
# @param node [Nokogiri::XML::Element]
|
47
|
+
# @return [Array<String>]
|
48
|
+
def self.root_class_names_from(node)
|
49
|
+
node.classes.grep(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/).uniq.sort
|
50
|
+
end
|
51
|
+
|
52
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parse_an_element_for_properties
|
53
|
+
# @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
|
54
|
+
#
|
55
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
56
|
+
# @yield [context]
|
57
|
+
# @return [String]
|
58
|
+
def self.text_content_from(context)
|
59
|
+
context.css(*IGNORED_NODE_NAMES).unlink
|
60
|
+
|
61
|
+
yield(context) if block_given?
|
62
|
+
|
63
|
+
context.text.strip
|
64
|
+
end
|
65
|
+
|
66
|
+
# @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
|
67
|
+
#
|
68
|
+
# @param node [Nokogiri::XML::Element]
|
69
|
+
# @return [Boolean]
|
70
|
+
def self.value_class_node?(node)
|
71
|
+
node.classes.include?('value')
|
72
|
+
end
|
73
|
+
|
74
|
+
# @see https://microformats.org/wiki/value-class-pattern#Parsing_value_from_a_title_attribute
|
75
|
+
#
|
76
|
+
# @param node [Nokogiri::XML::Element]
|
77
|
+
# @return [Boolean]
|
78
|
+
def self.value_title_node?(node)
|
79
|
+
node.classes.include?('value-title')
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
data/lib/micro_micro/item.rb
CHANGED
@@ -1,7 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module MicroMicro
|
2
4
|
class Item
|
3
5
|
include Collectible
|
4
6
|
|
7
|
+
# Extract items from a context.
|
8
|
+
#
|
9
|
+
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
10
|
+
# @return [Array<MicroMicro::Item>]
|
11
|
+
def self.from_context(context)
|
12
|
+
node_set_from(context).map { |node| new(node) }
|
13
|
+
end
|
14
|
+
|
15
|
+
# Extract item nodes from a context.
|
16
|
+
#
|
17
|
+
# @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
18
|
+
# @param node_set [Nokogiri::XML::NodeSet]
|
19
|
+
# @return [Nokogiri::XML::NodeSet]
|
20
|
+
def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
21
|
+
context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
22
|
+
|
23
|
+
if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
|
24
|
+
if Helpers.item_node?(context)
|
25
|
+
node_set << context unless Helpers.property_node?(context)
|
26
|
+
else
|
27
|
+
node_set_from(context.element_children, node_set)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
node_set
|
32
|
+
end
|
33
|
+
|
5
34
|
# Parse a node for microformats2-encoded data.
|
6
35
|
#
|
7
36
|
# @param node [Nokogiri::XML::Element]
|
@@ -19,7 +48,7 @@ module MicroMicro
|
|
19
48
|
#
|
20
49
|
# @return [MicroMicro::Collections::ItemsCollection]
|
21
50
|
def children
|
22
|
-
@children ||= Collections::ItemsCollection.new(
|
51
|
+
@children ||= Collections::ItemsCollection.new(self.class.from_context(node.element_children))
|
23
52
|
end
|
24
53
|
|
25
54
|
# The value of the node's `id` attribute, if present.
|
@@ -29,23 +58,28 @@ module MicroMicro
|
|
29
58
|
@id ||= node['id']&.strip
|
30
59
|
end
|
31
60
|
|
61
|
+
# :nocov:
|
32
62
|
# @return [String]
|
33
63
|
def inspect
|
34
|
-
|
64
|
+
"#<#{self.class}:#{format('%#0x', object_id)} " \
|
65
|
+
"types: #{types.inspect}, " \
|
66
|
+
"properties: #{properties.count}, " \
|
67
|
+
"children: #{children.count}>"
|
35
68
|
end
|
69
|
+
# :nocov:
|
36
70
|
|
37
71
|
# A collection of plain text properties parsed from the node.
|
38
72
|
#
|
39
73
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
40
74
|
def plain_text_properties
|
41
|
-
@plain_text_properties ||=
|
75
|
+
@plain_text_properties ||= properties.plain_text_properties
|
42
76
|
end
|
43
77
|
|
44
78
|
# A collection of properties parsed from the node.
|
45
79
|
#
|
46
80
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
47
81
|
def properties
|
48
|
-
@properties ||= Collections::PropertiesCollection.new(Property.
|
82
|
+
@properties ||= Collections::PropertiesCollection.new(Property.from_context(node.element_children))
|
49
83
|
end
|
50
84
|
|
51
85
|
# Return the parsed item as a Hash.
|
@@ -69,62 +103,14 @@ module MicroMicro
|
|
69
103
|
#
|
70
104
|
# @return [Array<String>]
|
71
105
|
def types
|
72
|
-
@types ||=
|
106
|
+
@types ||= Helpers.root_class_names_from(node)
|
73
107
|
end
|
74
108
|
|
75
109
|
# A collection of url properties parsed from the node.
|
76
110
|
#
|
77
111
|
# @return [MicroMicro::Collections::PropertiesCollection]
|
78
112
|
def url_properties
|
79
|
-
@url_properties ||=
|
80
|
-
end
|
81
|
-
|
82
|
-
# Does this node's `class` attribute contain root class names?
|
83
|
-
#
|
84
|
-
# @param node [Nokogiri::XML::Element]
|
85
|
-
# @return [Boolean]
|
86
|
-
def self.item_node?(node)
|
87
|
-
types_from(node).any?
|
88
|
-
end
|
89
|
-
|
90
|
-
# Extract items from a context.
|
91
|
-
#
|
92
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
93
|
-
# @return [Array<MicroMicro::Item>]
|
94
|
-
def self.items_from(context)
|
95
|
-
nodes_from(context).map { |node| new(node) }
|
96
|
-
end
|
97
|
-
|
98
|
-
# Extract item nodes from a context.
|
99
|
-
#
|
100
|
-
# @param context [Nokogiri::HTML::Document, Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
|
101
|
-
# @param node_set [Nokogiri::XML::NodeSet]
|
102
|
-
# @return [Nokogiri::XML::NodeSet]
|
103
|
-
def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
|
104
|
-
return nodes_from(context.element_children, node_set) if context.is_a?(Nokogiri::HTML::Document)
|
105
|
-
|
106
|
-
context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
|
107
|
-
|
108
|
-
if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
|
109
|
-
if item_node?(context)
|
110
|
-
node_set << context unless Property.property_node?(context)
|
111
|
-
else
|
112
|
-
nodes_from(context.element_children, node_set)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
node_set
|
117
|
-
end
|
118
|
-
|
119
|
-
# Extract root class names from a node.
|
120
|
-
#
|
121
|
-
# node = Nokogiri::HTML('<div class="h-card">Jason Garber</div>').at_css('div')
|
122
|
-
# MicroMicro::Item.types_from(node) #=> ['h-card']
|
123
|
-
#
|
124
|
-
# @param node [Nokogiri::XML::Element]
|
125
|
-
# @return [Array<String>]
|
126
|
-
def self.types_from(node)
|
127
|
-
node.classes.select { |token| token.match?(/^h(?:-[0-9a-z]+)?(?:-[a-z]+)+$/) }.uniq.sort
|
113
|
+
@url_properties ||= properties.url_properties
|
128
114
|
end
|
129
115
|
|
130
116
|
private
|
@@ -133,7 +119,7 @@ module MicroMicro
|
|
133
119
|
|
134
120
|
# @return [MicroMicro::ImpliedProperty]
|
135
121
|
def implied_name
|
136
|
-
@implied_name ||= ImpliedProperty.new(node,
|
122
|
+
@implied_name ||= ImpliedProperty.new(node, 'p-name')
|
137
123
|
end
|
138
124
|
|
139
125
|
# @return [Boolean]
|
@@ -143,7 +129,7 @@ module MicroMicro
|
|
143
129
|
|
144
130
|
# @return [MicroMicro::ImpliedProperty]
|
145
131
|
def implied_photo
|
146
|
-
@implied_photo ||= ImpliedProperty.new(node,
|
132
|
+
@implied_photo ||= ImpliedProperty.new(node, 'u-photo')
|
147
133
|
end
|
148
134
|
|
149
135
|
# @return [Boolean]
|
@@ -153,7 +139,7 @@ module MicroMicro
|
|
153
139
|
|
154
140
|
# @return [MicroMicro::ImpliedProperty]
|
155
141
|
def implied_url
|
156
|
-
@implied_url ||= ImpliedProperty.new(node,
|
142
|
+
@implied_url ||= ImpliedProperty.new(node, 'u-url')
|
157
143
|
end
|
158
144
|
|
159
145
|
# @return [Boolean]
|
@@ -163,17 +149,24 @@ module MicroMicro
|
|
163
149
|
|
164
150
|
# @return [Boolean]
|
165
151
|
def imply_name?
|
166
|
-
properties.none?
|
152
|
+
properties.names.none?('name') &&
|
153
|
+
properties.none?(&:embedded_markup_property?) &&
|
154
|
+
properties.none?(&:plain_text_property?) &&
|
155
|
+
!nested_items?
|
167
156
|
end
|
168
157
|
|
169
158
|
# @return [Boolean]
|
170
159
|
def imply_photo?
|
171
|
-
properties.none?
|
160
|
+
properties.names.none?('photo') &&
|
161
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
162
|
+
!nested_items?
|
172
163
|
end
|
173
164
|
|
174
165
|
# @return [Boolean]
|
175
166
|
def imply_url?
|
176
|
-
properties.none?
|
167
|
+
properties.names.none?('url') &&
|
168
|
+
properties.reject(&:implied?).none?(&:url_property?) &&
|
169
|
+
!nested_items?
|
177
170
|
end
|
178
171
|
|
179
172
|
# @return [Boolean]
|