webmention 0.1.6 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.editorconfig +10 -0
- data/.gitignore +31 -14
- data/.reek.yml +8 -0
- data/.rubocop +3 -0
- data/.rubocop.yml +30 -0
- data/.ruby-version +1 -0
- data/.simplecov +10 -0
- data/.travis.yml +13 -3
- data/CHANGELOG.md +18 -0
- data/CONTRIBUTING.md +37 -0
- data/LICENSE +13 -0
- data/README.md +77 -45
- data/Rakefile +18 -7
- data/lib/webmention.rb +23 -5
- data/lib/webmention/client.rb +24 -167
- data/lib/webmention/exceptions.rb +15 -0
- data/lib/webmention/http_request.rb +41 -0
- data/lib/webmention/parsers.rb +29 -0
- data/lib/webmention/parsers/html_parser.rb +73 -0
- data/lib/webmention/registerable.rb +11 -0
- data/lib/webmention/version.rb +1 -1
- data/webmention.gemspec +39 -30
- metadata +156 -69
- data/bin/webmention +0 -59
- data/example.rb +0 -20
- data/test/data/sample_html.rb +0 -125
- data/test/lib/webmention/crawl_test.rb +0 -23
- data/test/lib/webmention/discovery_test.rb +0 -124
- data/test/lib/webmention/mention_test.rb +0 -38
- data/test/lib/webmention/url_test.rb +0 -45
- data/test/lib/webmention/version_test.rb +0 -9
- data/test/test_helper.rb +0 -5
data/lib/webmention/client.rb
CHANGED
@@ -1,190 +1,47 @@
|
|
1
1
|
module Webmention
|
2
2
|
class Client
|
3
|
-
|
4
|
-
|
3
|
+
def initialize(source)
|
4
|
+
raise ArgumentError, "source must be a String (given #{source.class.name})" unless source.is_a?(String)
|
5
5
|
|
6
|
-
|
7
|
-
attr_reader :links
|
6
|
+
@source = source
|
8
7
|
|
9
|
-
|
10
|
-
#
|
11
|
-
# url - The url you want us to crawl.
|
12
|
-
def initialize(url)
|
13
|
-
@url = URI.parse(url)
|
14
|
-
@links ||= Set.new
|
15
|
-
|
16
|
-
unless Webmention::Client.valid_http_url? @url
|
17
|
-
raise ArgumentError.new "#{@url} is not a valid HTTP or HTTPS URI."
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
# Public: Crawl the url this client was initialized with.
|
22
|
-
#
|
23
|
-
# Returns the number of links found.
|
24
|
-
def crawl
|
25
|
-
@links ||= Set.new
|
26
|
-
if @url.nil?
|
27
|
-
raise ArgumentError.new "url is nil."
|
28
|
-
end
|
29
|
-
|
30
|
-
Nokogiri::HTML(open(self.url)).css('.h-entry a').each do |link|
|
31
|
-
link = link.attribute('href').to_s
|
32
|
-
if Webmention::Client.valid_http_url? link
|
33
|
-
@links.add link
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
return @links.count
|
8
|
+
raise ArgumentError, 'source must be an absolute URL (e.g. https://example.com)' unless source_uri.absolute?
|
38
9
|
end
|
39
10
|
|
40
|
-
|
41
|
-
|
42
|
-
# Returns the number of links mentioned.
|
43
|
-
def send_mentions
|
44
|
-
if self.links.nil? or self.links.empty?
|
45
|
-
self.crawl
|
46
|
-
end
|
47
|
-
|
48
|
-
cnt = 0
|
49
|
-
self.links.each do |link|
|
50
|
-
endpoint = Webmention::Client.supports_webmention? link
|
51
|
-
if endpoint
|
52
|
-
cnt += 1 if Webmention::Client.send_mention endpoint, self.url, link
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
return cnt
|
11
|
+
def send_all_mentions
|
12
|
+
mentioned_urls.each_with_object({}) { |url, hash| hash[url] = send_mention(url) }
|
57
13
|
end
|
58
14
|
|
59
|
-
|
60
|
-
|
61
|
-
# endpoint - URL to send mention to.
|
62
|
-
# source - Source of mention (your page).
|
63
|
-
# target - The link that was mentioned in the source page.
|
64
|
-
#
|
65
|
-
# Returns a boolean.
|
66
|
-
def self.send_mention endpoint, source, target, full_response=false
|
67
|
-
data = {
|
68
|
-
:source => source,
|
69
|
-
:target => target,
|
70
|
-
}
|
71
|
-
|
72
|
-
# Ensure the endpoint is an absolute URL
|
73
|
-
endpoint = absolute_endpoint endpoint, target
|
15
|
+
def mentioned_urls
|
16
|
+
raise UnsupportedMimeTypeError, "Unsupported MIME Type: #{source_response.mime_type}" unless parser_for_mime_type
|
74
17
|
|
75
|
-
|
76
|
-
response = HTTParty.post(endpoint, {
|
77
|
-
:body => data
|
78
|
-
})
|
79
|
-
|
80
|
-
if full_response
|
81
|
-
return response
|
82
|
-
else
|
83
|
-
return response.code == 200 || response.code == 202
|
84
|
-
end
|
85
|
-
rescue
|
86
|
-
return false
|
87
|
-
end
|
18
|
+
@mentioned_urls ||= parser_for_mime_type.new(source_response).results
|
88
19
|
end
|
89
20
|
|
90
|
-
|
91
|
-
|
92
|
-
# url - URL to check
|
93
|
-
#
|
94
|
-
# Returns false if does not support webmention, returns string
|
95
|
-
# of url to ping if it does.
|
96
|
-
def self.supports_webmention? url
|
97
|
-
return false if !Webmention::Client.valid_http_url? url
|
98
|
-
|
99
|
-
doc = nil
|
100
|
-
|
101
|
-
begin
|
102
|
-
response = HTTParty.get(url, {
|
103
|
-
:timeout => 3,
|
104
|
-
:headers => {
|
105
|
-
'User-Agent' => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36 (https://rubygems.org/gems/webmention)",
|
106
|
-
'Accept' => '*/*'
|
107
|
-
}
|
108
|
-
})
|
109
|
-
|
110
|
-
# First check the HTTP Headers
|
111
|
-
if !response.headers['Link'].nil?
|
112
|
-
endpoint = self.discover_webmention_endpoint_from_header response.headers['Link']
|
113
|
-
return endpoint if endpoint
|
114
|
-
end
|
21
|
+
def send_mention(target)
|
22
|
+
endpoint = IndieWeb::Endpoints.get(target).webmention
|
115
23
|
|
116
|
-
|
117
|
-
endpoint = self.discover_webmention_endpoint_from_html response.body.to_s
|
118
|
-
return endpoint if endpoint
|
24
|
+
return unless endpoint
|
119
25
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
# return doc.css('link[rel="pingback"]').attribute("href").value
|
124
|
-
# end
|
125
|
-
|
126
|
-
rescue EOFError
|
127
|
-
rescue Errno::ECONNRESET
|
128
|
-
end
|
129
|
-
|
130
|
-
return false
|
26
|
+
HttpRequest.post(Addressable::URI.parse(endpoint), source: @source, target: target)
|
27
|
+
rescue IndieWeb::Endpoints::IndieWebEndpointsError => exception
|
28
|
+
raise Webmention.const_get(exception.class.name.split('::').last), exception
|
131
29
|
end
|
132
30
|
|
133
|
-
|
134
|
-
doc = Nokogiri::HTML(html)
|
135
|
-
if !doc.css('[rel~="webmention"]').css('[href]').empty?
|
136
|
-
doc.css('[rel~="webmention"]').css('[href]').attribute("href").value
|
137
|
-
elsif !doc.css('[rel="http://webmention.org/"]').css('[href]').empty?
|
138
|
-
doc.css('[rel="http://webmention.org/"]').css('[href]').attribute("href").value
|
139
|
-
elsif !doc.css('[rel="http://webmention.org"]').css('[href]').empty?
|
140
|
-
doc.css('[rel="http://webmention.org"]').css('[href]').attribute("href").value
|
141
|
-
else
|
142
|
-
false
|
143
|
-
end
|
144
|
-
end
|
31
|
+
private
|
145
32
|
|
146
|
-
def
|
147
|
-
|
148
|
-
return matches[1]
|
149
|
-
elsif matches = header.match(%r{<([^>]+)>; rel=webmention})
|
150
|
-
return matches[1]
|
151
|
-
elsif matches = header.match(%r{rel="[^"]*\s?webmention\s?[^"]*"; <([^>]+)>})
|
152
|
-
return matches[1]
|
153
|
-
elsif matches = header.match(%r{rel=webmention; <([^>]+)>})
|
154
|
-
return matches[1]
|
155
|
-
elsif matches = header.match(%r{<([^>]+)>; rel="http://webmention\.org/?"})
|
156
|
-
return matches[1]
|
157
|
-
elsif matches = header.match(%r{rel="http://webmention\.org/?"; <([^>]+)>})
|
158
|
-
return matches[1]
|
159
|
-
end
|
160
|
-
return false
|
33
|
+
def parser_for_mime_type
|
34
|
+
@parser_for_mime_type ||= Parsers.registered[source_response.mime_type]
|
161
35
|
end
|
162
36
|
|
163
|
-
|
164
|
-
|
165
|
-
# url - URL to check
|
166
|
-
#
|
167
|
-
# Returns a boolean.
|
168
|
-
def self.valid_http_url? url
|
169
|
-
if url.is_a? String
|
170
|
-
url = URI.parse(url)
|
171
|
-
end
|
172
|
-
|
173
|
-
return (url.is_a? URI::HTTP or url.is_a? URI::HTTPS)
|
37
|
+
def source_response
|
38
|
+
@source_response ||= HttpRequest.get(source_uri)
|
174
39
|
end
|
175
40
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
#
|
181
|
-
# Returns original endpoint if it is already an absolute URL; constructs
|
182
|
-
# new absolute URL using relative endpoint if not
|
183
|
-
def self.absolute_endpoint endpoint, url
|
184
|
-
unless Webmention::Client.valid_http_url? endpoint
|
185
|
-
endpoint = URI.join(url, endpoint).to_s
|
186
|
-
end
|
187
|
-
endpoint
|
41
|
+
def source_uri
|
42
|
+
@source_uri ||= Addressable::URI.parse(@source)
|
43
|
+
rescue Addressable::URI::InvalidURIError => exception
|
44
|
+
raise InvalidURIError, exception
|
188
45
|
end
|
189
46
|
end
|
190
47
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Webmention
|
2
|
+
class WebmentionClientError < StandardError; end
|
3
|
+
|
4
|
+
class ArgumentError < WebmentionClientError; end
|
5
|
+
|
6
|
+
class ConnectionError < WebmentionClientError; end
|
7
|
+
|
8
|
+
class InvalidURIError < WebmentionClientError; end
|
9
|
+
|
10
|
+
class TimeoutError < WebmentionClientError; end
|
11
|
+
|
12
|
+
class TooManyRedirectsError < WebmentionClientError; end
|
13
|
+
|
14
|
+
class UnsupportedMimeTypeError < WebmentionClientError; end
|
15
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Webmention
|
2
|
+
class HttpRequest
|
3
|
+
# Defaults derived from Webmention specification examples
|
4
|
+
# https://www.w3.org/TR/webmention/#limits-on-get-requests
|
5
|
+
# rubocop:disable Layout/AlignHash
|
6
|
+
HTTP_CLIENT_OPTS = {
|
7
|
+
follow: {
|
8
|
+
max_hops: 20
|
9
|
+
},
|
10
|
+
headers: {
|
11
|
+
accept: '*/*',
|
12
|
+
user_agent: 'Webmention Client (https://rubygems.org/gems/webmention)'
|
13
|
+
},
|
14
|
+
timeout_options: {
|
15
|
+
connect_timeout: 5,
|
16
|
+
read_timeout: 5
|
17
|
+
}
|
18
|
+
}.freeze
|
19
|
+
# rubocop:enable Layout/AlignHash
|
20
|
+
|
21
|
+
class << self
|
22
|
+
def get(uri)
|
23
|
+
request(:get, uri)
|
24
|
+
end
|
25
|
+
|
26
|
+
def post(uri, **options)
|
27
|
+
request(:post, uri, form: options)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def request(method, uri, **options)
|
33
|
+
HTTP::Client.new(HTTP_CLIENT_OPTS).request(method, uri, options)
|
34
|
+
rescue HTTP::ConnectionError,
|
35
|
+
HTTP::TimeoutError,
|
36
|
+
HTTP::Redirector::TooManyRedirectsError => exception
|
37
|
+
raise Webmention.const_get(exception.class.name.split('::').last), exception
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Webmention
|
2
|
+
module Parsers
|
3
|
+
extend Registerable
|
4
|
+
|
5
|
+
class BaseParser
|
6
|
+
def initialize(response)
|
7
|
+
raise ArgumentError, "response must be an HTTP::Response (given #{response.class.name})" unless response.is_a?(HTTP::Response)
|
8
|
+
|
9
|
+
@response = response
|
10
|
+
|
11
|
+
raise UnsupportedMimeTypeError, "Unsupported MIME Type: #{response.mime_type}" unless self.class.mime_types.include?(response.mime_type)
|
12
|
+
end
|
13
|
+
|
14
|
+
def results
|
15
|
+
@results ||= parse_response_body
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def response_body
|
21
|
+
@response_body ||= @response.body.to_s
|
22
|
+
end
|
23
|
+
|
24
|
+
def response_url
|
25
|
+
@response_url ||= @response.uri.to_s
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Webmention
|
2
|
+
module Parsers
|
3
|
+
class HtmlParser < BaseParser
|
4
|
+
def self.mime_types
|
5
|
+
['text/html']
|
6
|
+
end
|
7
|
+
|
8
|
+
Parsers.register(self)
|
9
|
+
|
10
|
+
HTML_ATTRIBUTE_MAP = {
|
11
|
+
cite: %w[blockquote del ins q],
|
12
|
+
data: %w[object],
|
13
|
+
href: %w[a area],
|
14
|
+
poster: %w[video],
|
15
|
+
src: %w[audio embed img source track video],
|
16
|
+
srcset: %w[img source]
|
17
|
+
}.freeze
|
18
|
+
|
19
|
+
CSS_SELECTORS_MAP = HTML_ATTRIBUTE_MAP.each_with_object({}) do |(attribute, elements), hash|
|
20
|
+
hash[attribute] = elements.map { |element| "#{element}[#{attribute}]" }
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def doc
|
26
|
+
@doc ||= Nokogiri::HTML(response_body)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Parse an HTML string for URLs
|
30
|
+
#
|
31
|
+
# @return [Array] the URLs
|
32
|
+
def parse_response_body
|
33
|
+
CSS_SELECTORS_MAP
|
34
|
+
.each_with_object([]) { |(*args), array| array << search_node(*args) }
|
35
|
+
.flatten
|
36
|
+
.map { |url| Absolutely.to_abs(base: response_url, relative: url) }
|
37
|
+
.uniq
|
38
|
+
end
|
39
|
+
|
40
|
+
def root_node
|
41
|
+
@root_node ||= doc.css('.h-entry .e-content').first || doc.css('.h-entry').first || doc.css('body')
|
42
|
+
end
|
43
|
+
|
44
|
+
def search_node(attribute, selectors)
|
45
|
+
NodeParser.nodes_from(root_node, selectors).map { |node| NodeParser.values_from(node, attribute) }.reject(&:empty?)
|
46
|
+
end
|
47
|
+
|
48
|
+
module NodeParser
|
49
|
+
class << self
|
50
|
+
# Search a node for matching elements
|
51
|
+
#
|
52
|
+
# @param node [Nokogiri::XML::Element]
|
53
|
+
# @param selectors [Array]
|
54
|
+
# @return [Nokogiri::XML::NodeSet]
|
55
|
+
def nodes_from(node, selectors)
|
56
|
+
node.css(*selectors)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Derive attribute values from a single node
|
60
|
+
#
|
61
|
+
# @param node [Nokogiri::XML::Element]
|
62
|
+
# @param attribute [Symbol]
|
63
|
+
# @return [Array] the HTML attribute values
|
64
|
+
def values_from(node, attribute)
|
65
|
+
return Array(node[attribute]) unless attribute == :srcset
|
66
|
+
|
67
|
+
node[attribute].split(',').map { |value| value.strip.match(/^\S+/).to_s }
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/lib/webmention/version.rb
CHANGED
data/webmention.gemspec
CHANGED
@@ -1,34 +1,43 @@
|
|
1
|
-
lib = File.expand_path('
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
|
3
4
|
require 'webmention/version'
|
4
5
|
|
5
|
-
Gem::Specification.new do |
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.required_ruby_version = ['>= 2.4', '< 2.7']
|
8
|
+
|
9
|
+
spec.name = 'webmention'
|
10
|
+
spec.version = Webmention::VERSION
|
11
|
+
spec.authors = ['Aaron Parecki', 'Nat Welch']
|
12
|
+
spec.email = ['aaron@parecki.com']
|
13
|
+
|
14
|
+
spec.summary = 'Webmention notification client'
|
15
|
+
spec.description = 'A Ruby gem for sending webmention notifications.'
|
16
|
+
spec.homepage = 'https://github.com/indieweb/webmention-client-ruby'
|
17
|
+
spec.license = 'Apache-2.0'
|
18
|
+
|
19
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(bin|test)/}) }
|
20
|
+
|
21
|
+
spec.require_paths = ['lib']
|
22
|
+
|
23
|
+
spec.metadata = {
|
24
|
+
'bug_tracker_uri' => "#{spec.homepage}/issues",
|
25
|
+
'changelog_uri' => "#{spec.homepage}/blob/v#{spec.version}/CHANGELOG.md"
|
26
|
+
}
|
27
|
+
|
28
|
+
spec.add_development_dependency 'minitest', '~> 5.11'
|
29
|
+
spec.add_development_dependency 'minitest-reporters', '~> 1.3'
|
30
|
+
spec.add_development_dependency 'rake', '~> 12.3'
|
31
|
+
spec.add_development_dependency 'reek', '~> 5.4'
|
32
|
+
spec.add_development_dependency 'rubocop', '~> 0.72.0'
|
33
|
+
spec.add_development_dependency 'rubocop-performance', '~> 1.4'
|
34
|
+
spec.add_development_dependency 'simplecov', '~> 0.17.0'
|
35
|
+
spec.add_development_dependency 'simplecov-console', '~> 0.5.0'
|
36
|
+
spec.add_development_dependency 'webmock', '~> 3.6'
|
37
|
+
|
38
|
+
spec.add_runtime_dependency 'absolutely', '~> 3.0'
|
39
|
+
spec.add_runtime_dependency 'addressable', '~> 2.6'
|
40
|
+
spec.add_runtime_dependency 'http', '~> 5.0.0.pre'
|
41
|
+
spec.add_runtime_dependency 'indieweb-endpoints', '~> 0.6.0'
|
42
|
+
spec.add_runtime_dependency 'nokogiri', '~> 1.10'
|
34
43
|
end
|