jekyll-webmention_io 3.3.7 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll/commands/webmention.rb +16 -8
- data/lib/jekyll/generators/compile_js.rb +1 -2
- data/lib/jekyll/generators/queue_webmentions.rb +146 -15
- data/lib/jekyll/tags/webmention.rb +1 -2
- data/lib/jekyll/webmention_io/version.rb +1 -1
- data/lib/jekyll/webmention_io/webmention_item.rb +5 -21
- data/lib/jekyll/webmention_io.rb +201 -18
- metadata +27 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '09dd03eeea574cdb024da147de7b2be165209a645a600aa709a650975b10d1b0'
|
4
|
+
data.tar.gz: 8493598855950605e57eb6142fa4e4a9e3f9b5b09323b100ed6823192ef3ece1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de0de587f76d245329a31d2800b7e9d4e53bfc7bbdf318e3adeaef0ade095c2ae865e670510e18d0ca77a32398f9eb7060b9a6acbf2afb78a4d62708576ab015
|
7
|
+
data.tar.gz: d3113c40d8d9674f5b842ce2f2ce777d33cfe3abab46ee1dc97474f2eb0788ea1cd7f8c60699dc0dcf10563cec3ac8963f3fc6715f64f0bfddabdba8b7455a53
|
@@ -26,18 +26,17 @@ module Jekyll
|
|
26
26
|
WebmentionIO.log "msg", "Getting ready to send webmentions (this may take a while)."
|
27
27
|
|
28
28
|
count = 0
|
29
|
+
max_attempts = WebmentionIO.max_attempts()
|
29
30
|
cached_outgoing = WebmentionIO.get_cache_file_path "outgoing"
|
30
31
|
if File.exist?(cached_outgoing)
|
31
32
|
outgoing = WebmentionIO.load_yaml(cached_outgoing)
|
32
33
|
outgoing.each do |source, targets|
|
33
34
|
targets.each do |target, response|
|
34
35
|
# skip ones we’ve handled
|
35
|
-
next unless response == false
|
36
|
+
next unless response == false or response.instance_of? Integer
|
36
37
|
|
37
|
-
#
|
38
|
-
if target.index("//").zero?
|
39
|
-
target = "http:#{target}"
|
40
|
-
end
|
38
|
+
# skip protocol-less links, we'll need to revisit this again later
|
39
|
+
next if target.index("//").zero?
|
41
40
|
|
42
41
|
# produce an escaped version of the target (in case of special
|
43
42
|
# characters, etc).
|
@@ -46,6 +45,17 @@ module Jekyll
|
|
46
45
|
# skip bad URLs
|
47
46
|
next unless WebmentionIO.uri_ok?(escaped)
|
48
47
|
|
48
|
+
# give up if we've attempted this too many times
|
49
|
+
response = (response || 0) + 1
|
50
|
+
|
51
|
+
if ! max_attempts.nil? and response > max_attempts
|
52
|
+
outgoing[source][target] = ""
|
53
|
+
WebmentionIO.log "msg", "Giving up sending from #{source} to #{target}."
|
54
|
+
next
|
55
|
+
else
|
56
|
+
outgoing[source][target] = response
|
57
|
+
end
|
58
|
+
|
49
59
|
# get the endpoint
|
50
60
|
endpoint = WebmentionIO.get_webmention_endpoint(escaped)
|
51
61
|
next unless endpoint
|
@@ -64,9 +74,7 @@ module Jekyll
|
|
64
74
|
count += 1
|
65
75
|
end
|
66
76
|
end
|
67
|
-
|
68
|
-
WebmentionIO.dump_yaml(cached_outgoing, outgoing)
|
69
|
-
end
|
77
|
+
WebmentionIO.dump_yaml(cached_outgoing, outgoing)
|
70
78
|
WebmentionIO.log "msg", "#{count} webmentions sent."
|
71
79
|
end # file exists (outgoing)
|
72
80
|
end # def process
|
@@ -18,7 +18,6 @@ module Jekyll
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
using StringInflection
|
22
21
|
class CompileJS < Generator
|
23
22
|
safe true
|
24
23
|
priority :low
|
@@ -61,7 +60,7 @@ module Jekyll
|
|
61
60
|
def add_webmention_types
|
62
61
|
js_types = []
|
63
62
|
WebmentionIO.types.each do |type|
|
64
|
-
js_types.push "'#{type}': '#{type
|
63
|
+
js_types.push "'#{type}': '#{ActiveSupport::Inflector.singularize(type)}'"
|
65
64
|
end
|
66
65
|
types_js = <<-EOF
|
67
66
|
;(function(window,JekyllWebmentionIO){
|
@@ -8,6 +8,8 @@
|
|
8
8
|
# This generator caches sites you mention so they can be mentioned
|
9
9
|
#
|
10
10
|
|
11
|
+
require "jsonpath"
|
12
|
+
|
11
13
|
module Jekyll
|
12
14
|
module WebmentionIO
|
13
15
|
class QueueWebmentions < Generator
|
@@ -17,6 +19,7 @@ module Jekyll
|
|
17
19
|
def generate(site)
|
18
20
|
@site = site
|
19
21
|
@site_url = site.config["url"].to_s
|
22
|
+
@syndication = site.config.dig("webmentions", "syndication")
|
20
23
|
|
21
24
|
if @site.config['serving']
|
22
25
|
Jekyll::WebmentionIO.log "msg", "Webmentions lookups are not run when running `jekyll serve`."
|
@@ -31,52 +34,180 @@ module Jekyll
|
|
31
34
|
return
|
32
35
|
end
|
33
36
|
|
34
|
-
if @
|
35
|
-
WebmentionIO.log "info", "Webmention lookups are currently paused."
|
36
|
-
return
|
37
|
-
end
|
37
|
+
compile_jsonpath_expressions() if ! @syndication.nil?
|
38
38
|
|
39
|
-
WebmentionIO.log "msg", "
|
39
|
+
WebmentionIO.log "msg", "Collecting webmentions you’ve made. This may take a while."
|
40
40
|
|
41
41
|
upgrade_outgoing_webmention_cache
|
42
42
|
|
43
|
-
posts = WebmentionIO.gather_documents(@site)
|
44
|
-
|
43
|
+
posts = WebmentionIO.gather_documents(@site).select { |p| ! p.data["draft"] }
|
45
44
|
gather_webmentions(posts)
|
46
45
|
end
|
47
46
|
|
48
47
|
private
|
49
48
|
|
49
|
+
def compile_jsonpath_expressions()
|
50
|
+
@syndication.each do | target, config |
|
51
|
+
next if ! config.key? "response_mapping"
|
52
|
+
|
53
|
+
mapping = config["response_mapping"]
|
54
|
+
|
55
|
+
mapping.clone.each do | key, pattern |
|
56
|
+
begin
|
57
|
+
mapping[key] = JsonPath.new(pattern)
|
58
|
+
rescue StandardError => e
|
59
|
+
WebmentionIO.log "error", "Ignoring invalid JsonPath expression #{pattern}: #{e}"
|
60
|
+
|
61
|
+
mapping.delete(key)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def combine_values(a, b)
|
68
|
+
return case [ a.instance_of?(Array), b.instance_of?(Array) ]
|
69
|
+
when [ false, false ]
|
70
|
+
[ a, b ]
|
71
|
+
when [ false, true ]
|
72
|
+
[ a ] + b
|
73
|
+
when [ true, false ]
|
74
|
+
a << b
|
75
|
+
when [ true, true ]
|
76
|
+
a + b
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def process_syndication(post, target, response)
|
81
|
+
# If this is a syndication target, and we have a response,
|
82
|
+
# and the syndication entry contains a response mapping, then
|
83
|
+
# go through that map and store the selected values into
|
84
|
+
# the page front matter.
|
85
|
+
|
86
|
+
response = JSON.generate(response)
|
87
|
+
|
88
|
+
target["response_mapping"].each do |key, pattern|
|
89
|
+
result = pattern.on(response)
|
90
|
+
|
91
|
+
if ! result
|
92
|
+
WebmentionIO.log "msg", "The path #{skey} doesn't exist in the response from #{target['endpoint']} for #{uri}"
|
93
|
+
next
|
94
|
+
elsif result.length == 1
|
95
|
+
result = result.first
|
96
|
+
end
|
97
|
+
|
98
|
+
if post.data[key].nil?
|
99
|
+
post.data[key] = result
|
100
|
+
else
|
101
|
+
post.data[key] = combine_values(post.data[key], result)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def get_collection_for_post(post)
|
107
|
+
@site.collections.each do |name, collection|
|
108
|
+
next if name == "posts"
|
109
|
+
|
110
|
+
return collection if collection.docs.include? post
|
111
|
+
end
|
112
|
+
|
113
|
+
return nil
|
114
|
+
end
|
115
|
+
|
116
|
+
def get_syndication_target(uri)
|
117
|
+
return nil if @syndication.nil?
|
118
|
+
|
119
|
+
@syndication.values.detect { |t| t["endpoint"] == uri }
|
120
|
+
end
|
121
|
+
|
50
122
|
def gather_webmentions(posts)
|
51
123
|
webmentions = WebmentionIO.read_cached_webmentions "outgoing"
|
52
124
|
|
53
125
|
posts.each do |post|
|
54
|
-
|
126
|
+
# Collect potential outgoing webmentions in this post.
|
55
127
|
mentions = get_mentioned_uris(post)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
128
|
+
|
129
|
+
mentions.each do |mentioned_uri, response|
|
130
|
+
# If this webmention was a product of a syndication instruction,
|
131
|
+
# this goes back into the configuration and pulls that syndication
|
132
|
+
# target config out.
|
133
|
+
#
|
134
|
+
# If this is just a normal webmention, this will return nil.
|
135
|
+
target = get_syndication_target(mentioned_uri)
|
136
|
+
|
137
|
+
fulluri = File.join(@site_url, post.url)
|
138
|
+
shorturi = post.data["shorturl"] || fulluri
|
139
|
+
|
140
|
+
# Old cached responses might use either the full or short URIs so
|
141
|
+
# we need to check for both.
|
142
|
+
cached_response =
|
143
|
+
webmentions.dig(shorturi, mentioned_uri) ||
|
144
|
+
webmentions.dig(fulluri, mentioned_uri)
|
145
|
+
|
146
|
+
if cached_response.nil?
|
147
|
+
if ! target.nil?
|
148
|
+
uri = target["shorturl"] ? shorturi : fulluri
|
149
|
+
|
150
|
+
if target.key? "fragment"
|
151
|
+
uri += "#" + target["fragment"]
|
152
|
+
end
|
153
|
+
else
|
154
|
+
uri = fulluri
|
60
155
|
end
|
156
|
+
|
157
|
+
webmentions[uri] ||= {}
|
158
|
+
webmentions[uri][mentioned_uri] = response
|
159
|
+
elsif ! target.nil? and target.key? "response_mapping"
|
160
|
+
process_syndication(post, target, cached_response)
|
61
161
|
end
|
62
|
-
else
|
63
|
-
webmentions[uri] = mentions
|
64
162
|
end
|
65
163
|
end
|
66
164
|
|
67
|
-
|
165
|
+
# This check is moved down here because we still need the steps
|
166
|
+
# above to populate frontmatter during the site build, even
|
167
|
+
# if we're not going to modify the webmention cache.
|
168
|
+
|
169
|
+
if @site.config.dig("webmentions", "pause_lookups")
|
170
|
+
WebmentionIO.log "info", "Webmention lookups are currently paused."
|
171
|
+
return
|
172
|
+
else
|
173
|
+
WebmentionIO.cache_webmentions "outgoing", webmentions
|
174
|
+
end
|
68
175
|
end
|
69
176
|
|
70
177
|
def get_mentioned_uris(post)
|
178
|
+
collection = get_collection_for_post(post)
|
179
|
+
|
71
180
|
uris = {}
|
181
|
+
|
182
|
+
syndication_targets = []
|
183
|
+
syndication_targets += post.data["syndicate_to"] || []
|
184
|
+
|
185
|
+
if ! collection.nil?
|
186
|
+
syndication_targets += collection.metadata["syndicate_to"] || []
|
187
|
+
end
|
188
|
+
|
189
|
+
syndication_targets.each do |endpoint|
|
190
|
+
if @syndication.key? endpoint
|
191
|
+
uris[@syndication[endpoint]["endpoint"]] = false
|
192
|
+
else
|
193
|
+
WebmentionIO.log "msg", "Found reference to syndication endpoint \"#{endpoint}\" without matching entry in configuration."
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
72
197
|
if post.data["in_reply_to"]
|
73
198
|
uris[post.data["in_reply_to"]] = false
|
74
199
|
end
|
200
|
+
|
201
|
+
if post.data["bookmark_of"]
|
202
|
+
uris[post.data["bookmark_of"]] = false
|
203
|
+
end
|
204
|
+
|
75
205
|
post.content.scan(/(?:https?:)?\/\/[^\s)#\[\]{}<>%|\^"']+/) do |match|
|
76
206
|
unless uris.key? match
|
77
207
|
uris[match] = false
|
78
208
|
end
|
79
209
|
end
|
210
|
+
|
80
211
|
return uris
|
81
212
|
end
|
82
213
|
|
@@ -11,7 +11,6 @@ require "htmlbeautifier"
|
|
11
11
|
|
12
12
|
module Jekyll
|
13
13
|
module WebmentionIO
|
14
|
-
using StringInflection
|
15
14
|
class WebmentionTag < Liquid::Tag
|
16
15
|
def initialize(tag_name, text, tokens)
|
17
16
|
super
|
@@ -50,7 +49,7 @@ module Jekyll
|
|
50
49
|
if !WebmentionIO.types.include? type
|
51
50
|
WebmentionIO.log "warn", "#{type} are not extractable"
|
52
51
|
else
|
53
|
-
type = type
|
52
|
+
type = ActiveSupport::Inflector.singularize(type)
|
54
53
|
WebmentionIO.log "info", "Searching #{webmentions.length} webmentions for type==#{type}"
|
55
54
|
if webmentions.is_a? Hash
|
56
55
|
webmentions = webmentions.values
|
@@ -50,20 +50,6 @@ module Jekyll
|
|
50
50
|
@content = determine_content
|
51
51
|
end
|
52
52
|
|
53
|
-
def markdownify(string)
|
54
|
-
@converter ||= @site.find_converter_instance(Jekyll::Converters::Markdown)
|
55
|
-
|
56
|
-
if string
|
57
|
-
string = @converter.convert(string.to_s)
|
58
|
-
unless string.start_with?("<p")
|
59
|
-
string = string.sub(/^<[^>]+>/, "<p>").sub(/<\/[^>]+>$/, "</p>")
|
60
|
-
end
|
61
|
-
string.strip
|
62
|
-
else
|
63
|
-
string
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
53
|
def determine_uri
|
68
54
|
@raw["data"]["url"] || @raw["source"]
|
69
55
|
end
|
@@ -164,13 +150,11 @@ module Jekyll
|
|
164
150
|
end
|
165
151
|
|
166
152
|
def determine_content
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
markdownify(content)
|
153
|
+
if %w(post reply link).include? @type
|
154
|
+
@raw.dig("data", "content")
|
155
|
+
else
|
156
|
+
@raw.dig("activity", "sentence_html")
|
157
|
+
end
|
174
158
|
end
|
175
159
|
end
|
176
160
|
end
|
data/lib/jekyll/webmention_io.rb
CHANGED
@@ -15,16 +15,30 @@ require "json"
|
|
15
15
|
require "net/http"
|
16
16
|
require "uri"
|
17
17
|
require "openssl"
|
18
|
-
require "
|
18
|
+
require "active_support"
|
19
19
|
require "indieweb/endpoints"
|
20
20
|
require "webmention"
|
21
21
|
|
22
22
|
module Jekyll
|
23
23
|
module WebmentionIO
|
24
|
+
module UriState
|
25
|
+
UNSUPPORTED = "unsupported"
|
26
|
+
ERROR = "error"
|
27
|
+
FAILURE = "failure"
|
28
|
+
SUCCESS = "success"
|
29
|
+
end
|
30
|
+
|
31
|
+
module UriPolicy
|
32
|
+
BAN = "ban"
|
33
|
+
IGNORE = "ignore"
|
34
|
+
RETRY = "retry"
|
35
|
+
end
|
36
|
+
|
24
37
|
class << self
|
25
38
|
# define simple getters and setters
|
26
39
|
attr_reader :config, :jekyll_config, :cache_files, :cache_folder,
|
27
|
-
:file_prefix, :types, :supported_templates, :js_handler
|
40
|
+
:file_prefix, :types, :supported_templates, :js_handler,
|
41
|
+
:uri_whitelist, :uri_blacklist
|
28
42
|
attr_writer :api_suffix
|
29
43
|
end
|
30
44
|
|
@@ -69,6 +83,18 @@ module Jekyll
|
|
69
83
|
end
|
70
84
|
|
71
85
|
@js_handler = WebmentionIO::JSHandler.new(site)
|
86
|
+
|
87
|
+
@uri_whitelist = @config
|
88
|
+
.fetch("bad_uri_policy", {})
|
89
|
+
.fetch("whitelist", [])
|
90
|
+
.clone
|
91
|
+
.insert(-1, "^https?://webmention.io/")
|
92
|
+
.map { |expr| Regexp.new(expr) }
|
93
|
+
|
94
|
+
@uri_blacklist = @config
|
95
|
+
.fetch("bad_uri_policy", {})
|
96
|
+
.fetch("blacklist", [])
|
97
|
+
.map { |expr| Regexp.new(expr) }
|
72
98
|
end
|
73
99
|
|
74
100
|
# Setter
|
@@ -81,6 +107,10 @@ module Jekyll
|
|
81
107
|
Jekyll.sanitized_path(@cache_folder, "#{@file_prefix}#{filename}")
|
82
108
|
end
|
83
109
|
|
110
|
+
def self.max_attempts()
|
111
|
+
@config.dig("max_attempts")
|
112
|
+
end
|
113
|
+
|
84
114
|
def self.get_cache_file_path(key)
|
85
115
|
@cache_files[key] || false
|
86
116
|
end
|
@@ -213,11 +243,11 @@ module Jekyll
|
|
213
243
|
endpoint = IndieWeb::Endpoints.get(uri)[:webmention]
|
214
244
|
unless endpoint
|
215
245
|
log("info", "Could not find a webmention endpoint at #{uri}")
|
216
|
-
|
246
|
+
update_uri_cache(uri, UriState::UNSUPPORTED)
|
217
247
|
end
|
218
248
|
rescue StandardError => e
|
219
249
|
log "info", "Endpoint lookup failed for #{uri}: #{e.message}"
|
220
|
-
|
250
|
+
update_uri_cache(uri, UriState::FAILURE)
|
221
251
|
endpoint = false
|
222
252
|
end
|
223
253
|
endpoint
|
@@ -231,10 +261,24 @@ module Jekyll
|
|
231
261
|
case response.code
|
232
262
|
when 200, 201, 202
|
233
263
|
log "info", "Webmention successful!"
|
264
|
+
update_uri_cache(target, UriState::SUCCESS)
|
234
265
|
response.body
|
235
266
|
else
|
236
267
|
log "info", response.inspect
|
237
268
|
log "info", "Webmention failed, but will remain queued for next time"
|
269
|
+
|
270
|
+
if response.body
|
271
|
+
begin
|
272
|
+
body = JSON.parse(response.body)
|
273
|
+
|
274
|
+
if body.key? "error"
|
275
|
+
log "msg", "Endpoint returned error: #{body['error']}"
|
276
|
+
end
|
277
|
+
rescue
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
update_uri_cache(target, UriState::ERROR)
|
238
282
|
false
|
239
283
|
end
|
240
284
|
end
|
@@ -291,12 +335,12 @@ module Jekyll
|
|
291
335
|
redirect_to = redirect_to.relative? ? "#{original_uri.scheme}://#{original_uri.host}" + redirect_to.to_s : redirect_to.to_s
|
292
336
|
return get_uri_source(redirect_to, redirect_limit - 1, original_uri)
|
293
337
|
else
|
294
|
-
|
338
|
+
update_uri_cache(uri, UriState::FAILURE)
|
295
339
|
return false
|
296
340
|
end
|
297
341
|
else
|
298
342
|
log("warn", "too many redirects for #{original_uri}") if original_uri
|
299
|
-
|
343
|
+
update_uri_cache(uri, UriState::FAILURE)
|
300
344
|
return false
|
301
345
|
end
|
302
346
|
end
|
@@ -347,37 +391,176 @@ module Jekyll
|
|
347
391
|
return response
|
348
392
|
rescue *EXCEPTIONS => e
|
349
393
|
log "warn", "Got an error checking #{uri}: #{e}"
|
350
|
-
|
394
|
+
update_uri_cache(uri, UriState::FAILURE)
|
351
395
|
return false
|
352
396
|
end
|
353
397
|
end
|
354
398
|
|
355
|
-
#
|
356
|
-
|
399
|
+
# Given the provided state value (see UriState), retrieve the policy
|
400
|
+
# entry. If no entry exists, return a new default entry that
|
401
|
+
# indicates unlimited retries.
|
402
|
+
def self.get_bad_uri_policy_entry(state)
|
403
|
+
settings = @config.fetch("bad_uri_policy", {})
|
404
|
+
|
405
|
+
default_policy = { "policy" => UriPolicy::RETRY }
|
406
|
+
policy_entry = nil
|
407
|
+
|
408
|
+
# Retrieve the policy entry, the default entry, or the canned default
|
409
|
+
policy_entry = settings.fetch(state) {
|
410
|
+
settings.fetch("default", default_policy)
|
411
|
+
}
|
412
|
+
|
413
|
+
# Convert shorthand entry to full policy record
|
414
|
+
if policy_entry.instance_of? String
|
415
|
+
policy_entry = { "policy" => policy_entry }
|
416
|
+
end
|
417
|
+
|
418
|
+
if policy_entry["policy"] == UriPolicy::RETRY and ! policy_entry.key? "retry_delay"
|
419
|
+
# If this is a retry policy and no delay is set, set up the default
|
420
|
+
# delay policy. This inherits from the legacy cache_bad_uris_for
|
421
|
+
# setting to enable backward compatibility with older configurations.
|
422
|
+
#
|
423
|
+
# We do this here to make the rule enforcement logic a little tidier.
|
424
|
+
|
425
|
+
policy_entry["retry_delay"] = [ @config.fetch("cache_bad_uris_for", 1) * 24 ]
|
426
|
+
end
|
427
|
+
|
428
|
+
return policy_entry
|
429
|
+
end
|
430
|
+
|
431
|
+
# Retrieve the bad_uris cache entry for the given URI. This method
|
432
|
+
# takes the cache and a URI instance (i.e. parsing must already be done).
|
433
|
+
#
|
434
|
+
# If the URI has no entry in the cache, returns nil and *not* a default
|
435
|
+
# entry.
|
436
|
+
def self.get_bad_uri_cache_entry(bad_uris, uri)
|
437
|
+
return nil if ! bad_uris.key? uri.host
|
438
|
+
|
439
|
+
entry = bad_uris[uri.host].clone
|
440
|
+
|
441
|
+
if entry.instance_of? String
|
442
|
+
# Older version of the bad URL cache, convert to new format with some
|
443
|
+
# "sensible" defaults.
|
444
|
+
|
445
|
+
entry = {
|
446
|
+
"state" => UriState::UNSUPPORTED,
|
447
|
+
"last_checked" => DateTime.parse(entry).to_time,
|
448
|
+
"attempts" => 1
|
449
|
+
}
|
450
|
+
else
|
451
|
+
# Otherwise, parse the check time into a real Time object before
|
452
|
+
# returning the entry.
|
453
|
+
#
|
454
|
+
# We convert to a Time object so we can do arithmetic on it later.
|
455
|
+
|
456
|
+
entry["last_checked"] = DateTime.parse(entry["last_checked"]).to_time
|
457
|
+
end
|
458
|
+
|
459
|
+
return entry
|
460
|
+
end
|
461
|
+
|
462
|
+
# Update the URI cache for this entry.
|
463
|
+
#
|
464
|
+
# If the state is UriState.SUCCESS or the URI is whitelisted or
|
465
|
+
# blacklisted, we delete any existing entries since no policy will
|
466
|
+
# apply. This ensures we reset the policy state when a webmention
|
467
|
+
# succeeds.
|
468
|
+
#
|
469
|
+
# Otherwise, we either create or update an entry for the URI, recording
|
470
|
+
# the state and the current attempt counter.
|
471
|
+
def self.update_uri_cache(uri, state)
|
357
472
|
uri = URI::Parser.new.parse(uri.to_s)
|
358
|
-
|
359
|
-
return if uri.host == "webmention.io"
|
473
|
+
uri_str = uri.to_s
|
360
474
|
|
361
475
|
cache_file = @cache_files["bad_uris"]
|
362
476
|
bad_uris = load_yaml(cache_file)
|
363
|
-
|
477
|
+
|
478
|
+
if state == UriState::SUCCESS or
|
479
|
+
@uri_whitelist.any? { |expr| expr.match uri_str } or
|
480
|
+
@uri_blacklist.any? { |expr| expr.match uri_str }
|
481
|
+
|
482
|
+
return if bad_uris.delete(uri.host).nil?
|
483
|
+
else
|
484
|
+
old_entry = get_bad_uri_cache_entry(bad_uris, uri) || {}
|
485
|
+
|
486
|
+
bad_uris[uri.host] = {
|
487
|
+
"state" => state,
|
488
|
+
"attempts" => old_entry.fetch("attempts", 0) + 1,
|
489
|
+
"last_checked" => Time.now.to_s
|
490
|
+
}
|
491
|
+
end
|
492
|
+
|
364
493
|
dump_yaml(cache_file, bad_uris)
|
365
494
|
end
|
366
495
|
|
496
|
+
# Check if we should attempt to send a webmention to the given URI based
|
497
|
+
# on the error handling policy and the last attempt.
|
367
498
|
def self.uri_ok?(uri)
|
368
499
|
uri = URI::Parser.new.parse(uri.to_s)
|
369
500
|
now = Time.now.to_s
|
501
|
+
uri_str = uri.to_s
|
502
|
+
|
503
|
+
# If the URI is whitelisted, it's always ok!
|
504
|
+
return true if @uri_whitelist.any? { |expr| expr.match uri_str }
|
505
|
+
|
506
|
+
# If the URI is blacklisted, it's never ok!
|
507
|
+
return false if @uri_blacklist.any? { |expr| expr.match uri_str }
|
508
|
+
|
370
509
|
bad_uris = load_yaml(@cache_files["bad_uris"])
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
510
|
+
entry = get_bad_uri_cache_entry(bad_uris, uri)
|
511
|
+
|
512
|
+
# If the entry isn't in our cache yet, then it's ok.
|
513
|
+
return true if entry.nil?
|
514
|
+
|
515
|
+
# Okay, the last time we tried to send a webmention to this URI it
|
516
|
+
# failed, so depending on what happened and the policy, we need to
|
517
|
+
# decide what to do.
|
518
|
+
#
|
519
|
+
# First pull the retry policy given the type of the last error for the URI
|
520
|
+
policy_entry = get_bad_uri_policy_entry(entry["state"])
|
521
|
+
policy = policy_entry["policy"]
|
522
|
+
|
523
|
+
if policy == UriPolicy::BAN
|
524
|
+
return false
|
525
|
+
elsif policy == UriPolicy::IGNORE
|
526
|
+
return true
|
527
|
+
elsif policy == UriPolicy::RETRY
|
528
|
+
now = Time.now
|
529
|
+
|
530
|
+
attempts = entry["attempts"]
|
531
|
+
max_attempts = policy_entry["max_attempts"]
|
532
|
+
|
533
|
+
if ! max_attempts.nil? and attempts >= max_attempts
|
534
|
+
# If there's a retry limit and we've hit it, URI is not ok.
|
535
|
+
log "msg", "Skipping #{uri}, attempted #{attempts} times and max is #{max_attempts}"
|
536
|
+
|
537
|
+
return false
|
538
|
+
end
|
539
|
+
|
540
|
+
retry_delay = policy_entry["retry_delay"]
|
541
|
+
|
542
|
+
# Sneaky trick. By clamping to the array length, the last entry in
|
543
|
+
# the retry_delay list is used for all remaining retries.
|
544
|
+
delay = retry_delay[(attempts - 1).clamp(0, retry_delay.length - 1)]
|
545
|
+
|
546
|
+
recheck_at = (entry["last_checked"] + delay * 3600)
|
547
|
+
|
548
|
+
if recheck_at.to_r > now.to_r
|
549
|
+
log "msg", "Skipping #{uri}, next attempt will happen after #{recheck_at}"
|
550
|
+
|
551
|
+
return false
|
552
|
+
end
|
553
|
+
else
|
554
|
+
log "error", "Invalid bad URI policy type: #{policy}"
|
376
555
|
end
|
556
|
+
|
377
557
|
return true
|
378
558
|
end
|
379
559
|
|
380
|
-
private_class_method :get_http_response,
|
560
|
+
private_class_method :get_http_response,
|
561
|
+
:get_bad_uri_policy_entry,
|
562
|
+
:get_bad_uri_cache_entry,
|
563
|
+
:update_uri_cache
|
381
564
|
end
|
382
565
|
end
|
383
566
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-webmention_io
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Gustafson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jekyll
|
@@ -65,19 +65,25 @@ dependencies:
|
|
65
65
|
- !ruby/object:Gem::Version
|
66
66
|
version: '4.0'
|
67
67
|
- !ruby/object:Gem::Dependency
|
68
|
-
name:
|
68
|
+
name: activesupport
|
69
69
|
requirement: !ruby/object:Gem::Requirement
|
70
70
|
requirements:
|
71
71
|
- - "~>"
|
72
72
|
- !ruby/object:Gem::Version
|
73
|
-
version: '0
|
73
|
+
version: '7.0'
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 7.0.4.3
|
74
77
|
type: :runtime
|
75
78
|
prerelease: false
|
76
79
|
version_requirements: !ruby/object:Gem::Requirement
|
77
80
|
requirements:
|
78
81
|
- - "~>"
|
79
82
|
- !ruby/object:Gem::Version
|
80
|
-
version: '0
|
83
|
+
version: '7.0'
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 7.0.4.3
|
81
87
|
- !ruby/object:Gem::Dependency
|
82
88
|
name: htmlbeautifier
|
83
89
|
requirement: !ruby/object:Gem::Requirement
|
@@ -120,6 +126,20 @@ dependencies:
|
|
120
126
|
- - "~>"
|
121
127
|
- !ruby/object:Gem::Version
|
122
128
|
version: '7.0'
|
129
|
+
- !ruby/object:Gem::Dependency
|
130
|
+
name: jsonpath
|
131
|
+
requirement: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - "~>"
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: 1.0.1
|
136
|
+
type: :runtime
|
137
|
+
prerelease: false
|
138
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
|
+
requirements:
|
140
|
+
- - "~>"
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: 1.0.1
|
123
143
|
- !ruby/object:Gem::Dependency
|
124
144
|
name: bundler
|
125
145
|
requirement: !ruby/object:Gem::Requirement
|
@@ -154,14 +174,14 @@ dependencies:
|
|
154
174
|
requirements:
|
155
175
|
- - "~>"
|
156
176
|
- !ruby/object:Gem::Version
|
157
|
-
version: '
|
177
|
+
version: '13.0'
|
158
178
|
type: :development
|
159
179
|
prerelease: false
|
160
180
|
version_requirements: !ruby/object:Gem::Requirement
|
161
181
|
requirements:
|
162
182
|
- - "~>"
|
163
183
|
- !ruby/object:Gem::Version
|
164
|
-
version: '
|
184
|
+
version: '13.0'
|
165
185
|
- !ruby/object:Gem::Dependency
|
166
186
|
name: rspec
|
167
187
|
requirement: !ruby/object:Gem::Requirement
|