jekyll-webmention_io 3.3.7 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll/commands/webmention.rb +16 -8
- data/lib/jekyll/generators/compile_js.rb +1 -2
- data/lib/jekyll/generators/queue_webmentions.rb +146 -15
- data/lib/jekyll/tags/webmention.rb +1 -2
- data/lib/jekyll/webmention_io/version.rb +1 -1
- data/lib/jekyll/webmention_io/webmention_item.rb +5 -21
- data/lib/jekyll/webmention_io.rb +201 -18
- metadata +27 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '09dd03eeea574cdb024da147de7b2be165209a645a600aa709a650975b10d1b0'
|
4
|
+
data.tar.gz: 8493598855950605e57eb6142fa4e4a9e3f9b5b09323b100ed6823192ef3ece1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de0de587f76d245329a31d2800b7e9d4e53bfc7bbdf318e3adeaef0ade095c2ae865e670510e18d0ca77a32398f9eb7060b9a6acbf2afb78a4d62708576ab015
|
7
|
+
data.tar.gz: d3113c40d8d9674f5b842ce2f2ce777d33cfe3abab46ee1dc97474f2eb0788ea1cd7f8c60699dc0dcf10563cec3ac8963f3fc6715f64f0bfddabdba8b7455a53
|
data/lib/jekyll/commands/webmention.rb
CHANGED
@@ -26,18 +26,17 @@ module Jekyll
|
|
26
26
|
WebmentionIO.log "msg", "Getting ready to send webmentions (this may take a while)."
|
27
27
|
|
28
28
|
count = 0
|
29
|
+
max_attempts = WebmentionIO.max_attempts()
|
29
30
|
cached_outgoing = WebmentionIO.get_cache_file_path "outgoing"
|
30
31
|
if File.exist?(cached_outgoing)
|
31
32
|
outgoing = WebmentionIO.load_yaml(cached_outgoing)
|
32
33
|
outgoing.each do |source, targets|
|
33
34
|
targets.each do |target, response|
|
34
35
|
# skip ones we’ve handled
|
35
|
-
next unless response == false
|
36
|
+
next unless response == false or response.instance_of? Integer
|
36
37
|
|
37
|
-
#
|
38
|
-
if target.index("//").zero?
|
39
|
-
target = "http:#{target}"
|
40
|
-
end
|
38
|
+
# skip protocol-less links, we'll need to revisit this again later
|
39
|
+
next if target.index("//").zero?
|
41
40
|
|
42
41
|
# produce an escaped version of the target (in case of special
|
43
42
|
# characters, etc).
|
@@ -46,6 +45,17 @@ module Jekyll
|
|
46
45
|
# skip bad URLs
|
47
46
|
next unless WebmentionIO.uri_ok?(escaped)
|
48
47
|
|
48
|
+
# give up if we've attempted this too many times
|
49
|
+
response = (response || 0) + 1
|
50
|
+
|
51
|
+
if ! max_attempts.nil? and response > max_attempts
|
52
|
+
outgoing[source][target] = ""
|
53
|
+
WebmentionIO.log "msg", "Giving up sending from #{source} to #{target}."
|
54
|
+
next
|
55
|
+
else
|
56
|
+
outgoing[source][target] = response
|
57
|
+
end
|
58
|
+
|
49
59
|
# get the endpoint
|
50
60
|
endpoint = WebmentionIO.get_webmention_endpoint(escaped)
|
51
61
|
next unless endpoint
|
@@ -64,9 +74,7 @@ module Jekyll
|
|
64
74
|
count += 1
|
65
75
|
end
|
66
76
|
end
|
67
|
-
|
68
|
-
WebmentionIO.dump_yaml(cached_outgoing, outgoing)
|
69
|
-
end
|
77
|
+
WebmentionIO.dump_yaml(cached_outgoing, outgoing)
|
70
78
|
WebmentionIO.log "msg", "#{count} webmentions sent."
|
71
79
|
end # file exists (outgoing)
|
72
80
|
end # def process
|
data/lib/jekyll/generators/compile_js.rb
CHANGED
@@ -18,7 +18,6 @@ module Jekyll
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
using StringInflection
|
22
21
|
class CompileJS < Generator
|
23
22
|
safe true
|
24
23
|
priority :low
|
@@ -61,7 +60,7 @@ module Jekyll
|
|
61
60
|
def add_webmention_types
|
62
61
|
js_types = []
|
63
62
|
WebmentionIO.types.each do |type|
|
64
|
-
js_types.push "'#{type}': '#{type
|
63
|
+
js_types.push "'#{type}': '#{ActiveSupport::Inflector.singularize(type)}'"
|
65
64
|
end
|
66
65
|
types_js = <<-EOF
|
67
66
|
;(function(window,JekyllWebmentionIO){
|
data/lib/jekyll/generators/queue_webmentions.rb
CHANGED
@@ -8,6 +8,8 @@
|
|
8
8
|
# This generator caches sites you mention so they can be mentioned
|
9
9
|
#
|
10
10
|
|
11
|
+
require "jsonpath"
|
12
|
+
|
11
13
|
module Jekyll
|
12
14
|
module WebmentionIO
|
13
15
|
class QueueWebmentions < Generator
|
@@ -17,6 +19,7 @@ module Jekyll
|
|
17
19
|
def generate(site)
|
18
20
|
@site = site
|
19
21
|
@site_url = site.config["url"].to_s
|
22
|
+
@syndication = site.config.dig("webmentions", "syndication")
|
20
23
|
|
21
24
|
if @site.config['serving']
|
22
25
|
Jekyll::WebmentionIO.log "msg", "Webmentions lookups are not run when running `jekyll serve`."
|
@@ -31,52 +34,180 @@ module Jekyll
|
|
31
34
|
return
|
32
35
|
end
|
33
36
|
|
34
|
-
if @
|
35
|
-
WebmentionIO.log "info", "Webmention lookups are currently paused."
|
36
|
-
return
|
37
|
-
end
|
37
|
+
compile_jsonpath_expressions() if ! @syndication.nil?
|
38
38
|
|
39
|
-
WebmentionIO.log "msg", "
|
39
|
+
WebmentionIO.log "msg", "Collecting webmentions you’ve made. This may take a while."
|
40
40
|
|
41
41
|
upgrade_outgoing_webmention_cache
|
42
42
|
|
43
|
-
posts = WebmentionIO.gather_documents(@site)
|
44
|
-
|
43
|
+
posts = WebmentionIO.gather_documents(@site).select { |p| ! p.data["draft"] }
|
45
44
|
gather_webmentions(posts)
|
46
45
|
end
|
47
46
|
|
48
47
|
private
|
49
48
|
|
49
|
+
def compile_jsonpath_expressions()
|
50
|
+
@syndication.each do | target, config |
|
51
|
+
next if ! config.key? "response_mapping"
|
52
|
+
|
53
|
+
mapping = config["response_mapping"]
|
54
|
+
|
55
|
+
mapping.clone.each do | key, pattern |
|
56
|
+
begin
|
57
|
+
mapping[key] = JsonPath.new(pattern)
|
58
|
+
rescue StandardError => e
|
59
|
+
WebmentionIO.log "error", "Ignoring invalid JsonPath expression #{pattern}: #{e}"
|
60
|
+
|
61
|
+
mapping.delete(key)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def combine_values(a, b)
|
68
|
+
return case [ a.instance_of?(Array), b.instance_of?(Array) ]
|
69
|
+
when [ false, false ]
|
70
|
+
[ a, b ]
|
71
|
+
when [ false, true ]
|
72
|
+
[ a ] + b
|
73
|
+
when [ true, false ]
|
74
|
+
a << b
|
75
|
+
when [ true, true ]
|
76
|
+
a + b
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def process_syndication(post, target, response)
|
81
|
+
# If this is a syndication target, and we have a response,
|
82
|
+
# and the syndication entry contains a response mapping, then
|
83
|
+
# go through that map and store the selected values into
|
84
|
+
# the page front matter.
|
85
|
+
|
86
|
+
response = JSON.generate(response)
|
87
|
+
|
88
|
+
target["response_mapping"].each do |key, pattern|
|
89
|
+
result = pattern.on(response)
|
90
|
+
|
91
|
+
if ! result
|
92
|
+
WebmentionIO.log "msg", "The path #{skey} doesn't exist in the response from #{target['endpoint']} for #{uri}"
|
93
|
+
next
|
94
|
+
elsif result.length == 1
|
95
|
+
result = result.first
|
96
|
+
end
|
97
|
+
|
98
|
+
if post.data[key].nil?
|
99
|
+
post.data[key] = result
|
100
|
+
else
|
101
|
+
post.data[key] = combine_values(post.data[key], result)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def get_collection_for_post(post)
|
107
|
+
@site.collections.each do |name, collection|
|
108
|
+
next if name == "posts"
|
109
|
+
|
110
|
+
return collection if collection.docs.include? post
|
111
|
+
end
|
112
|
+
|
113
|
+
return nil
|
114
|
+
end
|
115
|
+
|
116
|
+
def get_syndication_target(uri)
|
117
|
+
return nil if @syndication.nil?
|
118
|
+
|
119
|
+
@syndication.values.detect { |t| t["endpoint"] == uri }
|
120
|
+
end
|
121
|
+
|
50
122
|
def gather_webmentions(posts)
|
51
123
|
webmentions = WebmentionIO.read_cached_webmentions "outgoing"
|
52
124
|
|
53
125
|
posts.each do |post|
|
54
|
-
|
126
|
+
# Collect potential outgoing webmentions in this post.
|
55
127
|
mentions = get_mentioned_uris(post)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
128
|
+
|
129
|
+
mentions.each do |mentioned_uri, response|
|
130
|
+
# If this webmention was a product of a syndication instruction,
|
131
|
+
# this goes back into the configuration and pulls that syndication
|
132
|
+
# target config out.
|
133
|
+
#
|
134
|
+
# If this is just a normal webmention, this will return nil.
|
135
|
+
target = get_syndication_target(mentioned_uri)
|
136
|
+
|
137
|
+
fulluri = File.join(@site_url, post.url)
|
138
|
+
shorturi = post.data["shorturl"] || fulluri
|
139
|
+
|
140
|
+
# Old cached responses might use either the full or short URIs so
|
141
|
+
# we need to check for both.
|
142
|
+
cached_response =
|
143
|
+
webmentions.dig(shorturi, mentioned_uri) ||
|
144
|
+
webmentions.dig(fulluri, mentioned_uri)
|
145
|
+
|
146
|
+
if cached_response.nil?
|
147
|
+
if ! target.nil?
|
148
|
+
uri = target["shorturl"] ? shorturi : fulluri
|
149
|
+
|
150
|
+
if target.key? "fragment"
|
151
|
+
uri += "#" + target["fragment"]
|
152
|
+
end
|
153
|
+
else
|
154
|
+
uri = fulluri
|
60
155
|
end
|
156
|
+
|
157
|
+
webmentions[uri] ||= {}
|
158
|
+
webmentions[uri][mentioned_uri] = response
|
159
|
+
elsif ! target.nil? and target.key? "response_mapping"
|
160
|
+
process_syndication(post, target, cached_response)
|
61
161
|
end
|
62
|
-
else
|
63
|
-
webmentions[uri] = mentions
|
64
162
|
end
|
65
163
|
end
|
66
164
|
|
67
|
-
|
165
|
+
# This check is moved down here because we still need the steps
|
166
|
+
# above to populate frontmatter during the site build, even
|
167
|
+
# if we're not going to modify the webmention cache.
|
168
|
+
|
169
|
+
if @site.config.dig("webmentions", "pause_lookups")
|
170
|
+
WebmentionIO.log "info", "Webmention lookups are currently paused."
|
171
|
+
return
|
172
|
+
else
|
173
|
+
WebmentionIO.cache_webmentions "outgoing", webmentions
|
174
|
+
end
|
68
175
|
end
|
69
176
|
|
70
177
|
def get_mentioned_uris(post)
|
178
|
+
collection = get_collection_for_post(post)
|
179
|
+
|
71
180
|
uris = {}
|
181
|
+
|
182
|
+
syndication_targets = []
|
183
|
+
syndication_targets += post.data["syndicate_to"] || []
|
184
|
+
|
185
|
+
if ! collection.nil?
|
186
|
+
syndication_targets += collection.metadata["syndicate_to"] || []
|
187
|
+
end
|
188
|
+
|
189
|
+
syndication_targets.each do |endpoint|
|
190
|
+
if @syndication.key? endpoint
|
191
|
+
uris[@syndication[endpoint]["endpoint"]] = false
|
192
|
+
else
|
193
|
+
WebmentionIO.log "msg", "Found reference to syndication endpoint \"#{endpoint}\" without matching entry in configuration."
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
72
197
|
if post.data["in_reply_to"]
|
73
198
|
uris[post.data["in_reply_to"]] = false
|
74
199
|
end
|
200
|
+
|
201
|
+
if post.data["bookmark_of"]
|
202
|
+
uris[post.data["bookmark_of"]] = false
|
203
|
+
end
|
204
|
+
|
75
205
|
post.content.scan(/(?:https?:)?\/\/[^\s)#\[\]{}<>%|\^"']+/) do |match|
|
76
206
|
unless uris.key? match
|
77
207
|
uris[match] = false
|
78
208
|
end
|
79
209
|
end
|
210
|
+
|
80
211
|
return uris
|
81
212
|
end
|
82
213
|
|
data/lib/jekyll/tags/webmention.rb
CHANGED
@@ -11,7 +11,6 @@ require "htmlbeautifier"
|
|
11
11
|
|
12
12
|
module Jekyll
|
13
13
|
module WebmentionIO
|
14
|
-
using StringInflection
|
15
14
|
class WebmentionTag < Liquid::Tag
|
16
15
|
def initialize(tag_name, text, tokens)
|
17
16
|
super
|
@@ -50,7 +49,7 @@ module Jekyll
|
|
50
49
|
if !WebmentionIO.types.include? type
|
51
50
|
WebmentionIO.log "warn", "#{type} are not extractable"
|
52
51
|
else
|
53
|
-
type = type
|
52
|
+
type = ActiveSupport::Inflector.singularize(type)
|
54
53
|
WebmentionIO.log "info", "Searching #{webmentions.length} webmentions for type==#{type}"
|
55
54
|
if webmentions.is_a? Hash
|
56
55
|
webmentions = webmentions.values
|
data/lib/jekyll/webmention_io/webmention_item.rb
CHANGED
@@ -50,20 +50,6 @@ module Jekyll
|
|
50
50
|
@content = determine_content
|
51
51
|
end
|
52
52
|
|
53
|
-
def markdownify(string)
|
54
|
-
@converter ||= @site.find_converter_instance(Jekyll::Converters::Markdown)
|
55
|
-
|
56
|
-
if string
|
57
|
-
string = @converter.convert(string.to_s)
|
58
|
-
unless string.start_with?("<p")
|
59
|
-
string = string.sub(/^<[^>]+>/, "<p>").sub(/<\/[^>]+>$/, "</p>")
|
60
|
-
end
|
61
|
-
string.strip
|
62
|
-
else
|
63
|
-
string
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
53
|
def determine_uri
|
68
54
|
@raw["data"]["url"] || @raw["source"]
|
69
55
|
end
|
@@ -164,13 +150,11 @@ module Jekyll
|
|
164
150
|
end
|
165
151
|
|
166
152
|
def determine_content
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
markdownify(content)
|
153
|
+
if %w(post reply link).include? @type
|
154
|
+
@raw.dig("data", "content")
|
155
|
+
else
|
156
|
+
@raw.dig("activity", "sentence_html")
|
157
|
+
end
|
174
158
|
end
|
175
159
|
end
|
176
160
|
end
|
data/lib/jekyll/webmention_io.rb
CHANGED
@@ -15,16 +15,30 @@ require "json"
|
|
15
15
|
require "net/http"
|
16
16
|
require "uri"
|
17
17
|
require "openssl"
|
18
|
-
require "
|
18
|
+
require "active_support"
|
19
19
|
require "indieweb/endpoints"
|
20
20
|
require "webmention"
|
21
21
|
|
22
22
|
module Jekyll
|
23
23
|
module WebmentionIO
|
24
|
+
module UriState
|
25
|
+
UNSUPPORTED = "unsupported"
|
26
|
+
ERROR = "error"
|
27
|
+
FAILURE = "failure"
|
28
|
+
SUCCESS = "success"
|
29
|
+
end
|
30
|
+
|
31
|
+
module UriPolicy
|
32
|
+
BAN = "ban"
|
33
|
+
IGNORE = "ignore"
|
34
|
+
RETRY = "retry"
|
35
|
+
end
|
36
|
+
|
24
37
|
class << self
|
25
38
|
# define simple getters and setters
|
26
39
|
attr_reader :config, :jekyll_config, :cache_files, :cache_folder,
|
27
|
-
:file_prefix, :types, :supported_templates, :js_handler
|
40
|
+
:file_prefix, :types, :supported_templates, :js_handler,
|
41
|
+
:uri_whitelist, :uri_blacklist
|
28
42
|
attr_writer :api_suffix
|
29
43
|
end
|
30
44
|
|
@@ -69,6 +83,18 @@ module Jekyll
|
|
69
83
|
end
|
70
84
|
|
71
85
|
@js_handler = WebmentionIO::JSHandler.new(site)
|
86
|
+
|
87
|
+
@uri_whitelist = @config
|
88
|
+
.fetch("bad_uri_policy", {})
|
89
|
+
.fetch("whitelist", [])
|
90
|
+
.clone
|
91
|
+
.insert(-1, "^https?://webmention.io/")
|
92
|
+
.map { |expr| Regexp.new(expr) }
|
93
|
+
|
94
|
+
@uri_blacklist = @config
|
95
|
+
.fetch("bad_uri_policy", {})
|
96
|
+
.fetch("blacklist", [])
|
97
|
+
.map { |expr| Regexp.new(expr) }
|
72
98
|
end
|
73
99
|
|
74
100
|
# Setter
|
@@ -81,6 +107,10 @@ module Jekyll
|
|
81
107
|
Jekyll.sanitized_path(@cache_folder, "#{@file_prefix}#{filename}")
|
82
108
|
end
|
83
109
|
|
110
|
+
def self.max_attempts()
|
111
|
+
@config.dig("max_attempts")
|
112
|
+
end
|
113
|
+
|
84
114
|
def self.get_cache_file_path(key)
|
85
115
|
@cache_files[key] || false
|
86
116
|
end
|
@@ -213,11 +243,11 @@ module Jekyll
|
|
213
243
|
endpoint = IndieWeb::Endpoints.get(uri)[:webmention]
|
214
244
|
unless endpoint
|
215
245
|
log("info", "Could not find a webmention endpoint at #{uri}")
|
216
|
-
|
246
|
+
update_uri_cache(uri, UriState::UNSUPPORTED)
|
217
247
|
end
|
218
248
|
rescue StandardError => e
|
219
249
|
log "info", "Endpoint lookup failed for #{uri}: #{e.message}"
|
220
|
-
|
250
|
+
update_uri_cache(uri, UriState::FAILURE)
|
221
251
|
endpoint = false
|
222
252
|
end
|
223
253
|
endpoint
|
@@ -231,10 +261,24 @@ module Jekyll
|
|
231
261
|
case response.code
|
232
262
|
when 200, 201, 202
|
233
263
|
log "info", "Webmention successful!"
|
264
|
+
update_uri_cache(target, UriState::SUCCESS)
|
234
265
|
response.body
|
235
266
|
else
|
236
267
|
log "info", response.inspect
|
237
268
|
log "info", "Webmention failed, but will remain queued for next time"
|
269
|
+
|
270
|
+
if response.body
|
271
|
+
begin
|
272
|
+
body = JSON.parse(response.body)
|
273
|
+
|
274
|
+
if body.key? "error"
|
275
|
+
log "msg", "Endpoint returned error: #{body['error']}"
|
276
|
+
end
|
277
|
+
rescue
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
update_uri_cache(target, UriState::ERROR)
|
238
282
|
false
|
239
283
|
end
|
240
284
|
end
|
@@ -291,12 +335,12 @@ module Jekyll
|
|
291
335
|
redirect_to = redirect_to.relative? ? "#{original_uri.scheme}://#{original_uri.host}" + redirect_to.to_s : redirect_to.to_s
|
292
336
|
return get_uri_source(redirect_to, redirect_limit - 1, original_uri)
|
293
337
|
else
|
294
|
-
|
338
|
+
update_uri_cache(uri, UriState::FAILURE)
|
295
339
|
return false
|
296
340
|
end
|
297
341
|
else
|
298
342
|
log("warn", "too many redirects for #{original_uri}") if original_uri
|
299
|
-
|
343
|
+
update_uri_cache(uri, UriState::FAILURE)
|
300
344
|
return false
|
301
345
|
end
|
302
346
|
end
|
@@ -347,37 +391,176 @@ module Jekyll
|
|
347
391
|
return response
|
348
392
|
rescue *EXCEPTIONS => e
|
349
393
|
log "warn", "Got an error checking #{uri}: #{e}"
|
350
|
-
|
394
|
+
update_uri_cache(uri, UriState::FAILURE)
|
351
395
|
return false
|
352
396
|
end
|
353
397
|
end
|
354
398
|
|
355
|
-
#
|
356
|
-
|
399
|
+
# Given the provided state value (see UriState), retrieve the policy
|
400
|
+
# entry. If no entry exists, return a new default entry that
|
401
|
+
# indicates unlimited retries.
|
402
|
+
def self.get_bad_uri_policy_entry(state)
|
403
|
+
settings = @config.fetch("bad_uri_policy", {})
|
404
|
+
|
405
|
+
default_policy = { "policy" => UriPolicy::RETRY }
|
406
|
+
policy_entry = nil
|
407
|
+
|
408
|
+
# Retrieve the policy entry, the default entry, or the canned default
|
409
|
+
policy_entry = settings.fetch(state) {
|
410
|
+
settings.fetch("default", default_policy)
|
411
|
+
}
|
412
|
+
|
413
|
+
# Convert shorthand entry to full policy record
|
414
|
+
if policy_entry.instance_of? String
|
415
|
+
policy_entry = { "policy" => policy_entry }
|
416
|
+
end
|
417
|
+
|
418
|
+
if policy_entry["policy"] == UriPolicy::RETRY and ! policy_entry.key? "retry_delay"
|
419
|
+
# If this is a retry policy and no delay is set, set up the default
|
420
|
+
# delay policy. This inherits from the legacy cache_bad_uris_for
|
421
|
+
# setting to enable backward compatibility with older configurations.
|
422
|
+
#
|
423
|
+
# We do this here to make the rule enforcement logic a little tidier.
|
424
|
+
|
425
|
+
policy_entry["retry_delay"] = [ @config.fetch("cache_bad_uris_for", 1) * 24 ]
|
426
|
+
end
|
427
|
+
|
428
|
+
return policy_entry
|
429
|
+
end
|
430
|
+
|
431
|
+
# Retrieve the bad_uris cache entry for the given URI. This method
|
432
|
+
# takes the cache and a URI instance (i.e. parsing must already be done).
|
433
|
+
#
|
434
|
+
# If the URI has no entry in the cache, returns nil and *not* a default
|
435
|
+
# entry.
|
436
|
+
def self.get_bad_uri_cache_entry(bad_uris, uri)
|
437
|
+
return nil if ! bad_uris.key? uri.host
|
438
|
+
|
439
|
+
entry = bad_uris[uri.host].clone
|
440
|
+
|
441
|
+
if entry.instance_of? String
|
442
|
+
# Older version of the bad URL cache, convert to new format with some
|
443
|
+
# "sensible" defaults.
|
444
|
+
|
445
|
+
entry = {
|
446
|
+
"state" => UriState::UNSUPPORTED,
|
447
|
+
"last_checked" => DateTime.parse(entry).to_time,
|
448
|
+
"attempts" => 1
|
449
|
+
}
|
450
|
+
else
|
451
|
+
# Otherwise, parse the check time into a real Time object before
|
452
|
+
# returning the entry.
|
453
|
+
#
|
454
|
+
# We convert to a Time object so we can do arithmetic on it later.
|
455
|
+
|
456
|
+
entry["last_checked"] = DateTime.parse(entry["last_checked"]).to_time
|
457
|
+
end
|
458
|
+
|
459
|
+
return entry
|
460
|
+
end
|
461
|
+
|
462
|
+
# Update the URI cache for this entry.
|
463
|
+
#
|
464
|
+
# If the state is UriState.SUCCESS or the URI is whitelisted or
|
465
|
+
# blacklisted, we delete any existing entries since no policy will
|
466
|
+
# apply. This ensures we reset the policy state when a webmention
|
467
|
+
# succeeds.
|
468
|
+
#
|
469
|
+
# Otherwise, we either create or update an entry for the URI, recording
|
470
|
+
# the state and the current attempt counter.
|
471
|
+
def self.update_uri_cache(uri, state)
|
357
472
|
uri = URI::Parser.new.parse(uri.to_s)
|
358
|
-
|
359
|
-
return if uri.host == "webmention.io"
|
473
|
+
uri_str = uri.to_s
|
360
474
|
|
361
475
|
cache_file = @cache_files["bad_uris"]
|
362
476
|
bad_uris = load_yaml(cache_file)
|
363
|
-
|
477
|
+
|
478
|
+
if state == UriState::SUCCESS or
|
479
|
+
@uri_whitelist.any? { |expr| expr.match uri_str } or
|
480
|
+
@uri_blacklist.any? { |expr| expr.match uri_str }
|
481
|
+
|
482
|
+
return if bad_uris.delete(uri.host).nil?
|
483
|
+
else
|
484
|
+
old_entry = get_bad_uri_cache_entry(bad_uris, uri) || {}
|
485
|
+
|
486
|
+
bad_uris[uri.host] = {
|
487
|
+
"state" => state,
|
488
|
+
"attempts" => old_entry.fetch("attempts", 0) + 1,
|
489
|
+
"last_checked" => Time.now.to_s
|
490
|
+
}
|
491
|
+
end
|
492
|
+
|
364
493
|
dump_yaml(cache_file, bad_uris)
|
365
494
|
end
|
366
495
|
|
496
|
+
# Check if we should attempt to send a webmention to the given URI based
|
497
|
+
# on the error handling policy and the last attempt.
|
367
498
|
def self.uri_ok?(uri)
|
368
499
|
uri = URI::Parser.new.parse(uri.to_s)
|
369
500
|
now = Time.now.to_s
|
501
|
+
uri_str = uri.to_s
|
502
|
+
|
503
|
+
# If the URI is whitelisted, it's always ok!
|
504
|
+
return true if @uri_whitelist.any? { |expr| expr.match uri_str }
|
505
|
+
|
506
|
+
# If the URI is blacklisted, it's never ok!
|
507
|
+
return false if @uri_blacklist.any? { |expr| expr.match uri_str }
|
508
|
+
|
370
509
|
bad_uris = load_yaml(@cache_files["bad_uris"])
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
510
|
+
entry = get_bad_uri_cache_entry(bad_uris, uri)
|
511
|
+
|
512
|
+
# If the entry isn't in our cache yet, then it's ok.
|
513
|
+
return true if entry.nil?
|
514
|
+
|
515
|
+
# Okay, the last time we tried to send a webmention to this URI it
|
516
|
+
# failed, so depending on what happened and the policy, we need to
|
517
|
+
# decide what to do.
|
518
|
+
#
|
519
|
+
# First pull the retry policy given the type of the last error for the URI
|
520
|
+
policy_entry = get_bad_uri_policy_entry(entry["state"])
|
521
|
+
policy = policy_entry["policy"]
|
522
|
+
|
523
|
+
if policy == UriPolicy::BAN
|
524
|
+
return false
|
525
|
+
elsif policy == UriPolicy::IGNORE
|
526
|
+
return true
|
527
|
+
elsif policy == UriPolicy::RETRY
|
528
|
+
now = Time.now
|
529
|
+
|
530
|
+
attempts = entry["attempts"]
|
531
|
+
max_attempts = policy_entry["max_attempts"]
|
532
|
+
|
533
|
+
if ! max_attempts.nil? and attempts >= max_attempts
|
534
|
+
# If there's a retry limit and we've hit it, URI is not ok.
|
535
|
+
log "msg", "Skipping #{uri}, attempted #{attempts} times and max is #{max_attempts}"
|
536
|
+
|
537
|
+
return false
|
538
|
+
end
|
539
|
+
|
540
|
+
retry_delay = policy_entry["retry_delay"]
|
541
|
+
|
542
|
+
# Sneaky trick. By clamping to the array length, the last entry in
|
543
|
+
# the retry_delay list is used for all remaining retries.
|
544
|
+
delay = retry_delay[(attempts - 1).clamp(0, retry_delay.length - 1)]
|
545
|
+
|
546
|
+
recheck_at = (entry["last_checked"] + delay * 3600)
|
547
|
+
|
548
|
+
if recheck_at.to_r > now.to_r
|
549
|
+
log "msg", "Skipping #{uri}, next attempt will happen after #{recheck_at}"
|
550
|
+
|
551
|
+
return false
|
552
|
+
end
|
553
|
+
else
|
554
|
+
log "error", "Invalid bad URI policy type: #{policy}"
|
376
555
|
end
|
556
|
+
|
377
557
|
return true
|
378
558
|
end
|
379
559
|
|
380
|
-
private_class_method :get_http_response,
|
560
|
+
private_class_method :get_http_response,
|
561
|
+
:get_bad_uri_policy_entry,
|
562
|
+
:get_bad_uri_cache_entry,
|
563
|
+
:update_uri_cache
|
381
564
|
end
|
382
565
|
end
|
383
566
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-webmention_io
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.7
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Gustafson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jekyll
|
@@ -65,19 +65,25 @@ dependencies:
|
|
65
65
|
- !ruby/object:Gem::Version
|
66
66
|
version: '4.0'
|
67
67
|
- !ruby/object:Gem::Dependency
|
68
|
-
name:
|
68
|
+
name: activesupport
|
69
69
|
requirement: !ruby/object:Gem::Requirement
|
70
70
|
requirements:
|
71
71
|
- - "~>"
|
72
72
|
- !ruby/object:Gem::Version
|
73
|
-
version: '0
|
73
|
+
version: '7.0'
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 7.0.4.3
|
74
77
|
type: :runtime
|
75
78
|
prerelease: false
|
76
79
|
version_requirements: !ruby/object:Gem::Requirement
|
77
80
|
requirements:
|
78
81
|
- - "~>"
|
79
82
|
- !ruby/object:Gem::Version
|
80
|
-
version: '0
|
83
|
+
version: '7.0'
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 7.0.4.3
|
81
87
|
- !ruby/object:Gem::Dependency
|
82
88
|
name: htmlbeautifier
|
83
89
|
requirement: !ruby/object:Gem::Requirement
|
@@ -120,6 +126,20 @@ dependencies:
|
|
120
126
|
- - "~>"
|
121
127
|
- !ruby/object:Gem::Version
|
122
128
|
version: '7.0'
|
129
|
+
- !ruby/object:Gem::Dependency
|
130
|
+
name: jsonpath
|
131
|
+
requirement: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - "~>"
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: 1.0.1
|
136
|
+
type: :runtime
|
137
|
+
prerelease: false
|
138
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
|
+
requirements:
|
140
|
+
- - "~>"
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: 1.0.1
|
123
143
|
- !ruby/object:Gem::Dependency
|
124
144
|
name: bundler
|
125
145
|
requirement: !ruby/object:Gem::Requirement
|
@@ -154,14 +174,14 @@ dependencies:
|
|
154
174
|
requirements:
|
155
175
|
- - "~>"
|
156
176
|
- !ruby/object:Gem::Version
|
157
|
-
version: '
|
177
|
+
version: '13.0'
|
158
178
|
type: :development
|
159
179
|
prerelease: false
|
160
180
|
version_requirements: !ruby/object:Gem::Requirement
|
161
181
|
requirements:
|
162
182
|
- - "~>"
|
163
183
|
- !ruby/object:Gem::Version
|
164
|
-
version: '
|
184
|
+
version: '13.0'
|
165
185
|
- !ruby/object:Gem::Dependency
|
166
186
|
name: rspec
|
167
187
|
requirement: !ruby/object:Gem::Requirement
|