directlink 0.0.8.6 → 0.0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/directlink +2 -10
- data/directlink.gemspec +6 -7
- data/lib/directlink.rb +93 -53
- data/test.rb +96 -43
- metadata +8 -31
- data/.bashrc +0 -4
- data/.travis.yml +0 -37
- data/Gemfile +0 -3
- data/README.md +0 -198
- data/Rakefile +0 -1
- data/api_tokens_for_travis.sh +0 -8
- data/gplus.txt +0 -1454
- data/reddit_token_for_travis.yaml +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f745bed3faf2b74dfe4532357fbb772fe32598b1
|
4
|
+
data.tar.gz: 203127452f8e51be364fff16a4c1eafa8a0a21df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75bd9d5351c48c4f1df45b76f4835abc52bd057eabd01f4615a2056c16f4f9cf57546c3ff7123f479836206d1bb3164a740bebd2d81b84899b5507ee74d0e178
|
7
|
+
data.tar.gz: 6e20c47d7270b6bf3b0920ea964337b5143e255eb789baac70823218c9b689b13a6b16abdc729a3c134a2b5cf48fccfa074ec64bebac6ed835a32764b8f5726c
|
data/bin/directlink
CHANGED
@@ -53,6 +53,7 @@ abort "usage: directlink [--debug] [--json] [--github] [--ignore-meta] <link1> <
|
|
53
53
|
}" if [nil, "-h", "--help", "-v", "--version"].include? ARGV.first
|
54
54
|
|
55
55
|
begin
|
56
|
+
# Struct instances respond to #to_a, so Array() would unpack a single Struct into its member values; that is why we test .is_a?(Array) instead
|
56
57
|
if json
|
57
58
|
require "json"
|
58
59
|
t = ARGV.map do |link|
|
@@ -67,16 +68,7 @@ begin
|
|
67
68
|
(t.is_a?(Array) ? t : [t]).each{ |s| puts "=> #{s.url}\n #{s.type} #{s.width}x#{s.height}" }
|
68
69
|
end
|
69
70
|
end
|
70
|
-
rescue
|
71
|
-
Net::OpenTimeout,
|
72
|
-
Errno::ECONNRESET,
|
73
|
-
NetHTTPUtils::Error,
|
74
|
-
FastImage::UnknownImageType,
|
75
|
-
FastImage::ImageFetchFailure,
|
76
|
-
# DirectLink::ErrorMissingEnvVar,
|
77
|
-
# DirectLink::ErrorAssert,
|
78
|
-
DirectLink::ErrorNotFound,
|
79
|
-
DirectLink::ErrorBadLink => e
|
71
|
+
rescue *DirectLink::NORMAL_EXCEPTIONS => e
|
80
72
|
puts e.backtrace if debug
|
81
73
|
cause = e.cause if e.cause if e.respond_to? :cause
|
82
74
|
c = e.class.to_s
|
data/directlink.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "directlink"
|
3
|
-
spec.version = "0.0.
|
4
|
-
spec.summary = "
|
3
|
+
spec.version = "0.0.9.2"
|
4
|
+
spec.summary = "obtains from any kind of hyperlink a link to an image, its format and resolution"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
7
7
|
spec.email = "nakilon@gmail.com"
|
@@ -11,19 +11,18 @@ Gem::Specification.new do |spec|
|
|
11
11
|
|
12
12
|
spec.add_dependency "fastimage", "~>2.1.3"
|
13
13
|
spec.add_dependency "nokogiri"
|
14
|
-
spec.add_dependency "nethttputils", "~>0.
|
15
|
-
spec.add_dependency "reddit_bot", "~>1.7.
|
14
|
+
spec.add_dependency "nethttputils", "~>0.4.1.0"
|
15
|
+
spec.add_dependency "reddit_bot", "~>1.7.8"
|
16
16
|
spec.add_dependency "kramdown"
|
17
17
|
spec.add_dependency "addressable"
|
18
18
|
spec.add_development_dependency "minitest"
|
19
|
-
spec.add_development_dependency "byebug"
|
20
19
|
|
21
20
|
spec.require_path = "lib"
|
22
21
|
spec.bindir = "bin"
|
23
22
|
spec.executable = "directlink"
|
24
23
|
spec.test_file = "test.rb"
|
25
|
-
spec.files =
|
24
|
+
spec.files = %w{ LICENSE directlink.gemspec lib/directlink.rb bin/directlink }
|
26
25
|
|
27
|
-
spec.requirements << "you may
|
26
|
+
spec.requirements << "you may want to create apps and provide API tokens:"
|
28
27
|
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS"
|
29
28
|
end
|
data/lib/directlink.rb
CHANGED
@@ -2,11 +2,10 @@ module DirectLink
|
|
2
2
|
|
3
3
|
class << self
|
4
4
|
attr_accessor :silent
|
5
|
-
end
|
6
|
-
self.silent = false
|
7
|
-
class << self
|
8
5
|
attr_accessor :logger
|
6
|
+
attr_accessor :timeout
|
9
7
|
end
|
8
|
+
self.silent = false
|
10
9
|
self.logger = Object.new
|
11
10
|
self.logger.define_singleton_method :error do |str|
|
12
11
|
puts str unless Module.nesting.first.silent
|
@@ -35,6 +34,20 @@ module DirectLink
|
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
37
|
+
require "nethttputils"
|
38
|
+
require "fastimage"
|
39
|
+
NORMAL_EXCEPTIONS = [
|
40
|
+
SocketError,
|
41
|
+
Net::OpenTimeout,
|
42
|
+
Errno::ECONNRESET,
|
43
|
+
NetHTTPUtils::Error,
|
44
|
+
NetHTTPUtils::EOFError_from_rbuf_fill,
|
45
|
+
FastImage::UnknownImageType,
|
46
|
+
FastImage::ImageFetchFailure,
|
47
|
+
DirectLink::ErrorNotFound,
|
48
|
+
DirectLink::ErrorBadLink,
|
49
|
+
] # the only exceptions a gem user should expect and handle
|
50
|
+
|
38
51
|
|
39
52
|
def self.google src, width = 0
|
40
53
|
# this can handle links without schema because it's used for parsing community HTML pages
|
@@ -47,7 +60,7 @@ module DirectLink
|
|
47
60
|
when /\A(\/\/lh3\.googleusercontent\.com\/cOh2Nsv7EGo0QbuoKxoKZVZO_NcBzufuvPtzirMJfPmAzCzMtnEncfA7zGIDTJfkc1YZFX2MhgKnjA=)w530-h398-p\z/
|
48
61
|
"https:#{$1}s#{width}/"
|
49
62
|
when /\A(\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/[VW][a-zA-Z0-9_-]{9}I\/AAAAAAA[AC][a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{32}[gwAQ]CJoC\/)w530-h[23]\d\d-p\/[^\/]+\z/,
|
50
|
-
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs)\/)(?:s640|w\d
|
63
|
+
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs(?:YHQ)?)\/)(?:s640|w\d{2,4}-h\d\d\d?-p(?:-k-no-nu)?)\/[^\/]+\z/,
|
51
64
|
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9-]{11}\/[UV][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9]{2}\/[a-zA-Z0-9-]{11}\/)w72-h72-p-k-no-nu\/[^\/]+\z/
|
52
65
|
"https:#{$1}s#{width}/"
|
53
66
|
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/AAAAAAAAAAI\/AAAAAAAAAAQ\/[a-zA-Z0-9_]{11}\/)w530-h[13]\d\d-n\/[^\/]+\z/,
|
@@ -76,10 +89,9 @@ module DirectLink
|
|
76
89
|
end
|
77
90
|
|
78
91
|
require "json"
|
79
|
-
require "nethttputils"
|
80
92
|
|
81
93
|
# TODO make the timeout handling respect the way the DirectLink() method works with timeouts
|
82
|
-
def self.imgur link, timeout =
|
94
|
+
def self.imgur link, timeout = 2000
|
83
95
|
raise ErrorMissingEnvVar.new "define IMGUR_CLIENT_ID env var" unless ENV["IMGUR_CLIENT_ID"]
|
84
96
|
|
85
97
|
request_data = lambda do |url|
|
@@ -107,16 +119,16 @@ module DirectLink
|
|
107
119
|
elsif data["images"]
|
108
120
|
raise ErrorNotFound.new link.inspect if data["images"].empty?
|
109
121
|
data["images"]
|
110
|
-
elsif data["type"] && data["type"]
|
122
|
+
elsif data["type"] && %w{ image/jpeg image/png image/gif video/mp4 }.include?(data["type"])
|
111
123
|
# TODO check if this branch is possible at all
|
112
124
|
[ data ]
|
113
125
|
# elsif data["comment"]
|
114
126
|
# fi["https://imgur.com/" + data["image_id"]]
|
115
127
|
else
|
116
128
|
# one day single-video item should hit this but somehow it didn't yet
|
117
|
-
raise ErrorAssert.new "unknown data format #{
|
129
|
+
raise ErrorAssert.new "unknown data format #{json} for #{link}"
|
118
130
|
end
|
119
|
-
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|
|
131
|
+
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|jpe?g(?:\?fb)?|png))?\z/,
|
120
132
|
/\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{5})\.mp4\z/,
|
121
133
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{5}(?:[a-zA-Z0-9]{2})?)\z/,
|
122
134
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{7})(?:\?\S+)?\z/,
|
@@ -128,7 +140,7 @@ module DirectLink
|
|
128
140
|
raise ErrorBadLink.new link
|
129
141
|
end.map do |image|
|
130
142
|
case image["type"]
|
131
|
-
when
|
143
|
+
when *%w{ image/jpeg image/png image/gif video/mp4 }
|
132
144
|
image.values_at "link", "width", "height", "type"
|
133
145
|
else
|
134
146
|
raise ErrorAssert.new "unknown type of #{link}: #{image}"
|
@@ -137,7 +149,7 @@ module DirectLink
|
|
137
149
|
end
|
138
150
|
|
139
151
|
def self._500px link
|
140
|
-
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[
|
152
|
+
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[-[a-zA-Z0-9]%]+\/?\z} =~ link
|
141
153
|
require "nokogiri"
|
142
154
|
resp = NetHTTPUtils.request_data link
|
143
155
|
f = lambda do |form|
|
@@ -191,11 +203,11 @@ module DirectLink
|
|
191
203
|
attr_accessor :reddit_bot
|
192
204
|
end
|
193
205
|
def self.reddit link, timeout = 1000
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
206
|
+
return [true, link] if URI(link).host &&
|
207
|
+
URI(link).host.split(?.) == %w{ i redd it } &&
|
208
|
+
URI(link).path[/\A\/[a-z0-9]{12,13}\.(gif|jpg)\z/]
|
209
|
+
unless id = link[/\Ahttps:\/\/www\.reddit\.com\/gallery\/([0-9a-z]{5,6})\z/, 1]
|
210
|
+
raise DirectLink::ErrorBadLink.new link unless id = URI(link).path[/\A(?:\/r\/[0-9a-zA-Z_]+)?(?:\/comments|\/duplicates)?\/([0-9a-z]{5,6})(?:\/|\z)/, 1]
|
199
211
|
end
|
200
212
|
retry_on_json_parseerror = lambda do |&b|
|
201
213
|
t = 1
|
@@ -221,14 +233,19 @@ module DirectLink
|
|
221
233
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless json.size == 2
|
222
234
|
json.find{ |_| _["data"]["children"].first["kind"] == "t3" }
|
223
235
|
end
|
236
|
+
# TODO: do we handle linking Imgur albums?
|
224
237
|
data = json["data"]["children"].first["data"]
|
225
|
-
if data["media"]
|
226
|
-
return [true, data["media"]["reddit_video"]["fallback_url"]]
|
227
|
-
else
|
238
|
+
if data["media"]
|
239
|
+
return [true, data["media"]["reddit_video"]["fallback_url"]] if data["media"]["reddit_video"]
|
228
240
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless data["media"].keys.sort == %w{ oembed type } && %w{ youtube.com gfycat.com imgur.com }.include?(data["media"]["type"])
|
229
241
|
return [true, data["media"]["oembed"]["thumbnail_url"]]
|
230
|
-
end
|
231
|
-
return [true, data["
|
242
|
+
end
|
243
|
+
return [true, data["media_metadata"].values.map do |media|
|
244
|
+
next if media == {"status"=>"failed"}
|
245
|
+
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless media["status"] == "valid"
|
246
|
+
[media["m"], *media["s"].values_at("x", "y"), CGI.unescapeHTML(media["s"]["u"])]
|
247
|
+
end.compact] if data["media_metadata"]
|
248
|
+
return [true, "#{"https://www.reddit.com" if /\A\/r\/[0-9a-zA-Z_]+\/comments\/[0-9a-z]{5,6}\// =~ data["url"]}#{data["url"]}"] if data["crosspost_parent"]
|
232
249
|
return [true, data["url"]] unless data["is_self"]
|
233
250
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" if data["url"] != "https://www.reddit.com" + data["permalink"]
|
234
251
|
return [false, data["selftext"]]
|
@@ -236,23 +253,30 @@ module DirectLink
|
|
236
253
|
|
237
254
|
def self.vk link
|
238
255
|
id, mtd, field, f = case link
|
239
|
-
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))
|
256
|
+
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))?\z},
|
240
257
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>\d+)_\d+)%2Fphotos\k<user_id>\z},
|
241
|
-
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?all=1)?\z},
|
258
|
+
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(?:all|rev)=1)?\z},
|
242
259
|
%r{\Ahttps://vk\.com/feed\?section=likes&z=photo(?<_>)(?<id>-(?<user_id>\d+)_\d+)%2F(liked\d+|album\k<user_id>_0)\z},
|
243
260
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>-\d+)_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_0)\z},
|
244
261
|
%r{\Ahttps://vk\.com/wall(?<user_id>-\d+)_\d+\?z=photo(?<id>\k<user_id>_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_00%2Frev|\d+)\z}
|
245
262
|
[$2, :photos, :photos, lambda do |t|
|
246
263
|
raise ErrorAssert.new "our knowledge about VK API seems to be outdated" unless 1 == t.size
|
247
|
-
t
|
264
|
+
t
|
248
265
|
end ]
|
249
|
-
when %r{\Ahttps://vk\.com/wall(?<id
|
266
|
+
when %r{\Ahttps://vk\.com/wall(?<id>-?\d+_\d+)\z},
|
267
|
+
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id>\d+_\d+)\z}
|
250
268
|
[$1, :wall, :posts, lambda do |t|
|
251
|
-
t.first.fetch("attachments").
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
269
|
+
t.first.fetch("attachments").select do |item|
|
270
|
+
case item.keys
|
271
|
+
when %w{ type photo }
|
272
|
+
raise ErrorAssert.new "our knowledge about VK API seems to be outdated" unless item["type"] == "photo"
|
273
|
+
next true
|
274
|
+
when %w{ type audio }
|
275
|
+
raise ErrorAssert.new "our knowledge about VK API seems to be outdated" unless item["type"] == "audio"
|
276
|
+
else
|
277
|
+
raise ErrorAssert.new "our knowledge about VK API seems to be outdated"
|
278
|
+
end
|
279
|
+
end.map{ |i| i.fetch "photo" }
|
256
280
|
end ]
|
257
281
|
else
|
258
282
|
raise ErrorBadLink.new link
|
@@ -261,21 +285,22 @@ module DirectLink
|
|
261
285
|
sleep 0.25 # "error_msg"=>"Too many requests per second"
|
262
286
|
f.call( JSON.load( NetHTTPUtils.request_data "https://api.vk.com/method/#{mtd}.getById",
|
263
287
|
:POST, form: { field => id, :access_token => ENV["VK_ACCESS_TOKEN"], :client_secret => ENV["VK_CLIENT_SECRET"], :v => "5.101" }
|
264
|
-
).fetch("response") ).
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
288
|
+
).fetch("response") ).map do |photos|
|
289
|
+
photos.fetch("sizes").map do |size|
|
290
|
+
size.values_at("width", "height", "url").tap do |whu|
|
291
|
+
w, h, u = whu
|
292
|
+
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0
|
293
|
+
end
|
294
|
+
end.max_by{ |w, h, u| w * h }
|
295
|
+
end
|
270
296
|
end
|
271
297
|
|
272
298
|
class_variable_set :@@directlink, Struct.new(:url, :width, :height, :type)
|
273
299
|
end
|
274
300
|
|
275
301
|
|
276
|
-
|
277
|
-
|
278
|
-
def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
302
|
+
def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: false
|
303
|
+
timeout ||= DirectLink.timeout
|
279
304
|
ArgumentError.new("link should be a <String>, not <#{link.class}>") unless link.is_a? String
|
280
305
|
begin
|
281
306
|
URI link
|
@@ -313,7 +338,7 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
313
338
|
**( %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
314
339
|
%w{ redd it } == URI(link).host.split(?.) ? {Cookie: "over18=1"} : {} ),
|
315
340
|
}
|
316
|
-
head = NetHTTPUtils.request_data link, :
|
341
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: header, **(proxy ? {proxy: proxy} : {}), **(timeout ? {
|
317
342
|
timeout: timeout,
|
318
343
|
max_start_http_retry_delay: timeout,
|
319
344
|
max_read_retry_delay: timeout
|
@@ -369,29 +394,40 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
369
394
|
raise DirectLink::ErrorBadLink.new link if giveup # TODO: print original url in such cases if there was a recursion
|
370
395
|
f = ->_{ _.type == :a ? _.attr["href"] : _.children.flat_map(&f) }
|
371
396
|
require "kramdown"
|
372
|
-
return f[Kramdown::Document.new(u).root].
|
373
|
-
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup
|
397
|
+
return f[Kramdown::Document.new(u).root].flat_map do |sublink|
|
398
|
+
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup # TODO: maybe subtract from timeout the time we've already wasted
|
399
|
+
end
|
400
|
+
end
|
401
|
+
if u.is_a? Hash
|
402
|
+
return struct.new *u.values_at(*%w{ fallback_url width height }), "video"
|
403
|
+
elsif u.is_a? Array
|
404
|
+
return u.map do |t, x, y, u|
|
405
|
+
struct.new u, x, y, t
|
374
406
|
end
|
375
407
|
end
|
376
|
-
|
377
|
-
return DirectLink u
|
378
|
-
fail if link == u
|
408
|
+
raise DirectLink::ErrorNotFound.new link.inspect if link == u
|
409
|
+
return DirectLink u, timeout, giveup: giveup
|
379
410
|
rescue DirectLink::ErrorMissingEnvVar
|
380
411
|
end if %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
381
412
|
%w{ redd it } == URI(link).host.split(?.)
|
382
413
|
|
383
414
|
begin
|
384
|
-
w, h, u
|
385
|
-
|
415
|
+
return DirectLink.vk(link).map do |w, h, u|
|
416
|
+
struct.new u, w, h
|
417
|
+
end
|
386
418
|
rescue DirectLink::ErrorMissingEnvVar
|
387
419
|
end if %w{ vk com } == URI(link).host.split(?.)
|
388
420
|
|
389
421
|
begin
|
390
|
-
f = FastImage.new
|
422
|
+
f = FastImage.new link,
|
423
|
+
raise_on_failure: true,
|
424
|
+
timeout: timeout,
|
425
|
+
**(proxy ? {proxy: "http://#{proxy}"} : {}),
|
426
|
+
http_header: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"}
|
391
427
|
rescue FastImage::UnknownImageType
|
392
428
|
raise if giveup
|
393
429
|
require "nokogiri"
|
394
|
-
head = NetHTTPUtils.request_data link, :
|
430
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: {"User-Agent" => "Mozilla"},
|
395
431
|
max_start_http_retry_delay: timeout,
|
396
432
|
timeout: timeout, # NetHTTPUtild passes this as read_timeout to Net::HTTP.start
|
397
433
|
max_read_retry_delay: timeout # and then compares accumulated delay to this
|
@@ -402,7 +438,10 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
402
438
|
end
|
403
439
|
html = Nokogiri::HTML NetHTTPUtils.request_data link, header: {"User-Agent" => "Mozilla"}
|
404
440
|
if t = html.at_css("meta[@property='og:image']")
|
405
|
-
|
441
|
+
begin
|
442
|
+
return DirectLink URI.join(link, t[:content]).to_s, nil, *proxy, giveup: true
|
443
|
+
rescue URI::InvalidURIError
|
444
|
+
end
|
406
445
|
end unless ignore_meta
|
407
446
|
h = {} # TODO: maybe move it outside because of possible img[:src] recursion?...
|
408
447
|
l = lambda do |node, s = []|
|
@@ -416,9 +455,10 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
416
455
|
end
|
417
456
|
end
|
418
457
|
end
|
419
|
-
l[html].
|
420
|
-
raise if results.empty?
|
421
|
-
|
458
|
+
l[html].
|
459
|
+
tap{ |results| raise if results.empty? }.
|
460
|
+
group_by(&:first).map{ |k, v| [k.join(?>), v.map(&:last)] }.
|
461
|
+
max_by{ |_, v| v.map{ |i| i.width * i.height }.inject(:+) }.last
|
422
462
|
else
|
423
463
|
# TODO: maybe move this to right before `rescue` line
|
424
464
|
w, h = f.size
|
data/test.rb
CHANGED
@@ -13,6 +13,8 @@ fail unless ENV.include? "REDDIT_SECRETS"
|
|
13
13
|
|
14
14
|
require_relative "lib/directlink"
|
15
15
|
DirectLink.silent = true
|
16
|
+
DirectLink.timeout = 30 # TODO: tests about this attribute
|
17
|
+
|
16
18
|
describe DirectLink do
|
17
19
|
|
18
20
|
describe "./lib" do
|
@@ -171,6 +173,7 @@ describe DirectLink do
|
|
171
173
|
http://1.bp.blogspot.com/-iSU4orVuR9Y/VFYrwQZ5qYI/AAAAAAAAMnc/WY4VfCaeplw/w72-h72-p-k-no-nu/Wolf%2Bphotography2.jpg
|
172
174
|
http://1.bp.blogspot.com/-vPQSh6RKijU/VEi7r3D-jJI/AAAAAAAAL2Q/bGHmyuoDp5M/w72-h72-p-k-no-nu/Building%2BIn%2BLondon1-4__880.jpeg
|
173
175
|
http://1.bp.blogspot.com/-W4xKJSsVf3M/Uz73jPlctbI/AAAAAAAAGz4/K8Tw6PILMeY/w72-h72-p-k-no-nu/Beautiful+Japanese+places4.jpg
|
176
|
+
https://1.bp.blogspot.com/-__qsdLxNtcQ/XhaOQle-ECI/AAAAAAAABQ4/S_7SGG_K8eQ7VXIU2wyPvTj9OyBfr_1sQCLcBGAsYHQ/w1200-h630-p-k-no-nu/iceland_poppies_orange_flowers_field-wallpaper-3840x2160.jpg
|
174
177
|
https://lh3.googleusercontent.com/-tV86KJvppss/XE2Nb2Z2aAI/AAAAAAAAGu4/94E_AuB4YWAaJ59n43wmmd9rFa--OUuSQCJoC/w530-h338-n/IMG_6845%252C.png
|
175
178
|
https://lh3.googleusercontent.com/-cr-2ZSQGMPg/XFWLfetwr7I/AAAAAAAAQQQ/TbwDk56BBIwb4IDDO0SwfArFSZyDG0i0wCJoC/w530-h360-n/DSC07294.JPG
|
176
179
|
}.each_with_index do |link, i|
|
@@ -222,7 +225,7 @@ describe DirectLink do
|
|
222
225
|
|
223
226
|
# TODO: expand this for every branch in lib
|
224
227
|
%w{
|
225
|
-
https_long_blogspot https://
|
228
|
+
https_long_blogspot https://1.bp.blogspot.com/-__qsdLxNtcQ/XhaOQle-ECI/AAAAAAAABQ4/S_7SGG_K8eQ7VXIU2wyPvTj9OyBfr_1sQCLcBGAsYHQ/w1200-h630-p-k-no-nu/iceland_poppies_orange_flowers_field-wallpaper-3840x2160.jpg https://1.bp.blogspot.com/-__qsdLxNtcQ/XhaOQle-ECI/AAAAAAAABQ4/S_7SGG_K8eQ7VXIU2wyPvTj9OyBfr_1sQCLcBGAsYHQ/s0/
|
226
229
|
http_short_blogspot http://4.bp.blogspot.com/-poH-QXn7YGg/U-3ZTDkeF_I/AAAAAAAAISE/ms2gNIb-v-g/w72-h72-p-k-no-nu/Top-24-Inspired-181.jpg https://4.bp.blogspot.com/-poH-QXn7YGg/U-3ZTDkeF_I/AAAAAAAAISE/ms2gNIb-v-g/s0/
|
227
230
|
just_gplus https://lh3.googleusercontent.com/-NiGph3ObOPg/XE3DgnavXlI/AAAAAAABvgE/pcPPCe88rsU1r941wwP76TVf_o89i74kwCJoC/w530-h353-n/DSCF0753.JPG https://lh3.googleusercontent.com/-NiGph3ObOPg/XE3DgnavXlI/AAAAAAABvgE/pcPPCe88rsU1r941wwP76TVf_o89i74kwCJoC/s0/
|
228
231
|
google_keep https://lh5.googleusercontent.com/fRmAL_04p7oomNHCiV4tH4-agHSDBtLaWi_Tb6bgE5ZSHVu5OjQF3iRn06nNwP3ywZwdFP92zWM-o8yw0cn6m0tDTBARuO6F9e0wYu_1=s685 https://lh5.googleusercontent.com/fRmAL_04p7oomNHCiV4tH4-agHSDBtLaWi_Tb6bgE5ZSHVu5OjQF3iRn06nNwP3ywZwdFP92zWM-o8yw0cn6m0tDTBARuO6F9e0wYu_1=s0
|
@@ -328,9 +331,11 @@ describe DirectLink do
|
|
328
331
|
["https://imgur.com/9yaMdJq", "https://i.imgur.com/9yaMdJq.mp4", 720, 404, "video/mp4"],
|
329
332
|
["http://imgur.com/gallery/dCQprEq/new", "https://i.imgur.com/dCQprEq.jpg", 5760, 3840, "image/jpeg"],
|
330
333
|
["https://i.imgur.com/fFUTSJu.jpg?fb", "https://i.imgur.com/fFUTSJu.jpg", 1469, 2200, "image/jpeg"], # from reddit.com/93mtba
|
334
|
+
["https://i.imgur.com/IxUrhGX.jpeg", "https://i.imgur.com/IxUrhGX.jpg", 4384, 3012, "image/jpeg"], # jpEg
|
335
|
+
["https://imgur.com/gallery/9f2s9EE", "https://i.imgur.com/9f2s9EE.mp4", 960, 1438, "video/mp4"], # mp4
|
331
336
|
].each_with_index do |t, i|
|
332
337
|
url, n, first, last, type = t
|
333
|
-
it "##{i + 1}" do
|
338
|
+
it "kinds of post ##{i + 1}" do
|
334
339
|
case last
|
335
340
|
when NilClass
|
336
341
|
if n.is_a? Class
|
@@ -363,6 +368,8 @@ describe DirectLink do
|
|
363
368
|
[
|
364
369
|
[ :_500px, [
|
365
370
|
["https://500px.com/photo/264092015/morning-rider-by-tiger-seo", [1200, 800, "https://drscdn.500px.org/photo/264092015/m%3D900/v2?sig=68a9206477f573d8e2838faa6a929e7267f22dc5f9e98f1771f7a8a63efa2ed7", "jpeg"]],
|
371
|
+
["https://500px.com/photo/1017579834/-poppies-flowers-by-David-Dubnitskiy/", [1819, 2500, "https://drscdn.500px.org/photo/1017579834/m%3D900/v2?sig=022e3e9dd836ffd8c1d31ae26c83735e4e42b4c8733d0c4380d8270aebbca44e", "jpeg"]],
|
372
|
+
["https://500px.com/photo/1017557263/iss%E5%87%8C%E6%97%A5%E5%81%8F%E9%A3%9F-by-%E7%A7%8B%E8%A3%A4Choku-/", [2048, 2048, "https://drscdn.500px.org/photo/1017557263/m%3D2048/v2?sig=1994a09e33794117082e91fa58c40614a2bfd19d3e0dd78e067968d38aca92be", "jpeg"]]
|
366
373
|
] ],
|
367
374
|
[ :flickr, [
|
368
375
|
["https://www.flickr.com/photos/tomas-/17220613278/", DirectLink::ErrorNotFound],
|
@@ -374,7 +381,7 @@ describe DirectLink do
|
|
374
381
|
["https://www.flickr.com/photos/130019700@N03/18848891351/in/dateposted-public/", [4621, 3081, "https://live.staticflickr.com/3796/18848891351_f751b35aeb_o.jpg"]], # userid in-public
|
375
382
|
["https://www.flickr.com/photos/frank3/3778768209/in/photolist-6KVb92-eCDTCr-ur8K-7qbL5z-c71afh-c6YvXW-7mHG2L-c71ak9-c71aTq-c71azf-c71aq5-ur8Q-6F6YkR-eCDZsD-eCEakg-eCE6DK-4ymYku-7ubEt-51rUuc-buujQE-ur8x-9fuNu7-6uVeiK-qrmcC6-ur8D-eCEbei-eCDY9P-eCEhCk-eCE5a2-eCH457-eCHrcq-eCEdZ4-eCH6Sd-c71b5o-c71auE-eCHa8m-eCDSbz-eCH1dC-eCEg3v-7JZ4rh-9KwxYL-6KV9yR-9tUSbU-p4UKp7-eCHfwS-6KVbAH-5FrdbP-eeQ39v-eeQ1UR-4jHAGN", [4096, 2723, "https://live.staticflickr.com/2499/3778768209_dfa75a41cc_4k.jpg"]],
|
376
383
|
["https://www.flickr.com/photos/patricksloan/18230541413/sizes/l", [2048, 491, "https://live.staticflickr.com/5572/18230541413_fec4783d79_k.jpg"]],
|
377
|
-
["https://flic.kr/p/vPvCWJ", [
|
384
|
+
["https://flic.kr/p/vPvCWJ", [5120, 3413, "https://live.staticflickr.com/507/19572004110_1bd49c5ebd_5k.jpg"]],
|
378
385
|
] ],
|
379
386
|
[ :wiki, [
|
380
387
|
["https://en.wikipedia.org/wiki/Prostitution_by_country#/media/File:Prostitution_laws_of_the_world.PNG", "https://upload.wikimedia.org/wikipedia/commons/e/e8/Prostitution_laws_of_the_world.PNG"],
|
@@ -393,24 +400,18 @@ describe DirectLink do
|
|
393
400
|
["http://redd.it/988889", [true, "https://i.redd.it/3h5xls6ehrg11.jpg"]],
|
394
401
|
["https://www.reddit.com/r/CatsStandingUp/duplicates/abn0ua/cat/", [true, "https://v.redd.it/s9b86afb6w721/DASH_2_4_M?source=fallback"]],
|
395
402
|
["https://www.reddit.com/r/hangers/comments/97you5/tara_radovic/", [true, "https://i.imgur.com/rbLqgOu.jpg"]], # "crosspost" from Imgur
|
396
|
-
|
397
|
-
|
398
|
-
["https://
|
399
|
-
["https://
|
400
|
-
["https://
|
401
|
-
|
402
|
-
["https://vk.com/photo533531776_456239427?all=1", [750, 938, "https://sun9-25.userapi.com/c849416/v849416600/14b949/V01Ch1gYjhc.jpg"]],
|
403
|
-
["https://vk.com/photo-155488973_456242404", [1486, 1000, "https://sun9-7.userapi.com/c852132/v852132877/8578e/m6AJWiskiKE.jpg"]],
|
404
|
-
["https://vk.com/id2272074?z=photo2272074_264578776%2Fphotos2272074", [604, 484, "https://sun9-10.userapi.com/c10472/u2272074/-7/x_407b2ba2.jpg"]],
|
405
|
-
["https://vk.com/feed?section=likes&z=photo-117564754_456261460%2Fliked3902406", [1024, 1335, "https://sun9-72.userapi.com/c854028/v854028353/895b6/izQJresLdf0.jpg"]],
|
406
|
-
["https://vk.com/likizimy?z=photo-42543351_456239941%2Fwall-42543351_1908", [1179, 1731, "https://sun9-47.userapi.com/c855036/v855036571/60f7b/ryCPJIMyMkI.jpg"]],
|
407
|
-
["https://vk.com/e_rod?z=photo298742340_457247118%2Fphotos298742340", [1728, 2160, "https://sun9-53.userapi.com/c858320/v858320596/c7714/oImGe4o1ZJI.jpg"]],
|
403
|
+
["https://www.reddit.com/gallery/i1u6rb", [true, [["image/jpg", 1440, 1440, "https://preview.redd.it/x31msdj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=b79952f8364bb98692d978944347f19e28774d1b"], ["image/jpg", 2441, 2441, "https://preview.redd.it/mwkzq6j6vee51.jpg?width=2441&format=pjpg&auto=webp&s=455e669356550351e6b8768d8009de616c11142a"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0ws1j8j6vee51.jpg?width=1440&format=pjpg&auto=webp&s=061582da8478e7601a7ce7a97fa1663852873726"], ["image/jpg", 1440, 1440, "https://preview.redd.it/2un68aj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=a980f0e5814c2360f5d7a0fb12f391e304942c06"], ["image/jpg", 3024, 3780, "https://preview.redd.it/5bsfaej6vee51.jpg?width=3024&format=pjpg&auto=webp&s=9b96b4b7262eebacc7571a9f0ad902e2034bf990"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0z010ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=f0c29be6ec98b835a482c7584cca43fd16217bc8"], ["image/jpg", 1440, 1440, "https://preview.redd.it/aylm2ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=39cf471b14020a1f137bc9bbb294bf5489cab3e7"]]]], # TODO: find smaller gallery
|
404
|
+
["https://www.reddit.com/i1u6rb", [true, [["image/jpg", 1440, 1440, "https://preview.redd.it/x31msdj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=b79952f8364bb98692d978944347f19e28774d1b"], ["image/jpg", 2441, 2441, "https://preview.redd.it/mwkzq6j6vee51.jpg?width=2441&format=pjpg&auto=webp&s=455e669356550351e6b8768d8009de616c11142a"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0ws1j8j6vee51.jpg?width=1440&format=pjpg&auto=webp&s=061582da8478e7601a7ce7a97fa1663852873726"], ["image/jpg", 1440, 1440, "https://preview.redd.it/2un68aj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=a980f0e5814c2360f5d7a0fb12f391e304942c06"], ["image/jpg", 3024, 3780, "https://preview.redd.it/5bsfaej6vee51.jpg?width=3024&format=pjpg&auto=webp&s=9b96b4b7262eebacc7571a9f0ad902e2034bf990"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0z010ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=f0c29be6ec98b835a482c7584cca43fd16217bc8"], ["image/jpg", 1440, 1440, "https://preview.redd.it/aylm2ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=39cf471b14020a1f137bc9bbb294bf5489cab3e7"]]]], # TODO: find smaller gallery
|
405
|
+
["https://www.reddit.com/gallery/i3y7pc", [true, "https://www.reddit.com/gallery/i3y7pc"]], # deleted gallery
|
406
|
+
["https://www.reddit.com/ik6c6a", [true, "https://www.reddit.com/r/Firewatch/comments/ik6brf/new_wallpaper_for_my_triple_monitor_setup/"]], # deleted gallery
|
407
|
+
["https://www.reddit.com/kbjdwc", [true, [["image/jpg", 500, 500, "https://preview.redd.it/71t8ljeexo461.jpg?width=500&format=pjpg&auto=webp&s=df211fe0699e3970681ffe493ed1af79725857e8"], ["image/jpg", 720, 446, "https://preview.redd.it/c11nt7hexo461.jpg?width=720&format=pjpg&auto=webp&s=5e34ab0e6d54c0acfdb47f1daaf283087c5ad6a6"], ["image/jpg", 713, 588, "https://preview.redd.it/67mqvllexo461.jpg?width=713&format=pjpg&auto=webp&s=969dfb52bedd6f0055249aa8b7454b23adaa946e"]]]], # failed media
|
408
|
+
# TODO: empty result? https://redd.it/9hhtsq
|
408
409
|
] ],
|
409
410
|
].each do |method, tests|
|
410
|
-
next if method == :vk && ENV.include?("
|
411
|
-
describe method do
|
411
|
+
next if method == :vk && ENV.include?("CI")
|
412
|
+
describe "kinds of links #{method}" do
|
412
413
|
tests.each_with_index do |(input, expectation), i|
|
413
|
-
it "
|
414
|
+
it "##{i + 1}" do
|
414
415
|
if expectation.is_a? Class
|
415
416
|
assert_raises expectation, input do
|
416
417
|
DirectLink.method(method).call input
|
@@ -424,6 +425,37 @@ describe DirectLink do
|
|
424
425
|
end
|
425
426
|
end
|
426
427
|
|
428
|
+
describe "kinds of links vk" do
|
429
|
+
next if ENV.include? "CI"
|
430
|
+
[
|
431
|
+
["https://vk.com/wall-105984091_7806", [960, 1280, "https://userapi.com/impf/c855224/v855224900/a72f1/7OZ8ux9Wcwo.jpg"]],
|
432
|
+
# ["https://vk.com/wall298742340_4715", [1080, 1080, "https://userapi.com/impf/c857136/v857136625/15e38b/CsCqsJD174A.jpg"]], # TODO: it's now 404
|
433
|
+
["https://vk.com/wall-185182611_454?z=photo-185182611_457239340%2Fwall-185182611_454", [1280, 960, "https://userapi.com/impf/c851028/v851028578/1a62f6/VB4SdR1O6Tg.jpg"]],
|
434
|
+
["https://vk.com/wall-105984091_7946?z=photo-105984091_457243312%2Falbum-105984091_00%2Frev", [1280, 875, "https://userapi.com/impf/c852020/v852020134/1b6b36/0IsDFb-Hda4.jpg"]],
|
435
|
+
["https://vk.com/id57030827?z=photo57030827_456241143%2Falbum57030827_0", [1920, 1440, "https://userapi.com/impf/c845322/v845322944/167836/bP9z41BybhI.jpg"]],
|
436
|
+
["https://vk.com/id57030827?z=photo57030827_456241143", [1920, 1440, "https://userapi.com/impf/c845322/v845322944/167836/bP9z41BybhI.jpg"]],
|
437
|
+
["https://vk.com/photo1_215187843?all=1", [2560, 1913, "https://userapi.com/impf/c210/v210001/6/53_VwoACy4I.jpg"]],
|
438
|
+
["https://vk.com/photo298742340_456243948?rev=1", [1583, 1080, "https://userapi.com/impf/c852224/v852224479/321be/9rZaJ2QTdz4.jpg"]],
|
439
|
+
["https://vk.com/photo-155488973_456242404", [1486, 1000, "https://userapi.com/impf/c852132/v852132877/8578e/m6AJWiskiKE.jpg"]],
|
440
|
+
# ["https://vk.com/id2272074?z=photo2272074_264578776%2Fphotos2272074", [604, 484, "https://userapi.com/impf/c10472/u2272074/-7/x_407b2ba2.jpg"]], # TODO: it's now 404
|
441
|
+
["https://vk.com/feed?section=likes&z=photo-117564754_456261460%2Fliked3902406", [1024, 1335, "https://userapi.com/impf/c854028/v854028353/895b6/izQJresLdf0.jpg"]],
|
442
|
+
["https://vk.com/likizimy?z=photo-42543351_456239941%2Fwall-42543351_1908", [1179, 1731, "https://userapi.com/impf/c855036/v855036571/60f7b/ryCPJIMyMkI.jpg"]],
|
443
|
+
["https://vk.com/e_rod?z=photo298742340_457247118%2Fphotos298742340", [1728, 2160, "https://userapi.com/impf/c858320/v858320596/c7714/oImGe4o1ZJI.jpg"]],
|
444
|
+
].each_with_index do |(input, expectation), i|
|
445
|
+
it "##{i + 1}" do
|
446
|
+
result = DirectLink.method(:vk).call input
|
447
|
+
assert_equal 1, result.size
|
448
|
+
result[0][-1].tap do |url|
|
449
|
+
url.replace( URI.parse(url).tap do |_|
|
450
|
+
_.host = _.host.split(?.).drop(1).join(?.)
|
451
|
+
_.query = nil
|
452
|
+
end.to_s )
|
453
|
+
end
|
454
|
+
assert_equal [expectation], result, "#{input} :: #{result.inspect} != #{expectation.inspect}"
|
455
|
+
end
|
456
|
+
end
|
457
|
+
end
|
458
|
+
|
427
459
|
{
|
428
460
|
google: [
|
429
461
|
"https://lh3.googleusercontent.com/-NVJgqmI_2Is/WqMM2OMYg-I/AAAAAAAALrk/5-p3JL3iZt0Ho9dOf_p3gpddzqwr3Wp0ACJoC/w424-h318-n/001",
|
@@ -441,7 +473,7 @@ describe DirectLink do
|
|
441
473
|
["https://goo.gl/ySqUb5", "https://i.imgur.com/QpOBvRY.png"],
|
442
474
|
],
|
443
475
|
_500px: [
|
444
|
-
%w{ https://500px.com/photo/112134597/milky-way-by-tom-hall https://
|
476
|
+
%w{ https://500px.com/photo/112134597/milky-way-by-tom-hall https://500px.com/photo/112134597/milky-way-by-tom-hall },
|
445
477
|
],
|
446
478
|
flickr: [
|
447
479
|
"https://www.flickr.com/photos/59880970@N07/15773941043/in/dateposted-public/",
|
@@ -456,13 +488,13 @@ describe DirectLink do
|
|
456
488
|
["http://redd.it/32tq0i", "https://www.reddit.com/comments/32tq0i"],
|
457
489
|
["https://reddit.com/123456", "https://www.reddit.com/r/funny/comments/123456/im_thinking_about_getting_a_dog_and_youtubed_ways/"],
|
458
490
|
# ["https://www.reddit.com/r/travel/988889", "https://www.reddit.com/r/travel/comments/988889/playa_miramar_in_guaymas_sonora/"],
|
459
|
-
"https://www.reddit.com/r/
|
491
|
+
"https://www.reddit.com/r/PareidoliaGoneWild/comments/hzrlq6/beard_trimmer_on_display_at_best_buy_they_knew/", # NSFW causes redirect to /over_18? if the special cookie not provided
|
460
492
|
],
|
461
493
|
vk: [
|
462
494
|
"https://vk.com/id57030827?z=photo57030827_456241143",
|
463
495
|
],
|
464
496
|
}.each do |method, tests|
|
465
|
-
describe "DirectLink() calls #{method}" do
|
497
|
+
describe "DirectLink() sees domain name and calls #{method}" do
|
466
498
|
tests.each_with_index do |(input, expected), i|
|
467
499
|
it "##{i + 1}" do
|
468
500
|
DirectLink.stub method, ->link{
|
@@ -598,6 +630,16 @@ describe DirectLink do
|
|
598
630
|
)
|
599
631
|
end
|
600
632
|
|
633
|
+
it "throws ErrorNotFound when Reddit gallery is removed" do
|
634
|
+
assert_raises DirectLink::ErrorNotFound do
|
635
|
+
DirectLink "https://www.reddit.com/gallery/i3y7pc"
|
636
|
+
end
|
637
|
+
end
|
638
|
+
|
639
|
+
it "follows Reddit crosspost" do
|
640
|
+
assert_equal %w{ image/png image/png }, DirectLink("https://www.reddit.com/ik6c6a").map(&:type)
|
641
|
+
end
|
642
|
+
|
601
643
|
it "throws ErrorBadLink if link is invalid" do
|
602
644
|
assert_equal "test".inspect, (
|
603
645
|
assert_raises DirectLink::ErrorBadLink do
|
@@ -648,9 +690,9 @@ describe DirectLink do
|
|
648
690
|
|
649
691
|
describe "other domains tests" do
|
650
692
|
[
|
651
|
-
["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", ["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", 1280, 853, :jpeg], nil, 1],
|
693
|
+
# ["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", ["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", 1280, 853, :jpeg], nil, 1], # website is dead?
|
652
694
|
# ["http://minus.com/lkP3hgRJd9npi", SocketError, /nodename nor servname provided, or not known|No address associated with hostname/, 0],
|
653
|
-
["http://www.cutehalloweencostumeideas.org/wp-content/uploads/2017/10/Niagara-Falls_04.jpg", SocketError, /nodename nor servname provided, or not known|Name or service not known/, 0],
|
695
|
+
["http://www.cutehalloweencostumeideas.org/wp-content/uploads/2017/10/Niagara-Falls_04.jpg", SocketError, /nodename nor servname provided, or not known|Name or service not known|getaddrinfo: Name does not resolve/, 0],
|
654
696
|
].each_with_index do |(input, expectation, message_string_or_regex, max_redirect_resolving_retry_delay), i|
|
655
697
|
it "##{i + 1}" do
|
656
698
|
if expectation.is_a? Class
|
@@ -674,37 +716,40 @@ describe DirectLink do
|
|
674
716
|
describe "giving up" do
|
675
717
|
[
|
676
718
|
["http://example.com", FastImage::UnknownImageType],
|
677
|
-
["https://www.tic.com/index.html", FastImage::UnknownImageType, true],
|
678
|
-
["https://www.tic.com/index.html", 2],
|
719
|
+
# ["https://www.tic.com/index.html", FastImage::UnknownImageType, true], # needs new test or stub
|
720
|
+
# ["https://www.tic.com/index.html", 2], # needs new test or stub
|
679
721
|
["http://imgur.com/HQHBBBD", FastImage::UnknownImageType, true],
|
680
722
|
["http://imgur.com/HQHBBBD", "https://i.imgur.com/HQHBBBD.jpg?fb"], # .at_css("meta[@property='og:image']")
|
681
723
|
["https://www.deviantart.com/nadyasonika/art/Asuka-Langley-Beach-Time-590134861", FastImage::UnknownImageType, true],
|
682
|
-
["https://www.deviantart.com/nadyasonika/art/Asuka-Langley-Beach-Time-590134861", "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/943f66cb-78ad-40f2-a086-44420b98b431/d9rcmz1-5cbc5670-0193-485b-ac14-755ddb9562f4.jpg/v1/fill/w_1024,h_732,q_75,strp/asuka_langley_beach_time_by_nadyasonika_d9rcmz1-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.
|
724
|
+
["https://www.deviantart.com/nadyasonika/art/Asuka-Langley-Beach-Time-590134861", "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/943f66cb-78ad-40f2-a086-44420b98b431/d9rcmz1-5cbc5670-0193-485b-ac14-755ddb9562f4.jpg/v1/fill/w_1024,h_732,q_75,strp/asuka_langley_beach_time_by_nadyasonika_d9rcmz1-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3siaGVpZ2h0IjoiPD03MzIiLCJwYXRoIjoiXC9mXC85NDNmNjZjYi03OGFkLTQwZjItYTA4Ni00NDQyMGI5OGI0MzFcL2Q5cmNtejEtNWNiYzU2NzAtMDE5My00ODViLWFjMTQtNzU1ZGRiOTU2MmY0LmpwZyIsIndpZHRoIjoiPD0xMDI0In1dXSwiYXVkIjpbInVybjpzZXJ2aWNlOmltYWdlLm9wZXJhdGlvbnMiXX0.L6OhXuQZ_9ovKOdjjuQbvxpD0mG8M_KiqV4ljEDfW3Q"],
|
683
725
|
["https://calgary.skyrisecities.com/news/2019/11/blue-morning-light", "https://cdn.skyrisecities.com/sites/default/files/images/articles/2019/11/39834/39834-132071.jpg"], # og:image without scheme
|
726
|
+
["https://www.reddit.com/r/darksouls3/comments/e59djh/hand_it_over_that_thing_your_wallpaper/", DirectLink::ErrorBadLink, true],
|
727
|
+
["https://www.reddit.com/r/darksouls3/comments/e59djh/hand_it_over_that_thing_your_wallpaper/", 6],
|
684
728
|
].each_with_index do |(input, expectation, giveup), i|
|
685
729
|
it "##{i + 1} (#{URI(input).host}) (giveup=#{!!giveup})" do # to match with minitest `-n` run flag
|
686
|
-
ti = ENV.delete "IMGUR_CLIENT_ID"
|
687
|
-
tr = ENV.delete "REDDIT_SECRETS"
|
730
|
+
ti = ENV.delete "IMGUR_CLIENT_ID" if %w{ imgur com } == URI(input).host.split(?.).last(2)
|
731
|
+
tr = ENV.delete "REDDIT_SECRETS" if %w{ reddit com } == URI(input).host.split(?.).last(2)
|
688
732
|
begin
|
689
733
|
case expectation
|
690
734
|
when Class
|
691
735
|
e = assert_raises expectation, "for #{input} (giveup = #{giveup})" do
|
692
|
-
DirectLink input,
|
736
|
+
DirectLink input, 5, *ENV["PROXY"], giveup: giveup
|
693
737
|
end
|
694
738
|
assert_equal expectation.to_s, e.class.to_s, "for #{input} (giveup = #{giveup})"
|
695
739
|
when String
|
696
|
-
result = DirectLink input,
|
740
|
+
result = DirectLink input, 5, *ENV["PROXY"], giveup: giveup
|
697
741
|
assert_equal expectation, result.url, "for #{input} (giveup = #{giveup})"
|
698
742
|
else
|
699
|
-
result = DirectLink input,
|
743
|
+
result = DirectLink input, 5, *ENV["PROXY"], giveup: giveup
|
700
744
|
result = [result] unless result.is_a? Array # we can't do `Array(<Struct>)` because it splats by elements
|
701
745
|
assert_equal expectation, result.size, ->{
|
702
746
|
"for #{input} (giveup = #{giveup}): #{result.map &:url}"
|
703
747
|
}
|
704
748
|
end
|
749
|
+
# weird that this test may take longer than 5 sec
|
705
750
|
ensure
|
706
|
-
ENV["IMGUR_CLIENT_ID"] = ti
|
707
|
-
ENV["REDDIT_SECRETS"] = tr
|
751
|
+
ENV["IMGUR_CLIENT_ID"] = ti if ti
|
752
|
+
ENV["REDDIT_SECRETS"] = tr if tr
|
708
753
|
end
|
709
754
|
end
|
710
755
|
end
|
@@ -744,21 +789,28 @@ describe DirectLink do
|
|
744
789
|
|
745
790
|
describe "fails" do
|
746
791
|
[
|
747
|
-
[1, "http://example.com/",
|
748
|
-
[1, "http://example.com/404",
|
792
|
+
[1, "http://example.com/", /\AFastImage::UnknownImageType\n\z/],
|
793
|
+
[1, "http://example.com/404", /\ANetHTTPUtils::Error: HTTP error #404 \n\z/],
|
749
794
|
|
750
795
|
# TODO: a test when the giveup=false fails and reraises the DirectLink::ErrorMissingEnvVar
|
751
796
|
# maybe put it to ./lib tests
|
752
797
|
|
753
798
|
# by design it should be impossible to write a test for DirectLink::ErrorAssert
|
754
|
-
[1, "https://flic.kr/p/DirectLinkErrorNotFound",
|
799
|
+
[1, "https://flic.kr/p/DirectLinkErrorNotFound", /\ANetHTTPUtils::Error: HTTP error #404 \n\z/],
|
755
800
|
|
756
|
-
[1, "https://imgur.com/a/badlinkpattern",
|
801
|
+
[1, "https://imgur.com/a/badlinkpattern", /\ANetHTTPUtils::Error: HTTP error #404 \n\z/],
|
757
802
|
# TODO: a test that it appends the `exception.cause`
|
803
|
+
|
804
|
+
[1, "https://groundingpositivity.com/2020/08/13/new-quantum-app-will-make-you-wonder-do-we-live-in-a-simulation/", (
|
805
|
+
Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4.0") ?
|
806
|
+
/\ANetHTTPUtils::EOFError_from_rbuf_fill: probably the old Ruby empty backtrace EOFError exception from net\/protocol\.rb: end of file reached\n\z/ :
|
807
|
+
/\A\S+\/net\/protocol\.rb:\d+:in `rbuf_fill': end of file reached \(EOFError\)\n/
|
808
|
+
) ], # TODO: add also a test to nethttputils gem
|
758
809
|
].each_with_index do |(expected_exit_code, link, expected_output, unset), i| # TODO: unset is not used anymore or I have to go sleep?
|
759
810
|
it "##{i + 1}" do
|
760
811
|
string, status = Open3.capture2e "export #{(File.read("api_tokens_for_travis.sh") + File.read("vk.secret")).scan(/(?<=^export )\S+=\S+/).join(" ")}#{unset} && RUBYOPT='-rbundler/setup #{$-I.map{ |i| "-I #{i}" }.join " "}' ./bin/directlink #{link}"
|
761
|
-
assert_equal
|
812
|
+
assert_equal expected_exit_code, status.exitstatus, "for #{link}"
|
813
|
+
assert string[expected_output], "for #{link}"
|
762
814
|
end
|
763
815
|
end
|
764
816
|
end
|
@@ -813,12 +865,13 @@ describe DirectLink do
|
|
813
865
|
# TODO: test about --json
|
814
866
|
it "uses <meta> tag" do
|
815
867
|
string, status = Open3.capture2e "RUBYOPT='-rbundler/setup' ./bin/directlink --json https://www.kp.ru/daily/26342.7/3222103/"
|
816
|
-
assert_equal [0, "https://
|
817
|
-
end
|
818
|
-
it "ignores <meta> tag" do
|
819
|
-
string, status = Open3.capture2e "RUBYOPT='-rbundler/setup' ./bin/directlink --json --ignore-meta https://www.kp.ru/daily/26342.7/3222103/"
|
820
|
-
assert_equal [0, 21, "https://s11.stc.all.kpcdn.net/share/i/12/8024261/wx1080.jpg"], [status.exitstatus, JSON.load(string).size, JSON.load(string).first.fetch("url")]
|
868
|
+
assert_equal [0, "https://s11.stc.all.kpcdn.net/share/i/12/8054352/cr-1200-630.wm-asnplfru-100-tr-0-0.t-13-3222103-ttps-54-14-0083CD-1010-l-85-b-42.t-13-3222103-ttps-54-14-FFF-1010-l-85-b-42.t-207-5-asb-37-10-FFF-788-l-370-t-68.m2018-03-14T02-10-20.jpg"], [status.exitstatus, JSON.load(string).fetch("url")]
|
821
869
|
end
|
870
|
+
# TODO: kp.ru broke the page -- images are gone
|
871
|
+
# it "ignores <meta> tag" do
|
872
|
+
# string, status = Open3.capture2e "RUBYOPT='-rbundler/setup' ./bin/directlink --json --ignore-meta https://www.kp.ru/daily/26342.7/3222103/"
|
873
|
+
# assert_equal [0, 21, "https://s11.stc.all.kpcdn.net/share/i/12/8024261/inx960x640.jpg"], [status.exitstatus, JSON.load(string).size, JSON.load(string).first.fetch("url")]
|
874
|
+
# end
|
822
875
|
|
823
876
|
end
|
824
877
|
|