directlink 0.0.8.6 → 0.0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/directlink +2 -10
- data/directlink.gemspec +6 -7
- data/lib/directlink.rb +93 -53
- data/test.rb +96 -43
- metadata +8 -31
- data/.bashrc +0 -4
- data/.travis.yml +0 -37
- data/Gemfile +0 -3
- data/README.md +0 -198
- data/Rakefile +0 -1
- data/api_tokens_for_travis.sh +0 -8
- data/gplus.txt +0 -1454
- data/reddit_token_for_travis.yaml +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f745bed3faf2b74dfe4532357fbb772fe32598b1
|
4
|
+
data.tar.gz: 203127452f8e51be364fff16a4c1eafa8a0a21df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75bd9d5351c48c4f1df45b76f4835abc52bd057eabd01f4615a2056c16f4f9cf57546c3ff7123f479836206d1bb3164a740bebd2d81b84899b5507ee74d0e178
|
7
|
+
data.tar.gz: 6e20c47d7270b6bf3b0920ea964337b5143e255eb789baac70823218c9b689b13a6b16abdc729a3c134a2b5cf48fccfa074ec64bebac6ed835a32764b8f5726c
|
data/bin/directlink
CHANGED
@@ -53,6 +53,7 @@ abort "usage: directlink [--debug] [--json] [--github] [--ignore-meta] <link1> <
|
|
53
53
|
}" if [nil, "-h", "--help", "-v", "--version"].include? ARGV.first
|
54
54
|
|
55
55
|
begin
|
56
|
+
# Struct instances have #each and Array() ruins them so we use .is_a?(Array)
|
56
57
|
if json
|
57
58
|
require "json"
|
58
59
|
t = ARGV.map do |link|
|
@@ -67,16 +68,7 @@ begin
|
|
67
68
|
(t.is_a?(Array) ? t : [t]).each{ |s| puts "=> #{s.url}\n #{s.type} #{s.width}x#{s.height}" }
|
68
69
|
end
|
69
70
|
end
|
70
|
-
rescue
|
71
|
-
Net::OpenTimeout,
|
72
|
-
Errno::ECONNRESET,
|
73
|
-
NetHTTPUtils::Error,
|
74
|
-
FastImage::UnknownImageType,
|
75
|
-
FastImage::ImageFetchFailure,
|
76
|
-
# DirectLink::ErrorMissingEnvVar,
|
77
|
-
# DirectLink::ErrorAssert,
|
78
|
-
DirectLink::ErrorNotFound,
|
79
|
-
DirectLink::ErrorBadLink => e
|
71
|
+
rescue *DirectLink::NORMAL_EXCEPTIONS => e
|
80
72
|
puts e.backtrace if debug
|
81
73
|
cause = e.cause if e.cause if e.respond_to? :cause
|
82
74
|
c = e.class.to_s
|
data/directlink.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "directlink"
|
3
|
-
spec.version = "0.0.
|
4
|
-
spec.summary = "
|
3
|
+
spec.version = "0.0.9.2"
|
4
|
+
spec.summary = "obtains from any kind of hyperlink a link to an image, its format and resolution"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
7
7
|
spec.email = "nakilon@gmail.com"
|
@@ -11,19 +11,18 @@ Gem::Specification.new do |spec|
|
|
11
11
|
|
12
12
|
spec.add_dependency "fastimage", "~>2.1.3"
|
13
13
|
spec.add_dependency "nokogiri"
|
14
|
-
spec.add_dependency "nethttputils", "~>0.
|
15
|
-
spec.add_dependency "reddit_bot", "~>1.7.
|
14
|
+
spec.add_dependency "nethttputils", "~>0.4.1.0"
|
15
|
+
spec.add_dependency "reddit_bot", "~>1.7.8"
|
16
16
|
spec.add_dependency "kramdown"
|
17
17
|
spec.add_dependency "addressable"
|
18
18
|
spec.add_development_dependency "minitest"
|
19
|
-
spec.add_development_dependency "byebug"
|
20
19
|
|
21
20
|
spec.require_path = "lib"
|
22
21
|
spec.bindir = "bin"
|
23
22
|
spec.executable = "directlink"
|
24
23
|
spec.test_file = "test.rb"
|
25
|
-
spec.files =
|
24
|
+
spec.files = %w{ LICENSE directlink.gemspec lib/directlink.rb bin/directlink }
|
26
25
|
|
27
|
-
spec.requirements << "you may
|
26
|
+
spec.requirements << "you may want to create apps and provide API tokens:"
|
28
27
|
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS"
|
29
28
|
end
|
data/lib/directlink.rb
CHANGED
@@ -2,11 +2,10 @@ module DirectLink
|
|
2
2
|
|
3
3
|
class << self
|
4
4
|
attr_accessor :silent
|
5
|
-
end
|
6
|
-
self.silent = false
|
7
|
-
class << self
|
8
5
|
attr_accessor :logger
|
6
|
+
attr_accessor :timeout
|
9
7
|
end
|
8
|
+
self.silent = false
|
10
9
|
self.logger = Object.new
|
11
10
|
self.logger.define_singleton_method :error do |str|
|
12
11
|
puts str unless Module.nesting.first.silent
|
@@ -35,6 +34,20 @@ module DirectLink
|
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
37
|
+
require "nethttputils"
|
38
|
+
require "fastimage"
|
39
|
+
NORMAL_EXCEPTIONS = [
|
40
|
+
SocketError,
|
41
|
+
Net::OpenTimeout,
|
42
|
+
Errno::ECONNRESET,
|
43
|
+
NetHTTPUtils::Error,
|
44
|
+
NetHTTPUtils::EOFError_from_rbuf_fill,
|
45
|
+
FastImage::UnknownImageType,
|
46
|
+
FastImage::ImageFetchFailure,
|
47
|
+
DirectLink::ErrorNotFound,
|
48
|
+
DirectLink::ErrorBadLink,
|
49
|
+
] # the only exceptions gem user should expect and handle
|
50
|
+
|
38
51
|
|
39
52
|
def self.google src, width = 0
|
40
53
|
# this can handle links without schema because it's used for parsing community HTML pages
|
@@ -47,7 +60,7 @@ module DirectLink
|
|
47
60
|
when /\A(\/\/lh3\.googleusercontent\.com\/cOh2Nsv7EGo0QbuoKxoKZVZO_NcBzufuvPtzirMJfPmAzCzMtnEncfA7zGIDTJfkc1YZFX2MhgKnjA=)w530-h398-p\z/
|
48
61
|
"https:#{$1}s#{width}/"
|
49
62
|
when /\A(\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/[VW][a-zA-Z0-9_-]{9}I\/AAAAAAA[AC][a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{32}[gwAQ]CJoC\/)w530-h[23]\d\d-p\/[^\/]+\z/,
|
50
|
-
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs)\/)(?:s640|w\d
|
63
|
+
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs(?:YHQ)?)\/)(?:s640|w\d{2,4}-h\d\d\d?-p(?:-k-no-nu)?)\/[^\/]+\z/,
|
51
64
|
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9-]{11}\/[UV][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9]{2}\/[a-zA-Z0-9-]{11}\/)w72-h72-p-k-no-nu\/[^\/]+\z/
|
52
65
|
"https:#{$1}s#{width}/"
|
53
66
|
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/AAAAAAAAAAI\/AAAAAAAAAAQ\/[a-zA-Z0-9_]{11}\/)w530-h[13]\d\d-n\/[^\/]+\z/,
|
@@ -76,10 +89,9 @@ module DirectLink
|
|
76
89
|
end
|
77
90
|
|
78
91
|
require "json"
|
79
|
-
require "nethttputils"
|
80
92
|
|
81
93
|
# TODO make the timeout handling respect the way the Directlink method works with timeouts
|
82
|
-
def self.imgur link, timeout =
|
94
|
+
def self.imgur link, timeout = 2000
|
83
95
|
raise ErrorMissingEnvVar.new "define IMGUR_CLIENT_ID env var" unless ENV["IMGUR_CLIENT_ID"]
|
84
96
|
|
85
97
|
request_data = lambda do |url|
|
@@ -107,16 +119,16 @@ module DirectLink
|
|
107
119
|
elsif data["images"]
|
108
120
|
raise ErrorNotFound.new link.inspect if data["images"].empty?
|
109
121
|
data["images"]
|
110
|
-
elsif data["type"] && data["type"]
|
122
|
+
elsif data["type"] && %w{ image/jpeg image/png image/gif video/mp4 }.include?(data["type"])
|
111
123
|
# TODO check if this branch is possible at all
|
112
124
|
[ data ]
|
113
125
|
# elsif data["comment"]
|
114
126
|
# fi["https://imgur.com/" + data["image_id"]]
|
115
127
|
else
|
116
128
|
# one day single-video item should hit this but somehow it didn't yet
|
117
|
-
raise ErrorAssert.new "unknown data format #{
|
129
|
+
raise ErrorAssert.new "unknown data format #{json} for #{link}"
|
118
130
|
end
|
119
|
-
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|
|
131
|
+
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|jpe?g(?:\?fb)?|png))?\z/,
|
120
132
|
/\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{5})\.mp4\z/,
|
121
133
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{5}(?:[a-zA-Z0-9]{2})?)\z/,
|
122
134
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{7})(?:\?\S+)?\z/,
|
@@ -128,7 +140,7 @@ module DirectLink
|
|
128
140
|
raise ErrorBadLink.new link
|
129
141
|
end.map do |image|
|
130
142
|
case image["type"]
|
131
|
-
when
|
143
|
+
when *%w{ image/jpeg image/png image/gif video/mp4 }
|
132
144
|
image.values_at "link", "width", "height", "type"
|
133
145
|
else
|
134
146
|
raise ErrorAssert.new "unknown type of #{link}: #{image}"
|
@@ -137,7 +149,7 @@ module DirectLink
|
|
137
149
|
end
|
138
150
|
|
139
151
|
def self._500px link
|
140
|
-
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[
|
152
|
+
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[-[a-zA-Z0-9]%]+\/?\z} =~ link
|
141
153
|
require "nokogiri"
|
142
154
|
resp = NetHTTPUtils.request_data link
|
143
155
|
f = lambda do |form|
|
@@ -191,11 +203,11 @@ module DirectLink
|
|
191
203
|
attr_accessor :reddit_bot
|
192
204
|
end
|
193
205
|
def self.reddit link, timeout = 1000
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
206
|
+
return [true, link] if URI(link).host &&
|
207
|
+
URI(link).host.split(?.) == %w{ i redd it } &&
|
208
|
+
URI(link).path[/\A\/[a-z0-9]{12,13}\.(gif|jpg)\z/]
|
209
|
+
unless id = link[/\Ahttps:\/\/www\.reddit\.com\/gallery\/([0-9a-z]{5,6})\z/, 1]
|
210
|
+
raise DirectLink::ErrorBadLink.new link unless id = URI(link).path[/\A(?:\/r\/[0-9a-zA-Z_]+)?(?:\/comments|\/duplicates)?\/([0-9a-z]{5,6})(?:\/|\z)/, 1]
|
199
211
|
end
|
200
212
|
retry_on_json_parseerror = lambda do |&b|
|
201
213
|
t = 1
|
@@ -221,14 +233,19 @@ module DirectLink
|
|
221
233
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless json.size == 2
|
222
234
|
json.find{ |_| _["data"]["children"].first["kind"] == "t3" }
|
223
235
|
end
|
236
|
+
# TODO: do we handle linking Imgur albums?
|
224
237
|
data = json["data"]["children"].first["data"]
|
225
|
-
if data["media"]
|
226
|
-
return [true, data["media"]["reddit_video"]["fallback_url"]]
|
227
|
-
else
|
238
|
+
if data["media"]
|
239
|
+
return [true, data["media"]["reddit_video"]["fallback_url"]] if data["media"]["reddit_video"]
|
228
240
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless data["media"].keys.sort == %w{ oembed type } && %w{ youtube.com gfycat.com imgur.com }.include?(data["media"]["type"])
|
229
241
|
return [true, data["media"]["oembed"]["thumbnail_url"]]
|
230
|
-
end
|
231
|
-
return [true, data["
|
242
|
+
end
|
243
|
+
return [true, data["media_metadata"].values.map do |media|
|
244
|
+
next if media == {"status"=>"failed"}
|
245
|
+
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless media["status"] == "valid"
|
246
|
+
[media["m"], *media["s"].values_at("x", "y"), CGI.unescapeHTML(media["s"]["u"])]
|
247
|
+
end.compact] if data["media_metadata"]
|
248
|
+
return [true, "#{"https://www.reddit.com" if /\A\/r\/[0-9a-zA-Z_]+\/comments\/[0-9a-z]{5,6}\// =~ data["url"]}#{data["url"]}"] if data["crosspost_parent"]
|
232
249
|
return [true, data["url"]] unless data["is_self"]
|
233
250
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" if data["url"] != "https://www.reddit.com" + data["permalink"]
|
234
251
|
return [false, data["selftext"]]
|
@@ -236,23 +253,30 @@ module DirectLink
|
|
236
253
|
|
237
254
|
def self.vk link
|
238
255
|
id, mtd, field, f = case link
|
239
|
-
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))
|
256
|
+
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))?\z},
|
240
257
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>\d+)_\d+)%2Fphotos\k<user_id>\z},
|
241
|
-
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?all=1)?\z},
|
258
|
+
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(?:all|rev)=1)?\z},
|
242
259
|
%r{\Ahttps://vk\.com/feed\?section=likes&z=photo(?<_>)(?<id>-(?<user_id>\d+)_\d+)%2F(liked\d+|album\k<user_id>_0)\z},
|
243
260
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>-\d+)_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_0)\z},
|
244
261
|
%r{\Ahttps://vk\.com/wall(?<user_id>-\d+)_\d+\?z=photo(?<id>\k<user_id>_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_00%2Frev|\d+)\z}
|
245
262
|
[$2, :photos, :photos, lambda do |t|
|
246
263
|
raise ErrorAssert.new "our knowledge about VK API seems to be outdated" unless 1 == t.size
|
247
|
-
t
|
264
|
+
t
|
248
265
|
end ]
|
249
|
-
when %r{\Ahttps://vk\.com/wall(?<id
|
266
|
+
when %r{\Ahttps://vk\.com/wall(?<id>-?\d+_\d+)\z},
|
267
|
+
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id>\d+_\d+)\z}
|
250
268
|
[$1, :wall, :posts, lambda do |t|
|
251
|
-
t.first.fetch("attachments").
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
269
|
+
t.first.fetch("attachments").select do |item|
|
270
|
+
case item.keys
|
271
|
+
when %w{ type photo }
|
272
|
+
raise ErrorAssert.new "our knowledge about VK API seems to be outdated" unless item["type"] == "photo"
|
273
|
+
next true
|
274
|
+
when %w{ type audio }
|
275
|
+
raise ErrorAssert.new "our knowledge about VK API seems to be outdated" unless item["type"] == "audio"
|
276
|
+
else
|
277
|
+
raise ErrorAssert.new "our knowledge about VK API seems to be outdated"
|
278
|
+
end
|
279
|
+
end.map{ |i| i.fetch "photo" }
|
256
280
|
end ]
|
257
281
|
else
|
258
282
|
raise ErrorBadLink.new link
|
@@ -261,21 +285,22 @@ module DirectLink
|
|
261
285
|
sleep 0.25 # "error_msg"=>"Too many requests per second"
|
262
286
|
f.call( JSON.load( NetHTTPUtils.request_data "https://api.vk.com/method/#{mtd}.getById",
|
263
287
|
:POST, form: { field => id, :access_token => ENV["VK_ACCESS_TOKEN"], :client_secret => ENV["VK_CLIENT_SECRET"], :v => "5.101" }
|
264
|
-
).fetch("response") ).
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
288
|
+
).fetch("response") ).map do |photos|
|
289
|
+
photos.fetch("sizes").map do |size|
|
290
|
+
size.values_at("width", "height", "url").tap do |whu|
|
291
|
+
w, h, u = whu
|
292
|
+
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0
|
293
|
+
end
|
294
|
+
end.max_by{ |w, h, u| w * h }
|
295
|
+
end
|
270
296
|
end
|
271
297
|
|
272
298
|
class_variable_set :@@directlink, Struct.new(:url, :width, :height, :type)
|
273
299
|
end
|
274
300
|
|
275
301
|
|
276
|
-
|
277
|
-
|
278
|
-
def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
302
|
+
def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: false
|
303
|
+
timeout ||= DirectLink.timeout
|
279
304
|
ArgumentError.new("link should be a <String>, not <#{link.class}>") unless link.is_a? String
|
280
305
|
begin
|
281
306
|
URI link
|
@@ -313,7 +338,7 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
313
338
|
**( %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
314
339
|
%w{ redd it } == URI(link).host.split(?.) ? {Cookie: "over18=1"} : {} ),
|
315
340
|
}
|
316
|
-
head = NetHTTPUtils.request_data link, :
|
341
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: header, **(proxy ? {proxy: proxy} : {}), **(timeout ? {
|
317
342
|
timeout: timeout,
|
318
343
|
max_start_http_retry_delay: timeout,
|
319
344
|
max_read_retry_delay: timeout
|
@@ -369,29 +394,40 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
369
394
|
raise DirectLink::ErrorBadLink.new link if giveup # TODO: print original url in such cases if there was a recursion
|
370
395
|
f = ->_{ _.type == :a ? _.attr["href"] : _.children.flat_map(&f) }
|
371
396
|
require "kramdown"
|
372
|
-
return f[Kramdown::Document.new(u).root].
|
373
|
-
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup
|
397
|
+
return f[Kramdown::Document.new(u).root].flat_map do |sublink|
|
398
|
+
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup # TODO: maybe subtract from timeout the time we've already wasted
|
399
|
+
end
|
400
|
+
end
|
401
|
+
if u.is_a? Hash
|
402
|
+
return struct.new *u.values_at(*%w{ fallback_url width height }), "video"
|
403
|
+
elsif u.is_a? Array
|
404
|
+
return u.map do |t, x, y, u|
|
405
|
+
struct.new u, x, y, t
|
374
406
|
end
|
375
407
|
end
|
376
|
-
|
377
|
-
return DirectLink u
|
378
|
-
fail if link == u
|
408
|
+
raise DirectLink::ErrorNotFound.new link.inspect if link == u
|
409
|
+
return DirectLink u, timeout, giveup: giveup
|
379
410
|
rescue DirectLink::ErrorMissingEnvVar
|
380
411
|
end if %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
381
412
|
%w{ redd it } == URI(link).host.split(?.)
|
382
413
|
|
383
414
|
begin
|
384
|
-
w, h, u
|
385
|
-
|
415
|
+
return DirectLink.vk(link).map do |w, h, u|
|
416
|
+
struct.new u, w, h
|
417
|
+
end
|
386
418
|
rescue DirectLink::ErrorMissingEnvVar
|
387
419
|
end if %w{ vk com } == URI(link).host.split(?.)
|
388
420
|
|
389
421
|
begin
|
390
|
-
f = FastImage.new
|
422
|
+
f = FastImage.new link,
|
423
|
+
raise_on_failure: true,
|
424
|
+
timeout: timeout,
|
425
|
+
**(proxy ? {proxy: "http://#{proxy}"} : {}),
|
426
|
+
http_header: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"}
|
391
427
|
rescue FastImage::UnknownImageType
|
392
428
|
raise if giveup
|
393
429
|
require "nokogiri"
|
394
|
-
head = NetHTTPUtils.request_data link, :
|
430
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: {"User-Agent" => "Mozilla"},
|
395
431
|
max_start_http_retry_delay: timeout,
|
396
432
|
timeout: timeout, # NetHTTPUtils passes this as read_timeout to Net::HTTP.start
|
397
433
|
max_read_retry_delay: timeout # and then compares accumulated delay to this
|
@@ -402,7 +438,10 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
402
438
|
end
|
403
439
|
html = Nokogiri::HTML NetHTTPUtils.request_data link, header: {"User-Agent" => "Mozilla"}
|
404
440
|
if t = html.at_css("meta[@property='og:image']")
|
405
|
-
|
441
|
+
begin
|
442
|
+
return DirectLink URI.join(link, t[:content]).to_s, nil, *proxy, giveup: true
|
443
|
+
rescue URI::InvalidURIError
|
444
|
+
end
|
406
445
|
end unless ignore_meta
|
407
446
|
h = {} # TODO: maybe move it outside because of possible img[:src] recursion?...
|
408
447
|
l = lambda do |node, s = []|
|
@@ -416,9 +455,10 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
416
455
|
end
|
417
456
|
end
|
418
457
|
end
|
419
|
-
l[html].
|
420
|
-
raise if results.empty?
|
421
|
-
|
458
|
+
l[html].
|
459
|
+
tap{ |results| raise if results.empty? }.
|
460
|
+
group_by(&:first).map{ |k, v| [k.join(?>), v.map(&:last)] }.
|
461
|
+
max_by{ |_, v| v.map{ |i| i.width * i.height }.inject(:+) }.last
|
422
462
|
else
|
423
463
|
# TODO: maybe move this to right before `rescue` line
|
424
464
|
w, h = f.size
|
data/test.rb
CHANGED
@@ -13,6 +13,8 @@ fail unless ENV.include? "REDDIT_SECRETS"
|
|
13
13
|
|
14
14
|
require_relative "lib/directlink"
|
15
15
|
DirectLink.silent = true
|
16
|
+
DirectLink.timeout = 30 # TODO: tests about this attribute
|
17
|
+
|
16
18
|
describe DirectLink do
|
17
19
|
|
18
20
|
describe "./lib" do
|
@@ -171,6 +173,7 @@ describe DirectLink do
|
|
171
173
|
http://1.bp.blogspot.com/-iSU4orVuR9Y/VFYrwQZ5qYI/AAAAAAAAMnc/WY4VfCaeplw/w72-h72-p-k-no-nu/Wolf%2Bphotography2.jpg
|
172
174
|
http://1.bp.blogspot.com/-vPQSh6RKijU/VEi7r3D-jJI/AAAAAAAAL2Q/bGHmyuoDp5M/w72-h72-p-k-no-nu/Building%2BIn%2BLondon1-4__880.jpeg
|
173
175
|
http://1.bp.blogspot.com/-W4xKJSsVf3M/Uz73jPlctbI/AAAAAAAAGz4/K8Tw6PILMeY/w72-h72-p-k-no-nu/Beautiful+Japanese+places4.jpg
|
176
|
+
https://1.bp.blogspot.com/-__qsdLxNtcQ/XhaOQle-ECI/AAAAAAAABQ4/S_7SGG_K8eQ7VXIU2wyPvTj9OyBfr_1sQCLcBGAsYHQ/w1200-h630-p-k-no-nu/iceland_poppies_orange_flowers_field-wallpaper-3840x2160.jpg
|
174
177
|
https://lh3.googleusercontent.com/-tV86KJvppss/XE2Nb2Z2aAI/AAAAAAAAGu4/94E_AuB4YWAaJ59n43wmmd9rFa--OUuSQCJoC/w530-h338-n/IMG_6845%252C.png
|
175
178
|
https://lh3.googleusercontent.com/-cr-2ZSQGMPg/XFWLfetwr7I/AAAAAAAAQQQ/TbwDk56BBIwb4IDDO0SwfArFSZyDG0i0wCJoC/w530-h360-n/DSC07294.JPG
|
176
179
|
}.each_with_index do |link, i|
|
@@ -222,7 +225,7 @@ describe DirectLink do
|
|
222
225
|
|
223
226
|
# TODO: expand this for every branch in lib
|
224
227
|
%w{
|
225
|
-
https_long_blogspot https://
|
228
|
+
https_long_blogspot https://1.bp.blogspot.com/-__qsdLxNtcQ/XhaOQle-ECI/AAAAAAAABQ4/S_7SGG_K8eQ7VXIU2wyPvTj9OyBfr_1sQCLcBGAsYHQ/w1200-h630-p-k-no-nu/iceland_poppies_orange_flowers_field-wallpaper-3840x2160.jpg https://1.bp.blogspot.com/-__qsdLxNtcQ/XhaOQle-ECI/AAAAAAAABQ4/S_7SGG_K8eQ7VXIU2wyPvTj9OyBfr_1sQCLcBGAsYHQ/s0/
|
226
229
|
http_short_blogspot http://4.bp.blogspot.com/-poH-QXn7YGg/U-3ZTDkeF_I/AAAAAAAAISE/ms2gNIb-v-g/w72-h72-p-k-no-nu/Top-24-Inspired-181.jpg https://4.bp.blogspot.com/-poH-QXn7YGg/U-3ZTDkeF_I/AAAAAAAAISE/ms2gNIb-v-g/s0/
|
227
230
|
just_gplus https://lh3.googleusercontent.com/-NiGph3ObOPg/XE3DgnavXlI/AAAAAAABvgE/pcPPCe88rsU1r941wwP76TVf_o89i74kwCJoC/w530-h353-n/DSCF0753.JPG https://lh3.googleusercontent.com/-NiGph3ObOPg/XE3DgnavXlI/AAAAAAABvgE/pcPPCe88rsU1r941wwP76TVf_o89i74kwCJoC/s0/
|
228
231
|
google_keep https://lh5.googleusercontent.com/fRmAL_04p7oomNHCiV4tH4-agHSDBtLaWi_Tb6bgE5ZSHVu5OjQF3iRn06nNwP3ywZwdFP92zWM-o8yw0cn6m0tDTBARuO6F9e0wYu_1=s685 https://lh5.googleusercontent.com/fRmAL_04p7oomNHCiV4tH4-agHSDBtLaWi_Tb6bgE5ZSHVu5OjQF3iRn06nNwP3ywZwdFP92zWM-o8yw0cn6m0tDTBARuO6F9e0wYu_1=s0
|
@@ -328,9 +331,11 @@ describe DirectLink do
|
|
328
331
|
["https://imgur.com/9yaMdJq", "https://i.imgur.com/9yaMdJq.mp4", 720, 404, "video/mp4"],
|
329
332
|
["http://imgur.com/gallery/dCQprEq/new", "https://i.imgur.com/dCQprEq.jpg", 5760, 3840, "image/jpeg"],
|
330
333
|
["https://i.imgur.com/fFUTSJu.jpg?fb", "https://i.imgur.com/fFUTSJu.jpg", 1469, 2200, "image/jpeg"], # from reddit.com/93mtba
|
334
|
+
["https://i.imgur.com/IxUrhGX.jpeg", "https://i.imgur.com/IxUrhGX.jpg", 4384, 3012, "image/jpeg"], # jpEg
|
335
|
+
["https://imgur.com/gallery/9f2s9EE", "https://i.imgur.com/9f2s9EE.mp4", 960, 1438, "video/mp4"], # mp4
|
331
336
|
].each_with_index do |t, i|
|
332
337
|
url, n, first, last, type = t
|
333
|
-
it "##{i + 1}" do
|
338
|
+
it "kinds of post ##{i + 1}" do
|
334
339
|
case last
|
335
340
|
when NilClass
|
336
341
|
if n.is_a? Class
|
@@ -363,6 +368,8 @@ describe DirectLink do
|
|
363
368
|
[
|
364
369
|
[ :_500px, [
|
365
370
|
["https://500px.com/photo/264092015/morning-rider-by-tiger-seo", [1200, 800, "https://drscdn.500px.org/photo/264092015/m%3D900/v2?sig=68a9206477f573d8e2838faa6a929e7267f22dc5f9e98f1771f7a8a63efa2ed7", "jpeg"]],
|
371
|
+
["https://500px.com/photo/1017579834/-poppies-flowers-by-David-Dubnitskiy/", [1819, 2500, "https://drscdn.500px.org/photo/1017579834/m%3D900/v2?sig=022e3e9dd836ffd8c1d31ae26c83735e4e42b4c8733d0c4380d8270aebbca44e", "jpeg"]],
|
372
|
+
["https://500px.com/photo/1017557263/iss%E5%87%8C%E6%97%A5%E5%81%8F%E9%A3%9F-by-%E7%A7%8B%E8%A3%A4Choku-/", [2048, 2048, "https://drscdn.500px.org/photo/1017557263/m%3D2048/v2?sig=1994a09e33794117082e91fa58c40614a2bfd19d3e0dd78e067968d38aca92be", "jpeg"]]
|
366
373
|
] ],
|
367
374
|
[ :flickr, [
|
368
375
|
["https://www.flickr.com/photos/tomas-/17220613278/", DirectLink::ErrorNotFound],
|
@@ -374,7 +381,7 @@ describe DirectLink do
|
|
374
381
|
["https://www.flickr.com/photos/130019700@N03/18848891351/in/dateposted-public/", [4621, 3081, "https://live.staticflickr.com/3796/18848891351_f751b35aeb_o.jpg"]], # userid in-public
|
375
382
|
["https://www.flickr.com/photos/frank3/3778768209/in/photolist-6KVb92-eCDTCr-ur8K-7qbL5z-c71afh-c6YvXW-7mHG2L-c71ak9-c71aTq-c71azf-c71aq5-ur8Q-6F6YkR-eCDZsD-eCEakg-eCE6DK-4ymYku-7ubEt-51rUuc-buujQE-ur8x-9fuNu7-6uVeiK-qrmcC6-ur8D-eCEbei-eCDY9P-eCEhCk-eCE5a2-eCH457-eCHrcq-eCEdZ4-eCH6Sd-c71b5o-c71auE-eCHa8m-eCDSbz-eCH1dC-eCEg3v-7JZ4rh-9KwxYL-6KV9yR-9tUSbU-p4UKp7-eCHfwS-6KVbAH-5FrdbP-eeQ39v-eeQ1UR-4jHAGN", [4096, 2723, "https://live.staticflickr.com/2499/3778768209_dfa75a41cc_4k.jpg"]],
|
376
383
|
["https://www.flickr.com/photos/patricksloan/18230541413/sizes/l", [2048, 491, "https://live.staticflickr.com/5572/18230541413_fec4783d79_k.jpg"]],
|
377
|
-
["https://flic.kr/p/vPvCWJ", [
|
384
|
+
["https://flic.kr/p/vPvCWJ", [5120, 3413, "https://live.staticflickr.com/507/19572004110_1bd49c5ebd_5k.jpg"]],
|
378
385
|
] ],
|
379
386
|
[ :wiki, [
|
380
387
|
["https://en.wikipedia.org/wiki/Prostitution_by_country#/media/File:Prostitution_laws_of_the_world.PNG", "https://upload.wikimedia.org/wikipedia/commons/e/e8/Prostitution_laws_of_the_world.PNG"],
|
@@ -393,24 +400,18 @@ describe DirectLink do
|
|
393
400
|
["http://redd.it/988889", [true, "https://i.redd.it/3h5xls6ehrg11.jpg"]],
|
394
401
|
["https://www.reddit.com/r/CatsStandingUp/duplicates/abn0ua/cat/", [true, "https://v.redd.it/s9b86afb6w721/DASH_2_4_M?source=fallback"]],
|
395
402
|
["https://www.reddit.com/r/hangers/comments/97you5/tara_radovic/", [true, "https://i.imgur.com/rbLqgOu.jpg"]], # "crossport" from Imgur
|
396
|
-
|
397
|
-
|
398
|
-
["https://
|
399
|
-
["https://
|
400
|
-
["https://
|
401
|
-
|
402
|
-
["https://vk.com/photo533531776_456239427?all=1", [750, 938, "https://sun9-25.userapi.com/c849416/v849416600/14b949/V01Ch1gYjhc.jpg"]],
|
403
|
-
["https://vk.com/photo-155488973_456242404", [1486, 1000, "https://sun9-7.userapi.com/c852132/v852132877/8578e/m6AJWiskiKE.jpg"]],
|
404
|
-
["https://vk.com/id2272074?z=photo2272074_264578776%2Fphotos2272074", [604, 484, "https://sun9-10.userapi.com/c10472/u2272074/-7/x_407b2ba2.jpg"]],
|
405
|
-
["https://vk.com/feed?section=likes&z=photo-117564754_456261460%2Fliked3902406", [1024, 1335, "https://sun9-72.userapi.com/c854028/v854028353/895b6/izQJresLdf0.jpg"]],
|
406
|
-
["https://vk.com/likizimy?z=photo-42543351_456239941%2Fwall-42543351_1908", [1179, 1731, "https://sun9-47.userapi.com/c855036/v855036571/60f7b/ryCPJIMyMkI.jpg"]],
|
407
|
-
["https://vk.com/e_rod?z=photo298742340_457247118%2Fphotos298742340", [1728, 2160, "https://sun9-53.userapi.com/c858320/v858320596/c7714/oImGe4o1ZJI.jpg"]],
|
403
|
+
["https://www.reddit.com/gallery/i1u6rb", [true, [["image/jpg", 1440, 1440, "https://preview.redd.it/x31msdj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=b79952f8364bb98692d978944347f19e28774d1b"], ["image/jpg", 2441, 2441, "https://preview.redd.it/mwkzq6j6vee51.jpg?width=2441&format=pjpg&auto=webp&s=455e669356550351e6b8768d8009de616c11142a"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0ws1j8j6vee51.jpg?width=1440&format=pjpg&auto=webp&s=061582da8478e7601a7ce7a97fa1663852873726"], ["image/jpg", 1440, 1440, "https://preview.redd.it/2un68aj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=a980f0e5814c2360f5d7a0fb12f391e304942c06"], ["image/jpg", 3024, 3780, "https://preview.redd.it/5bsfaej6vee51.jpg?width=3024&format=pjpg&auto=webp&s=9b96b4b7262eebacc7571a9f0ad902e2034bf990"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0z010ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=f0c29be6ec98b835a482c7584cca43fd16217bc8"], ["image/jpg", 1440, 1440, "https://preview.redd.it/aylm2ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=39cf471b14020a1f137bc9bbb294bf5489cab3e7"]]]], # TODO: find smaller gallery
|
404
|
+
["https://www.reddit.com/i1u6rb", [true, [["image/jpg", 1440, 1440, "https://preview.redd.it/x31msdj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=b79952f8364bb98692d978944347f19e28774d1b"], ["image/jpg", 2441, 2441, "https://preview.redd.it/mwkzq6j6vee51.jpg?width=2441&format=pjpg&auto=webp&s=455e669356550351e6b8768d8009de616c11142a"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0ws1j8j6vee51.jpg?width=1440&format=pjpg&auto=webp&s=061582da8478e7601a7ce7a97fa1663852873726"], ["image/jpg", 1440, 1440, "https://preview.redd.it/2un68aj6vee51.jpg?width=1440&format=pjpg&auto=webp&s=a980f0e5814c2360f5d7a0fb12f391e304942c06"], ["image/jpg", 3024, 3780, "https://preview.redd.it/5bsfaej6vee51.jpg?width=3024&format=pjpg&auto=webp&s=9b96b4b7262eebacc7571a9f0ad902e2034bf990"], ["image/jpg", 1440, 1440, "https://preview.redd.it/0z010ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=f0c29be6ec98b835a482c7584cca43fd16217bc8"], ["image/jpg", 1440, 1440, "https://preview.redd.it/aylm2ej6vee51.jpg?width=1440&format=pjpg&auto=webp&s=39cf471b14020a1f137bc9bbb294bf5489cab3e7"]]]], # TODO: find smaller gallery
|
405
|
+
["https://www.reddit.com/gallery/i3y7pc", [true, "https://www.reddit.com/gallery/i3y7pc"]], # deleted gallery
|
406
|
+
["https://www.reddit.com/ik6c6a", [true, "https://www.reddit.com/r/Firewatch/comments/ik6brf/new_wallpaper_for_my_triple_monitor_setup/"]], # deleted gallery
|
407
|
+
["https://www.reddit.com/kbjdwc", [true, [["image/jpg", 500, 500, "https://preview.redd.it/71t8ljeexo461.jpg?width=500&format=pjpg&auto=webp&s=df211fe0699e3970681ffe493ed1af79725857e8"], ["image/jpg", 720, 446, "https://preview.redd.it/c11nt7hexo461.jpg?width=720&format=pjpg&auto=webp&s=5e34ab0e6d54c0acfdb47f1daaf283087c5ad6a6"], ["image/jpg", 713, 588, "https://preview.redd.it/67mqvllexo461.jpg?width=713&format=pjpg&auto=webp&s=969dfb52bedd6f0055249aa8b7454b23adaa946e"]]]], # failed media
|
408
|
+
# TODO: empty result? https://redd.it/9hhtsq
|
408
409
|
] ],
|
409
410
|
].each do |method, tests|
|
410
|
-
next if method == :vk && ENV.include?("
|
411
|
-
describe method do
|
411
|
+
next if method == :vk && ENV.include?("CI")
|
412
|
+
describe "kinds of links #{method}" do
|
412
413
|
tests.each_with_index do |(input, expectation), i|
|
413
|
-
it "
|
414
|
+
it "##{i + 1}" do
|
414
415
|
if expectation.is_a? Class
|
415
416
|
assert_raises expectation, input do
|
416
417
|
DirectLink.method(method).call input
|
@@ -424,6 +425,37 @@ describe DirectLink do
|
|
424
425
|
end
|
425
426
|
end
|
426
427
|
|
428
|
+
describe "kinds of links vk" do
|
429
|
+
next if ENV.include? "CI"
|
430
|
+
[
|
431
|
+
["https://vk.com/wall-105984091_7806", [960, 1280, "https://userapi.com/impf/c855224/v855224900/a72f1/7OZ8ux9Wcwo.jpg"]],
|
432
|
+
# ["https://vk.com/wall298742340_4715", [1080, 1080, "https://userapi.com/impf/c857136/v857136625/15e38b/CsCqsJD174A.jpg"]], # TODO: it's now 404
|
433
|
+
["https://vk.com/wall-185182611_454?z=photo-185182611_457239340%2Fwall-185182611_454", [1280, 960, "https://userapi.com/impf/c851028/v851028578/1a62f6/VB4SdR1O6Tg.jpg"]],
|
434
|
+
["https://vk.com/wall-105984091_7946?z=photo-105984091_457243312%2Falbum-105984091_00%2Frev", [1280, 875, "https://userapi.com/impf/c852020/v852020134/1b6b36/0IsDFb-Hda4.jpg"]],
|
435
|
+
["https://vk.com/id57030827?z=photo57030827_456241143%2Falbum57030827_0", [1920, 1440, "https://userapi.com/impf/c845322/v845322944/167836/bP9z41BybhI.jpg"]],
|
436
|
+
["https://vk.com/id57030827?z=photo57030827_456241143", [1920, 1440, "https://userapi.com/impf/c845322/v845322944/167836/bP9z41BybhI.jpg"]],
|
437
|
+
["https://vk.com/photo1_215187843?all=1", [2560, 1913, "https://userapi.com/impf/c210/v210001/6/53_VwoACy4I.jpg"]],
|
438
|
+
["https://vk.com/photo298742340_456243948?rev=1", [1583, 1080, "https://userapi.com/impf/c852224/v852224479/321be/9rZaJ2QTdz4.jpg"]],
|
439
|
+
["https://vk.com/photo-155488973_456242404", [1486, 1000, "https://userapi.com/impf/c852132/v852132877/8578e/m6AJWiskiKE.jpg"]],
|
440
|
+
# ["https://vk.com/id2272074?z=photo2272074_264578776%2Fphotos2272074", [604, 484, "https://userapi.com/impf/c10472/u2272074/-7/x_407b2ba2.jpg"]], # TODO: it's now 404
|
441
|
+
["https://vk.com/feed?section=likes&z=photo-117564754_456261460%2Fliked3902406", [1024, 1335, "https://userapi.com/impf/c854028/v854028353/895b6/izQJresLdf0.jpg"]],
|
442
|
+
["https://vk.com/likizimy?z=photo-42543351_456239941%2Fwall-42543351_1908", [1179, 1731, "https://userapi.com/impf/c855036/v855036571/60f7b/ryCPJIMyMkI.jpg"]],
|
443
|
+
["https://vk.com/e_rod?z=photo298742340_457247118%2Fphotos298742340", [1728, 2160, "https://userapi.com/impf/c858320/v858320596/c7714/oImGe4o1ZJI.jpg"]],
|
444
|
+
].each_with_index do |(input, expectation), i|
|
445
|
+
it "##{i + 1}" do
|
446
|
+
result = DirectLink.method(:vk).call input
|
447
|
+
assert_equal 1, result.size
|
448
|
+
result[0][-1].tap do |url|
|
449
|
+
url.replace( URI.parse(url).tap do |_|
|
450
|
+
_.host = _.host.split(?.).drop(1).join(?.)
|
451
|
+
_.query = nil
|
452
|
+
end.to_s )
|
453
|
+
end
|
454
|
+
assert_equal [expectation], result, "#{input} :: #{result.inspect} != #{expectation.inspect}"
|
455
|
+
end
|
456
|
+
end
|
457
|
+
end
|
458
|
+
|
427
459
|
{
|
428
460
|
google: [
|
429
461
|
"https://lh3.googleusercontent.com/-NVJgqmI_2Is/WqMM2OMYg-I/AAAAAAAALrk/5-p3JL3iZt0Ho9dOf_p3gpddzqwr3Wp0ACJoC/w424-h318-n/001",
|
@@ -441,7 +473,7 @@ describe DirectLink do
|
|
441
473
|
["https://goo.gl/ySqUb5", "https://i.imgur.com/QpOBvRY.png"],
|
442
474
|
],
|
443
475
|
_500px: [
|
444
|
-
%w{ https://500px.com/photo/112134597/milky-way-by-tom-hall https://
|
476
|
+
%w{ https://500px.com/photo/112134597/milky-way-by-tom-hall https://500px.com/photo/112134597/milky-way-by-tom-hall },
|
445
477
|
],
|
446
478
|
flickr: [
|
447
479
|
"https://www.flickr.com/photos/59880970@N07/15773941043/in/dateposted-public/",
|
@@ -456,13 +488,13 @@ describe DirectLink do
|
|
456
488
|
["http://redd.it/32tq0i", "https://www.reddit.com/comments/32tq0i"],
|
457
489
|
["https://reddit.com/123456", "https://www.reddit.com/r/funny/comments/123456/im_thinking_about_getting_a_dog_and_youtubed_ways/"],
|
458
490
|
# ["https://www.reddit.com/r/travel/988889", "https://www.reddit.com/r/travel/comments/988889/playa_miramar_in_guaymas_sonora/"],
|
459
|
-
"https://www.reddit.com/r/
|
491
|
+
"https://www.reddit.com/r/PareidoliaGoneWild/comments/hzrlq6/beard_trimmer_on_display_at_best_buy_they_knew/", # NSFW causes redirect to /over_18? if the special cookie not provided
|
460
492
|
],
|
461
493
|
vk: [
|
462
494
|
"https://vk.com/id57030827?z=photo57030827_456241143",
|
463
495
|
],
|
464
496
|
}.each do |method, tests|
|
465
|
-
describe "DirectLink() calls #{method}" do
|
497
|
+
describe "DirectLink() sees domain name and calls #{method}" do
|
466
498
|
tests.each_with_index do |(input, expected), i|
|
467
499
|
it "##{i + 1}" do
|
468
500
|
DirectLink.stub method, ->link{
|
@@ -598,6 +630,16 @@ describe DirectLink do
|
|
598
630
|
)
|
599
631
|
end
|
600
632
|
|
633
|
+
it "throws ErrorNotFound when Reddit gallery is removed" do
|
634
|
+
assert_raises DirectLink::ErrorNotFound do
|
635
|
+
DirectLink "https://www.reddit.com/gallery/i3y7pc"
|
636
|
+
end
|
637
|
+
end
|
638
|
+
|
639
|
+
it "follows Reddit crosspost" do
|
640
|
+
assert_equal %w{ image/png image/png }, DirectLink("https://www.reddit.com/ik6c6a").map(&:type)
|
641
|
+
end
|
642
|
+
|
601
643
|
it "throws ErrorBadLink if link is invalid" do
|
602
644
|
assert_equal "test".inspect, (
|
603
645
|
assert_raises DirectLink::ErrorBadLink do
|
@@ -648,9 +690,9 @@ describe DirectLink do
|
|
648
690
|
|
649
691
|
describe "other domains tests" do
|
650
692
|
[
|
651
|
-
["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", ["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", 1280, 853, :jpeg], nil, 1],
|
693
|
+
# ["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", ["http://www.aeronautica.difesa.it/organizzazione/REPARTI/divolo/PublishingImages/6%C2%B0%20Stormo/2013-decollo%20al%20tramonto%20REX%201280.jpg", 1280, 853, :jpeg], nil, 1], # website is dead?
|
652
694
|
# ["http://minus.com/lkP3hgRJd9npi", SocketError, /nodename nor servname provided, or not known|No address associated with hostname/, 0],
|
653
|
-
["http://www.cutehalloweencostumeideas.org/wp-content/uploads/2017/10/Niagara-Falls_04.jpg", SocketError, /nodename nor servname provided, or not known|Name or service not known/, 0],
|
695
|
+
["http://www.cutehalloweencostumeideas.org/wp-content/uploads/2017/10/Niagara-Falls_04.jpg", SocketError, /nodename nor servname provided, or not known|Name or service not known|getaddrinfo: Name does not resolve/, 0],
|
654
696
|
].each_with_index do |(input, expectation, message_string_or_regex, max_redirect_resolving_retry_delay), i|
|
655
697
|
it "##{i + 1}" do
|
656
698
|
if expectation.is_a? Class
|
@@ -674,37 +716,40 @@ describe DirectLink do
|
|
674
716
|
describe "giving up" do
|
675
717
|
[
|
676
718
|
["http://example.com", FastImage::UnknownImageType],
|
677
|
-
["https://www.tic.com/index.html", FastImage::UnknownImageType, true],
|
678
|
-
["https://www.tic.com/index.html", 2],
|
719
|
+
# ["https://www.tic.com/index.html", FastImage::UnknownImageType, true], # needs new test or stub
|
720
|
+
# ["https://www.tic.com/index.html", 2], # needs new test or stub
|
679
721
|
["http://imgur.com/HQHBBBD", FastImage::UnknownImageType, true],
|
680
722
|
["http://imgur.com/HQHBBBD", "https://i.imgur.com/HQHBBBD.jpg?fb"], # .at_css("meta[@property='og:image']")
|
681
723
|
["https://www.deviantart.com/nadyasonika/art/Asuka-Langley-Beach-Time-590134861", FastImage::UnknownImageType, true],
|
682
|
-
["https://www.deviantart.com/nadyasonika/art/Asuka-Langley-Beach-Time-590134861", "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/943f66cb-78ad-40f2-a086-44420b98b431/d9rcmz1-5cbc5670-0193-485b-ac14-755ddb9562f4.jpg/v1/fill/w_1024,h_732,q_75,strp/asuka_langley_beach_time_by_nadyasonika_d9rcmz1-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.
|
724
|
+
["https://www.deviantart.com/nadyasonika/art/Asuka-Langley-Beach-Time-590134861", "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/943f66cb-78ad-40f2-a086-44420b98b431/d9rcmz1-5cbc5670-0193-485b-ac14-755ddb9562f4.jpg/v1/fill/w_1024,h_732,q_75,strp/asuka_langley_beach_time_by_nadyasonika_d9rcmz1-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3siaGVpZ2h0IjoiPD03MzIiLCJwYXRoIjoiXC9mXC85NDNmNjZjYi03OGFkLTQwZjItYTA4Ni00NDQyMGI5OGI0MzFcL2Q5cmNtejEtNWNiYzU2NzAtMDE5My00ODViLWFjMTQtNzU1ZGRiOTU2MmY0LmpwZyIsIndpZHRoIjoiPD0xMDI0In1dXSwiYXVkIjpbInVybjpzZXJ2aWNlOmltYWdlLm9wZXJhdGlvbnMiXX0.L6OhXuQZ_9ovKOdjjuQbvxpD0mG8M_KiqV4ljEDfW3Q"],
|
683
725
|
["https://calgary.skyrisecities.com/news/2019/11/blue-morning-light", "https://cdn.skyrisecities.com/sites/default/files/images/articles/2019/11/39834/39834-132071.jpg"], # og:image without scheme
|
726
|
+
["https://www.reddit.com/r/darksouls3/comments/e59djh/hand_it_over_that_thing_your_wallpaper/", DirectLink::ErrorBadLink, true],
|
727
|
+
["https://www.reddit.com/r/darksouls3/comments/e59djh/hand_it_over_that_thing_your_wallpaper/", 6],
|
684
728
|
].each_with_index do |(input, expectation, giveup), i|
|
685
729
|
it "##{i + 1} (#{URI(input).host}) (giveup=#{!!giveup})" do # to match with minitest `-n` run flag
|
686
|
-
ti = ENV.delete "IMGUR_CLIENT_ID"
|
687
|
-
tr = ENV.delete "REDDIT_SECRETS"
|
730
|
+
ti = ENV.delete "IMGUR_CLIENT_ID" if %w{ imgur com } == URI(input).host.split(?.).last(2)
|
731
|
+
tr = ENV.delete "REDDIT_SECRETS" if %w{ reddit com } == URI(input).host.split(?.).last(2)
|
688
732
|
begin
|
689
733
|
case expectation
|
690
734
|
when Class
|
691
735
|
e = assert_raises expectation, "for #{input} (giveup = #{giveup})" do
|
692
|
-
DirectLink input,
|
736
|
+
DirectLink input, 5, *ENV["PROXY"], giveup: giveup
|
693
737
|
end
|
694
738
|
assert_equal expectation.to_s, e.class.to_s, "for #{input} (giveup = #{giveup})"
|
695
739
|
when String
|
696
|
-
result = DirectLink input,
|
740
|
+
result = DirectLink input, 5, *ENV["PROXY"], giveup: giveup
|
697
741
|
assert_equal expectation, result.url, "for #{input} (giveup = #{giveup})"
|
698
742
|
else
|
699
|
-
result = DirectLink input,
|
743
|
+
result = DirectLink input, 5, *ENV["PROXY"], giveup: giveup
|
700
744
|
result = [result] unless result.is_a? Array # we can't do `Array(<Struct>)` because it splats by elements
|
701
745
|
assert_equal expectation, result.size, ->{
|
702
746
|
"for #{input} (giveup = #{giveup}): #{result.map &:url}"
|
703
747
|
}
|
704
748
|
end
|
749
|
+
# weird that this test may take longer than 5 sec
|
705
750
|
ensure
|
706
|
-
ENV["IMGUR_CLIENT_ID"] = ti
|
707
|
-
ENV["REDDIT_SECRETS"] = tr
|
751
|
+
ENV["IMGUR_CLIENT_ID"] = ti if ti
|
752
|
+
ENV["REDDIT_SECRETS"] = tr if tr
|
708
753
|
end
|
709
754
|
end
|
710
755
|
end
|
@@ -744,21 +789,28 @@ describe DirectLink do
|
|
744
789
|
|
745
790
|
describe "fails" do
|
746
791
|
[
|
747
|
-
[1, "http://example.com/",
|
748
|
-
[1, "http://example.com/404",
|
792
|
+
[1, "http://example.com/", /\AFastImage::UnknownImageType\n\z/],
|
793
|
+
[1, "http://example.com/404", /\ANetHTTPUtils::Error: HTTP error #404 \n\z/],
|
749
794
|
|
750
795
|
# TODO: a test when the giveup=false fails and reraises the DirectLink::ErrorMissingEnvVar
|
751
796
|
# maybe put it to ./lib tests
|
752
797
|
|
753
798
|
# by design it should be impossible to write a test for DirectLink::ErrorAssert
|
754
|
-
[1, "https://flic.kr/p/DirectLinkErrorNotFound",
|
799
|
+
[1, "https://flic.kr/p/DirectLinkErrorNotFound", /\ANetHTTPUtils::Error: HTTP error #404 \n\z/],
|
755
800
|
|
756
|
-
[1, "https://imgur.com/a/badlinkpattern",
|
801
|
+
[1, "https://imgur.com/a/badlinkpattern", /\ANetHTTPUtils::Error: HTTP error #404 \n\z/],
|
757
802
|
# TODO: a test that it appends the `exception.cause`
|
803
|
+
|
804
|
+
[1, "https://groundingpositivity.com/2020/08/13/new-quantum-app-will-make-you-wonder-do-we-live-in-a-simulation/", (
|
805
|
+
Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4.0") ?
|
806
|
+
/\ANetHTTPUtils::EOFError_from_rbuf_fill: probably the old Ruby empty backtrace EOFError exception from net\/protocol\.rb: end of file reached\n\z/ :
|
807
|
+
/\A\S+\/net\/protocol\.rb:\d+:in `rbuf_fill': end of file reached \(EOFError\)\n/
|
808
|
+
) ], # TODO: add also a test to nethttputils gem
|
758
809
|
].each_with_index do |(expected_exit_code, link, expected_output, unset), i| # TODO: unset is not used anymore or I have to go sleep?
|
759
810
|
it "##{i + 1}" do
|
760
811
|
string, status = Open3.capture2e "export #{(File.read("api_tokens_for_travis.sh") + File.read("vk.secret")).scan(/(?<=^export )\S+=\S+/).join(" ")}#{unset} && RUBYOPT='-rbundler/setup #{$-I.map{ |i| "-I #{i}" }.join " "}' ./bin/directlink #{link}"
|
761
|
-
assert_equal
|
812
|
+
assert_equal expected_exit_code, status.exitstatus, "for #{link}"
|
813
|
+
assert string[expected_output], "for #{link}"
|
762
814
|
end
|
763
815
|
end
|
764
816
|
end
|
@@ -813,12 +865,13 @@ describe DirectLink do
|
|
813
865
|
# TODO: test about --json
|
814
866
|
it "uses <meta> tag" do
|
815
867
|
string, status = Open3.capture2e "RUBYOPT='-rbundler/setup' ./bin/directlink --json https://www.kp.ru/daily/26342.7/3222103/"
|
816
|
-
assert_equal [0, "https://
|
817
|
-
end
|
818
|
-
it "ignores <meta> tag" do
|
819
|
-
string, status = Open3.capture2e "RUBYOPT='-rbundler/setup' ./bin/directlink --json --ignore-meta https://www.kp.ru/daily/26342.7/3222103/"
|
820
|
-
assert_equal [0, 21, "https://s11.stc.all.kpcdn.net/share/i/12/8024261/wx1080.jpg"], [status.exitstatus, JSON.load(string).size, JSON.load(string).first.fetch("url")]
|
868
|
+
assert_equal [0, "https://s11.stc.all.kpcdn.net/share/i/12/8054352/cr-1200-630.wm-asnplfru-100-tr-0-0.t-13-3222103-ttps-54-14-0083CD-1010-l-85-b-42.t-13-3222103-ttps-54-14-FFF-1010-l-85-b-42.t-207-5-asb-37-10-FFF-788-l-370-t-68.m2018-03-14T02-10-20.jpg"], [status.exitstatus, JSON.load(string).fetch("url")]
|
821
869
|
end
|
870
|
+
# TODO: kp.ru broke the page -- images are gone
|
871
|
+
# it "ignores <meta> tag" do
|
872
|
+
# string, status = Open3.capture2e "RUBYOPT='-rbundler/setup' ./bin/directlink --json --ignore-meta https://www.kp.ru/daily/26342.7/3222103/"
|
873
|
+
# assert_equal [0, 21, "https://s11.stc.all.kpcdn.net/share/i/12/8024261/inx960x640.jpg"], [status.exitstatus, JSON.load(string).size, JSON.load(string).first.fetch("url")]
|
874
|
+
# end
|
822
875
|
|
823
876
|
end
|
824
877
|
|