directlink 0.0.9.2 → 0.0.11.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/directlink +4 -0
- data/directlink.gemspec +11 -8
- data/lib/directlink.rb +35 -24
- data/unit.test.rb +1553 -0
- metadata +24 -24
- data/test.rb +0 -878
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d67d884598ed66d3f4b2c09e278a7add58986e5
|
4
|
+
data.tar.gz: bec90a37e04dcc70c1e47c4b5906990603f3e0fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71f30a22295f7a04f3da474d3f1f42489e084de1b889e10ee3d10ab0cf2ab85d9e57c93067cf4225bf40751e6391f9cabe6ab1f42d77ebd1e98b88a6e16ed6c5
|
7
|
+
data.tar.gz: 27e321100578f58934237bd6aa0e63617b3dfa3fd618c23ee4d46e4b200b6c625c6c0a93a3839d8cdd585cb0f7eb8cfa34e160573301a0847c23ee39cb03fa70
|
data/bin/directlink
CHANGED
@@ -73,4 +73,8 @@ rescue *DirectLink::NORMAL_EXCEPTIONS => e
|
|
73
73
|
cause = e.cause if e.cause if e.respond_to? :cause
|
74
74
|
c = e.class.to_s
|
75
75
|
abort "#{c}#{": #{e}" if c != e.to_s}#{": #{cause}" if cause && c != cause.to_s}"
|
76
|
+
rescue
|
77
|
+
raise unless $!.cause
|
78
|
+
raise $!.cause unless $!.cause.cause
|
79
|
+
raise $!.cause.cause
|
76
80
|
end
|
data/directlink.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "directlink"
|
3
|
-
spec.version = "0.0.9.2"
|
3
|
+
spec.version = "0.0.11.2"
|
4
4
|
spec.summary = "obtains from any kind of hyperlink a link to an image, its format and resolution"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
@@ -9,20 +9,23 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.homepage = "https://github.com/nakilon/directlink"
|
10
10
|
spec.metadata = {"source_code_uri" => "https://github.com/nakilon/directlink"}
|
11
11
|
|
12
|
-
spec.
|
13
|
-
|
14
|
-
spec.add_dependency "
|
15
|
-
spec.add_dependency "
|
12
|
+
spec.required_ruby_version = ">=2.3" # because <<~ heredocs in tests
|
13
|
+
|
14
|
+
spec.add_dependency "fastimage", "~>2.2.0"
|
15
|
+
spec.add_dependency "nokogiri", "<1.11" # 1.11 requires ruby 2.5 # TODO: switch to Oga?
|
16
|
+
# spec.add_dependency "nethttputils", "~>0.4.2.0"
|
17
|
+
spec.add_dependency "reddit_bot", "~>1.10.0"
|
16
18
|
spec.add_dependency "kramdown"
|
17
19
|
spec.add_dependency "addressable"
|
18
|
-
spec.add_development_dependency "minitest"
|
20
|
+
spec.add_development_dependency "minitest-around"
|
21
|
+
spec.add_development_dependency "webmock"
|
19
22
|
|
20
23
|
spec.require_path = "lib"
|
21
24
|
spec.bindir = "bin"
|
22
25
|
spec.executable = "directlink"
|
23
|
-
spec.test_file = "test.rb"
|
26
|
+
spec.test_file = "unit.test.rb"
|
24
27
|
spec.files = %w{ LICENSE directlink.gemspec lib/directlink.rb bin/directlink }
|
25
28
|
|
26
29
|
spec.requirements << "you may want to create apps and provide API tokens:"
|
27
|
-
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS"
|
30
|
+
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS, VK_ACCESS_TOKEN, VK_CLIENT_SECRET"
|
28
31
|
end
|
data/lib/directlink.rb
CHANGED
@@ -11,7 +11,7 @@ module DirectLink
|
|
11
11
|
puts str unless Module.nesting.first.silent
|
12
12
|
end
|
13
13
|
|
14
|
-
class ErrorAssert < RuntimeError
|
14
|
+
class ErrorAssert < RuntimeError # gem user should not face this error
|
15
15
|
def initialize msg
|
16
16
|
super "#{msg} -- consider reporting this issue to GitHub"
|
17
17
|
end
|
@@ -40,13 +40,15 @@ module DirectLink
|
|
40
40
|
SocketError,
|
41
41
|
Net::OpenTimeout,
|
42
42
|
Errno::ECONNRESET,
|
43
|
+
Errno::ECONNREFUSED,
|
44
|
+
Errno::ETIMEDOUT, # from FastImage
|
43
45
|
NetHTTPUtils::Error,
|
44
46
|
NetHTTPUtils::EOFError_from_rbuf_fill,
|
45
47
|
FastImage::UnknownImageType,
|
46
48
|
FastImage::ImageFetchFailure,
|
47
49
|
DirectLink::ErrorNotFound,
|
48
50
|
DirectLink::ErrorBadLink,
|
49
|
-
] #
|
51
|
+
] # all known exceptions that can be raised while using Directlink but not as its fault
|
50
52
|
|
51
53
|
|
52
54
|
def self.google src, width = 0
|
@@ -57,7 +59,7 @@ module DirectLink
|
|
57
59
|
"#{$1}s#{width}/"
|
58
60
|
when /\A(\/\/lh3\.googleusercontent\.com\/proxy\/[a-zA-Z0-9_-]{66,523}=)(?:w(?:[45]\d\d)-h\d\d\d-[np]|s530-p|s110-p-k)\z/
|
59
61
|
"https:#{$1}s#{width}/"
|
60
|
-
when /\A(\/\/lh3\.googleusercontent\.com\/
|
62
|
+
when /\A(\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{24}_[a-zA-Z]{30}7zGIDTJfkc1YZFX2MhgKnjA=)w530-h398-p\z/
|
61
63
|
"https:#{$1}s#{width}/"
|
62
64
|
when /\A(\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/[VW][a-zA-Z0-9_-]{9}I\/AAAAAAA[AC][a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{32}[gwAQ]CJoC\/)w530-h[23]\d\d-p\/[^\/]+\z/,
|
63
65
|
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs(?:YHQ)?)\/)(?:s640|w\d{2,4}-h\d\d\d?-p(?:-k-no-nu)?)\/[^\/]+\z/,
|
@@ -78,20 +80,27 @@ module DirectLink
|
|
78
80
|
/\A(https:\/\/lh[356]\.googleusercontent\.com\/-[a-zA-Z0-9]{11}\/AAAAAAAAAAI\/AAAAAAAAAAA\/[a-zA-Z0-9_]{34}\/)s(?:46|64)-c(?:-k(?:-no)?)?-mo\/photo\.jpg\z/
|
79
81
|
"#{$1}s#{width}/"
|
80
82
|
# Google Keep
|
81
|
-
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\
|
83
|
+
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\z/
|
82
84
|
"#{$1}#{width}"
|
85
|
+
# opensea
|
86
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{78}-nGx_jf_XGqqiVANe_Jr8u2g=)w1400-k\z/
|
87
|
+
"#{$1}s#{width}"
|
83
88
|
# mp4
|
84
89
|
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z]{11}\/W[a-zA-Z0-9]{9}I\/AAAAAAAAODw\/[a-zA-Z0-9]{32}QCJoC\/)w530-h883-n-k-no\/[^\/]+\.mp4\z/
|
85
90
|
"#{$1}s#{width}/"
|
91
|
+
# something else
|
92
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/X-[a-zA-Z0-9]{8}I\/AAAAAAAAALE\/[a-zA-Z0-9]{23}_[a-zA-Z0-9]{19}\/)w1200-h630-p-k-no-nu\/[\d-]+\.png\z/
|
93
|
+
"#{$1}s#{width}/"
|
86
94
|
else
|
87
95
|
raise ErrorBadLink.new src
|
88
96
|
end
|
89
97
|
end
|
90
98
|
|
99
|
+
|
91
100
|
require "json"
|
92
101
|
|
93
102
|
# TODO make the timeout handling respect the way the Directlink method works with timeouts
|
94
|
-
def self.imgur link, timeout =
|
103
|
+
def self.imgur link, timeout = 1000
|
95
104
|
raise ErrorMissingEnvVar.new "define IMGUR_CLIENT_ID env var" unless ENV["IMGUR_CLIENT_ID"]
|
96
105
|
|
97
106
|
request_data = lambda do |url|
|
@@ -100,7 +109,7 @@ module DirectLink
|
|
100
109
|
NetHTTPUtils.request_data url, header: { Authorization: "Client-ID #{ENV["IMGUR_CLIENT_ID"]}" }
|
101
110
|
rescue NetHTTPUtils::Error => e
|
102
111
|
raise ErrorNotFound.new url.inspect if 404 == e.code
|
103
|
-
if t < timeout && [400, 500, 503].include?(e.code)
|
112
|
+
if t < timeout && [400, 500, 502, 503].include?(e.code)
|
104
113
|
logger.error "retrying in #{t} seconds because of Imgur HTTP ERROR #{e.code}"
|
105
114
|
sleep t
|
106
115
|
t *= 2
|
@@ -151,7 +160,6 @@ module DirectLink
|
|
151
160
|
def self._500px link
|
152
161
|
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[-[a-zA-Z0-9]%]+\/?\z} =~ link
|
153
162
|
require "nokogiri"
|
154
|
-
resp = NetHTTPUtils.request_data link
|
155
163
|
f = lambda do |form|
|
156
164
|
JSON.load(NetHTTPUtils.request_data "https://api.500px.com/v1/photos", form: form).fetch("photos").values.first
|
157
165
|
end
|
@@ -240,13 +248,15 @@ module DirectLink
|
|
240
248
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless data["media"].keys.sort == %w{ oembed type } && %w{ youtube.com gfycat.com imgur.com }.include?(data["media"]["type"])
|
241
249
|
return [true, data["media"]["oembed"]["thumbnail_url"]]
|
242
250
|
end
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
251
|
+
if data["media_metadata"]
|
252
|
+
return [true, data["media_metadata"].values.map do |media|
|
253
|
+
next if media == {"status"=>"failed"} || media == {"status"=>"unprocessed"}
|
254
|
+
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated (media == #{media.inspect})" unless media["status"] == "valid"
|
255
|
+
[media["m"], *media["s"].values_at("x", "y"), CGI.unescapeHTML(media["s"][media["m"]=="image/gif" ? "gif" : "u"])]
|
256
|
+
end.compact]
|
257
|
+
end
|
248
258
|
return [true, "#{"https://www.reddit.com" if /\A\/r\/[0-9a-zA-Z_]+\/comments\/[0-9a-z]{5,6}\// =~ data["url"]}#{data["url"]}"] if data["crosspost_parent"]
|
249
|
-
return [true, data["url"]] unless data["is_self"]
|
259
|
+
return [true, CGI.unescapeHTML(data["url"])] unless data["is_self"]
|
250
260
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" if data["url"] != "https://www.reddit.com" + data["permalink"]
|
251
261
|
return [false, data["selftext"]]
|
252
262
|
end
|
@@ -255,8 +265,8 @@ module DirectLink
|
|
255
265
|
id, mtd, field, f = case link
|
256
266
|
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))?\z},
|
257
267
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>\d+)_\d+)%2Fphotos\k<user_id>\z},
|
258
|
-
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(
|
259
|
-
%r{\Ahttps://vk\.com/feed\?section=likes&z=photo(?<_>)(?<id
|
268
|
+
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(all|rev)=1)?\z},
|
269
|
+
%r{\Ahttps://vk\.com/feed\?(?:section=likes&)?z=photo(?<_>)(?<id>(?<user_id>-?\d+)_\d+)%2F(liked\d+|album\k<user_id>_0(0%2Frev)?)\z},
|
260
270
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>-\d+)_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_0)\z},
|
261
271
|
%r{\Ahttps://vk\.com/wall(?<user_id>-\d+)_\d+\?z=photo(?<id>\k<user_id>_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_00%2Frev|\d+)\z}
|
262
272
|
[$2, :photos, :photos, lambda do |t|
|
@@ -264,7 +274,7 @@ module DirectLink
|
|
264
274
|
t
|
265
275
|
end ]
|
266
276
|
when %r{\Ahttps://vk\.com/wall(?<id>-?\d+_\d+)\z},
|
267
|
-
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id
|
277
|
+
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id>-?\d+_\d+)\z}
|
268
278
|
[$1, :wall, :posts, lambda do |t|
|
269
279
|
t.first.fetch("attachments").select do |item|
|
270
280
|
case item.keys
|
@@ -282,14 +292,14 @@ module DirectLink
|
|
282
292
|
raise ErrorBadLink.new link
|
283
293
|
end
|
284
294
|
raise ErrorMissingEnvVar.new "define VK_ACCESS_TOKEN and VK_CLIENT_SECRET env vars" unless ENV["VK_ACCESS_TOKEN"] && ENV["VK_CLIENT_SECRET"]
|
285
|
-
sleep 0.25 # "error_msg"=>"Too many requests per second"
|
295
|
+
sleep 0.25 unless ENV["CI"] # "error_msg"=>"Too many requests per second"
|
286
296
|
f.call( JSON.load( NetHTTPUtils.request_data "https://api.vk.com/method/#{mtd}.getById",
|
287
297
|
:POST, form: { field => id, :access_token => ENV["VK_ACCESS_TOKEN"], :client_secret => ENV["VK_CLIENT_SECRET"], :v => "5.101" }
|
288
298
|
).fetch("response") ).map do |photos|
|
289
299
|
photos.fetch("sizes").map do |size|
|
290
300
|
size.values_at("width", "height", "url").tap do |whu|
|
291
301
|
w, h, u = whu
|
292
|
-
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0
|
302
|
+
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0 # wtf?
|
293
303
|
end
|
294
304
|
end.max_by{ |w, h, u| w * h }
|
295
305
|
end
|
@@ -341,12 +351,13 @@ def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: fal
|
|
341
351
|
head = NetHTTPUtils.request_data link, :HEAD, header: header, **(proxy ? {proxy: proxy} : {}), **(timeout ? {
|
342
352
|
timeout: timeout,
|
343
353
|
max_start_http_retry_delay: timeout,
|
344
|
-
max_read_retry_delay: timeout
|
354
|
+
max_read_retry_delay: timeout,
|
345
355
|
} : {})
|
346
|
-
rescue Net::ReadTimeout
|
356
|
+
rescue Net::ReadTimeout, Errno::ETIMEDOUT
|
347
357
|
rescue NetHTTPUtils::Error => e
|
348
358
|
raise unless 418 == e.code
|
349
359
|
else
|
360
|
+
raise DirectLink::ErrorAssert.new "last_response.uri is not set" unless head.instance_variable_get(:@last_response).uri
|
350
361
|
link = head.instance_variable_get(:@last_response).uri.to_s
|
351
362
|
end
|
352
363
|
|
@@ -354,10 +365,10 @@ def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: fal
|
|
354
365
|
# because they can be hidden behind URL shorteners
|
355
366
|
# also it can resolve NetHTTPUtils::Error(404) before trying the adapter
|
356
367
|
|
357
|
-
t = google_without_schema_crutch[] and return t
|
368
|
+
t = google_without_schema_crutch[] and return t # TODO: why again?
|
358
369
|
|
359
370
|
begin
|
360
|
-
imgur = DirectLink.imgur(link).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
371
|
+
imgur = DirectLink.imgur(link, timeout).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
361
372
|
struct.new u, w, h, t
|
362
373
|
end
|
363
374
|
# `DirectLink.imgur` return value is always an Array
|
@@ -432,11 +443,11 @@ def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: fal
|
|
432
443
|
timeout: timeout, # NetHTTPUtild passes this as read_timeout to Net::HTTP.start
|
433
444
|
max_read_retry_delay: timeout # and then compares accumulated delay to this
|
434
445
|
# if we use :get here we will download megabytes of files just to giveup on content_type we can't process
|
435
|
-
case head.instance_variable_get(:@last_response).content_type
|
446
|
+
case head.instance_variable_get(:@last_response).content_type # webmock should provide this
|
436
447
|
when "text/html" ; nil
|
437
448
|
else ; raise
|
438
449
|
end
|
439
|
-
html = Nokogiri::HTML NetHTTPUtils.request_data link, header: {"User-Agent" => "Mozilla"}
|
450
|
+
html = Nokogiri::HTML NetHTTPUtils.request_data link, :GET, header: {"User-Agent" => "Mozilla"}
|
440
451
|
if t = html.at_css("meta[@property='og:image']")
|
441
452
|
begin
|
442
453
|
return DirectLink URI.join(link, t[:content]).to_s, nil, *proxy, giveup: true
|