directlink 0.0.9.2 → 0.0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/directlink +4 -0
- data/directlink.gemspec +11 -8
- data/lib/directlink.rb +35 -24
- data/unit.test.rb +1553 -0
- metadata +24 -24
- data/test.rb +0 -878
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d67d884598ed66d3f4b2c09e278a7add58986e5
|
4
|
+
data.tar.gz: bec90a37e04dcc70c1e47c4b5906990603f3e0fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71f30a22295f7a04f3da474d3f1f42489e084de1b889e10ee3d10ab0cf2ab85d9e57c93067cf4225bf40751e6391f9cabe6ab1f42d77ebd1e98b88a6e16ed6c5
|
7
|
+
data.tar.gz: 27e321100578f58934237bd6aa0e63617b3dfa3fd618c23ee4d46e4b200b6c625c6c0a93a3839d8cdd585cb0f7eb8cfa34e160573301a0847c23ee39cb03fa70
|
data/bin/directlink
CHANGED
@@ -73,4 +73,8 @@ rescue *DirectLink::NORMAL_EXCEPTIONS => e
|
|
73
73
|
cause = e.cause if e.cause if e.respond_to? :cause
|
74
74
|
c = e.class.to_s
|
75
75
|
abort "#{c}#{": #{e}" if c != e.to_s}#{": #{cause}" if cause && c != cause.to_s}"
|
76
|
+
rescue
|
77
|
+
raise unless $!.cause
|
78
|
+
raise $!.cause unless $!.cause.cause
|
79
|
+
raise $!.cause.cause
|
76
80
|
end
|
data/directlink.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "directlink"
|
3
|
-
spec.version = "0.0.9.2"
|
3
|
+
spec.version = "0.0.11.2"
|
4
4
|
spec.summary = "obtains from any kind of hyperlink a link to an image, its format and resolution"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
@@ -9,20 +9,23 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.homepage = "https://github.com/nakilon/directlink"
|
10
10
|
spec.metadata = {"source_code_uri" => "https://github.com/nakilon/directlink"}
|
11
11
|
|
12
|
-
spec.
|
13
|
-
|
14
|
-
spec.add_dependency "
|
15
|
-
spec.add_dependency "
|
12
|
+
spec.required_ruby_version = ">=2.3" # because <<~ heredocs in tests
|
13
|
+
|
14
|
+
spec.add_dependency "fastimage", "~>2.2.0"
|
15
|
+
spec.add_dependency "nokogiri", "<1.11" # 1.11 requires ruby 2.5 # TODO: switch to Oga?
|
16
|
+
# spec.add_dependency "nethttputils", "~>0.4.2.0"
|
17
|
+
spec.add_dependency "reddit_bot", "~>1.10.0"
|
16
18
|
spec.add_dependency "kramdown"
|
17
19
|
spec.add_dependency "addressable"
|
18
|
-
spec.add_development_dependency "minitest"
|
20
|
+
spec.add_development_dependency "minitest-around"
|
21
|
+
spec.add_development_dependency "webmock"
|
19
22
|
|
20
23
|
spec.require_path = "lib"
|
21
24
|
spec.bindir = "bin"
|
22
25
|
spec.executable = "directlink"
|
23
|
-
spec.test_file = "test.rb"
|
26
|
+
spec.test_file = "unit.test.rb"
|
24
27
|
spec.files = %w{ LICENSE directlink.gemspec lib/directlink.rb bin/directlink }
|
25
28
|
|
26
29
|
spec.requirements << "you may want to create apps and provide API tokens:"
|
27
|
-
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS"
|
30
|
+
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS, VK_ACCESS_TOKEN, VK_CLIENT_SECRET"
|
28
31
|
end
|
data/lib/directlink.rb
CHANGED
@@ -11,7 +11,7 @@ module DirectLink
|
|
11
11
|
puts str unless Module.nesting.first.silent
|
12
12
|
end
|
13
13
|
|
14
|
-
class ErrorAssert < RuntimeError
|
14
|
+
class ErrorAssert < RuntimeError # gem user should not face this error
|
15
15
|
def initialize msg
|
16
16
|
super "#{msg} -- consider reporting this issue to GitHub"
|
17
17
|
end
|
@@ -40,13 +40,15 @@ module DirectLink
|
|
40
40
|
SocketError,
|
41
41
|
Net::OpenTimeout,
|
42
42
|
Errno::ECONNRESET,
|
43
|
+
Errno::ECONNREFUSED,
|
44
|
+
Errno::ETIMEDOUT, # from FastImage
|
43
45
|
NetHTTPUtils::Error,
|
44
46
|
NetHTTPUtils::EOFError_from_rbuf_fill,
|
45
47
|
FastImage::UnknownImageType,
|
46
48
|
FastImage::ImageFetchFailure,
|
47
49
|
DirectLink::ErrorNotFound,
|
48
50
|
DirectLink::ErrorBadLink,
|
49
|
-
] #
|
51
|
+
] # all known exceptions that can be raised while using Directlink but not as its fault
|
50
52
|
|
51
53
|
|
52
54
|
def self.google src, width = 0
|
@@ -57,7 +59,7 @@ module DirectLink
|
|
57
59
|
"#{$1}s#{width}/"
|
58
60
|
when /\A(\/\/lh3\.googleusercontent\.com\/proxy\/[a-zA-Z0-9_-]{66,523}=)(?:w(?:[45]\d\d)-h\d\d\d-[np]|s530-p|s110-p-k)\z/
|
59
61
|
"https:#{$1}s#{width}/"
|
60
|
-
when /\A(\/\/lh3\.googleusercontent\.com\/
|
62
|
+
when /\A(\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{24}_[a-zA-Z]{30}7zGIDTJfkc1YZFX2MhgKnjA=)w530-h398-p\z/
|
61
63
|
"https:#{$1}s#{width}/"
|
62
64
|
when /\A(\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/[VW][a-zA-Z0-9_-]{9}I\/AAAAAAA[AC][a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{32}[gwAQ]CJoC\/)w530-h[23]\d\d-p\/[^\/]+\z/,
|
63
65
|
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs(?:YHQ)?)\/)(?:s640|w\d{2,4}-h\d\d\d?-p(?:-k-no-nu)?)\/[^\/]+\z/,
|
@@ -78,20 +80,27 @@ module DirectLink
|
|
78
80
|
/\A(https:\/\/lh[356]\.googleusercontent\.com\/-[a-zA-Z0-9]{11}\/AAAAAAAAAAI\/AAAAAAAAAAA\/[a-zA-Z0-9_]{34}\/)s(?:46|64)-c(?:-k(?:-no)?)?-mo\/photo\.jpg\z/
|
79
81
|
"#{$1}s#{width}/"
|
80
82
|
# Google Keep
|
81
|
-
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\
|
83
|
+
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\z/
|
82
84
|
"#{$1}#{width}"
|
85
|
+
# opensea
|
86
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{78}-nGx_jf_XGqqiVANe_Jr8u2g=)w1400-k\z/
|
87
|
+
"#{$1}s#{width}"
|
83
88
|
# mp4
|
84
89
|
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z]{11}\/W[a-zA-Z0-9]{9}I\/AAAAAAAAODw\/[a-zA-Z0-9]{32}QCJoC\/)w530-h883-n-k-no\/[^\/]+\.mp4\z/
|
85
90
|
"#{$1}s#{width}/"
|
91
|
+
# something else
|
92
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/X-[a-zA-Z0-9]{8}I\/AAAAAAAAALE\/[a-zA-Z0-9]{23}_[a-zA-Z0-9]{19}\/)w1200-h630-p-k-no-nu\/[\d-]+\.png\z/
|
93
|
+
"#{$1}s#{width}/"
|
86
94
|
else
|
87
95
|
raise ErrorBadLink.new src
|
88
96
|
end
|
89
97
|
end
|
90
98
|
|
99
|
+
|
91
100
|
require "json"
|
92
101
|
|
93
102
|
# TODO make the timeout handling respect the way the Directlink method works with timeouts
|
94
|
-
def self.imgur link, timeout =
|
103
|
+
def self.imgur link, timeout = 1000
|
95
104
|
raise ErrorMissingEnvVar.new "define IMGUR_CLIENT_ID env var" unless ENV["IMGUR_CLIENT_ID"]
|
96
105
|
|
97
106
|
request_data = lambda do |url|
|
@@ -100,7 +109,7 @@ module DirectLink
|
|
100
109
|
NetHTTPUtils.request_data url, header: { Authorization: "Client-ID #{ENV["IMGUR_CLIENT_ID"]}" }
|
101
110
|
rescue NetHTTPUtils::Error => e
|
102
111
|
raise ErrorNotFound.new url.inspect if 404 == e.code
|
103
|
-
if t < timeout && [400, 500, 503].include?(e.code)
|
112
|
+
if t < timeout && [400, 500, 502, 503].include?(e.code)
|
104
113
|
logger.error "retrying in #{t} seconds because of Imgur HTTP ERROR #{e.code}"
|
105
114
|
sleep t
|
106
115
|
t *= 2
|
@@ -151,7 +160,6 @@ module DirectLink
|
|
151
160
|
def self._500px link
|
152
161
|
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[-[a-zA-Z0-9]%]+\/?\z} =~ link
|
153
162
|
require "nokogiri"
|
154
|
-
resp = NetHTTPUtils.request_data link
|
155
163
|
f = lambda do |form|
|
156
164
|
JSON.load(NetHTTPUtils.request_data "https://api.500px.com/v1/photos", form: form).fetch("photos").values.first
|
157
165
|
end
|
@@ -240,13 +248,15 @@ module DirectLink
|
|
240
248
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless data["media"].keys.sort == %w{ oembed type } && %w{ youtube.com gfycat.com imgur.com }.include?(data["media"]["type"])
|
241
249
|
return [true, data["media"]["oembed"]["thumbnail_url"]]
|
242
250
|
end
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
251
|
+
if data["media_metadata"]
|
252
|
+
return [true, data["media_metadata"].values.map do |media|
|
253
|
+
next if media == {"status"=>"failed"} || media == {"status"=>"unprocessed"}
|
254
|
+
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated (media == #{media.inspect})" unless media["status"] == "valid"
|
255
|
+
[media["m"], *media["s"].values_at("x", "y"), CGI.unescapeHTML(media["s"][media["m"]=="image/gif" ? "gif" : "u"])]
|
256
|
+
end.compact]
|
257
|
+
end
|
248
258
|
return [true, "#{"https://www.reddit.com" if /\A\/r\/[0-9a-zA-Z_]+\/comments\/[0-9a-z]{5,6}\// =~ data["url"]}#{data["url"]}"] if data["crosspost_parent"]
|
249
|
-
return [true, data["url"]] unless data["is_self"]
|
259
|
+
return [true, CGI.unescapeHTML(data["url"])] unless data["is_self"]
|
250
260
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" if data["url"] != "https://www.reddit.com" + data["permalink"]
|
251
261
|
return [false, data["selftext"]]
|
252
262
|
end
|
@@ -255,8 +265,8 @@ module DirectLink
|
|
255
265
|
id, mtd, field, f = case link
|
256
266
|
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))?\z},
|
257
267
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>\d+)_\d+)%2Fphotos\k<user_id>\z},
|
258
|
-
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(
|
259
|
-
%r{\Ahttps://vk\.com/feed\?section=likes&z=photo(?<_>)(?<id
|
268
|
+
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(all|rev)=1)?\z},
|
269
|
+
%r{\Ahttps://vk\.com/feed\?(?:section=likes&)?z=photo(?<_>)(?<id>(?<user_id>-?\d+)_\d+)%2F(liked\d+|album\k<user_id>_0(0%2Frev)?)\z},
|
260
270
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>-\d+)_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_0)\z},
|
261
271
|
%r{\Ahttps://vk\.com/wall(?<user_id>-\d+)_\d+\?z=photo(?<id>\k<user_id>_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_00%2Frev|\d+)\z}
|
262
272
|
[$2, :photos, :photos, lambda do |t|
|
@@ -264,7 +274,7 @@ module DirectLink
|
|
264
274
|
t
|
265
275
|
end ]
|
266
276
|
when %r{\Ahttps://vk\.com/wall(?<id>-?\d+_\d+)\z},
|
267
|
-
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id
|
277
|
+
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id>-?\d+_\d+)\z}
|
268
278
|
[$1, :wall, :posts, lambda do |t|
|
269
279
|
t.first.fetch("attachments").select do |item|
|
270
280
|
case item.keys
|
@@ -282,14 +292,14 @@ module DirectLink
|
|
282
292
|
raise ErrorBadLink.new link
|
283
293
|
end
|
284
294
|
raise ErrorMissingEnvVar.new "define VK_ACCESS_TOKEN and VK_CLIENT_SECRET env vars" unless ENV["VK_ACCESS_TOKEN"] && ENV["VK_CLIENT_SECRET"]
|
285
|
-
sleep 0.25 # "error_msg"=>"Too many requests per second"
|
295
|
+
sleep 0.25 unless ENV["CI"] # "error_msg"=>"Too many requests per second"
|
286
296
|
f.call( JSON.load( NetHTTPUtils.request_data "https://api.vk.com/method/#{mtd}.getById",
|
287
297
|
:POST, form: { field => id, :access_token => ENV["VK_ACCESS_TOKEN"], :client_secret => ENV["VK_CLIENT_SECRET"], :v => "5.101" }
|
288
298
|
).fetch("response") ).map do |photos|
|
289
299
|
photos.fetch("sizes").map do |size|
|
290
300
|
size.values_at("width", "height", "url").tap do |whu|
|
291
301
|
w, h, u = whu
|
292
|
-
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0
|
302
|
+
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0 # wtf?
|
293
303
|
end
|
294
304
|
end.max_by{ |w, h, u| w * h }
|
295
305
|
end
|
@@ -341,12 +351,13 @@ def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: fal
|
|
341
351
|
head = NetHTTPUtils.request_data link, :HEAD, header: header, **(proxy ? {proxy: proxy} : {}), **(timeout ? {
|
342
352
|
timeout: timeout,
|
343
353
|
max_start_http_retry_delay: timeout,
|
344
|
-
max_read_retry_delay: timeout
|
354
|
+
max_read_retry_delay: timeout,
|
345
355
|
} : {})
|
346
|
-
rescue Net::ReadTimeout
|
356
|
+
rescue Net::ReadTimeout, Errno::ETIMEDOUT
|
347
357
|
rescue NetHTTPUtils::Error => e
|
348
358
|
raise unless 418 == e.code
|
349
359
|
else
|
360
|
+
raise DirectLink::ErrorAssert.new "last_response.uri is not set" unless head.instance_variable_get(:@last_response).uri
|
350
361
|
link = head.instance_variable_get(:@last_response).uri.to_s
|
351
362
|
end
|
352
363
|
|
@@ -354,10 +365,10 @@ def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: fal
|
|
354
365
|
# because they can be hidden behind URL shorteners
|
355
366
|
# also it can resolve NetHTTPUtils::Error(404) before trying the adapter
|
356
367
|
|
357
|
-
t = google_without_schema_crutch[] and return t
|
368
|
+
t = google_without_schema_crutch[] and return t # TODO: why again?
|
358
369
|
|
359
370
|
begin
|
360
|
-
imgur = DirectLink.imgur(link).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
371
|
+
imgur = DirectLink.imgur(link, timeout).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
361
372
|
struct.new u, w, h, t
|
362
373
|
end
|
363
374
|
# `DirectLink.imgur` return value is always an Array
|
@@ -432,11 +443,11 @@ def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: fal
|
|
432
443
|
timeout: timeout, # NetHTTPUtild passes this as read_timeout to Net::HTTP.start
|
433
444
|
max_read_retry_delay: timeout # and then compares accumulated delay to this
|
434
445
|
# if we use :get here we will download megabytes of files just to giveup on content_type we can't process
|
435
|
-
case head.instance_variable_get(:@last_response).content_type
|
446
|
+
case head.instance_variable_get(:@last_response).content_type # webmock should provide this
|
436
447
|
when "text/html" ; nil
|
437
448
|
else ; raise
|
438
449
|
end
|
439
|
-
html = Nokogiri::HTML NetHTTPUtils.request_data link, header: {"User-Agent" => "Mozilla"}
|
450
|
+
html = Nokogiri::HTML NetHTTPUtils.request_data link, :GET, header: {"User-Agent" => "Mozilla"}
|
440
451
|
if t = html.at_css("meta[@property='og:image']")
|
441
452
|
begin
|
442
453
|
return DirectLink URI.join(link, t[:content]).to_s, nil, *proxy, giveup: true
|