directlink 0.0.9.1 → 0.0.11.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/directlink +5 -10
- data/directlink.gemspec +13 -11
- data/lib/directlink.rb +74 -47
- data/unit.test.rb +1548 -0
- metadata +17 -40
- data/.bashrc +0 -5
- data/.travis.yml +0 -39
- data/Gemfile +0 -3
- data/README.md +0 -198
- data/Rakefile +0 -1
- data/api_tokens_for_travis.sh +0 -8
- data/gplus.txt +0 -1454
- data/reddit_token_for_travis.yaml +0 -4
- data/test.rb +0 -832
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0cadc1fae7bcad2314224fa404ca05dff9988e6
|
4
|
+
data.tar.gz: e5c4c32ac6e03efa4633b9dc323c8a44d8f52c4e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c96f80c6d75a232d282112cc2aad8a085e7c324df5bce7951cde142a11ec94346a53ce2a1f34ad073215307220540402ea64442ea9f55ee9ad2910aa9eac349e
|
7
|
+
data.tar.gz: 00f67755c1df4fab9aa7f2819b7b633f3d2462e3b55b767f38d5d3e458f20ce50feb45e50735d7fd06900d97bb2201a80425aa3852a82c569cebfe40fcedd6ca
|
data/bin/directlink
CHANGED
@@ -68,18 +68,13 @@ begin
|
|
68
68
|
(t.is_a?(Array) ? t : [t]).each{ |s| puts "=> #{s.url}\n #{s.type} #{s.width}x#{s.height}" }
|
69
69
|
end
|
70
70
|
end
|
71
|
-
rescue
|
72
|
-
Net::OpenTimeout,
|
73
|
-
Errno::ECONNRESET,
|
74
|
-
NetHTTPUtils::Error,
|
75
|
-
FastImage::UnknownImageType,
|
76
|
-
FastImage::ImageFetchFailure,
|
77
|
-
# DirectLink::ErrorMissingEnvVar,
|
78
|
-
# DirectLink::ErrorAssert,
|
79
|
-
DirectLink::ErrorNotFound,
|
80
|
-
DirectLink::ErrorBadLink => e
|
71
|
+
rescue *DirectLink::NORMAL_EXCEPTIONS => e
|
81
72
|
puts e.backtrace if debug
|
82
73
|
cause = e.cause if e.cause if e.respond_to? :cause
|
83
74
|
c = e.class.to_s
|
84
75
|
abort "#{c}#{": #{e}" if c != e.to_s}#{": #{cause}" if cause && c != cause.to_s}"
|
76
|
+
rescue
|
77
|
+
raise unless $!.cause
|
78
|
+
raise $!.cause unless $!.cause.cause
|
79
|
+
raise $!.cause.cause
|
85
80
|
end
|
data/directlink.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "directlink"
|
3
|
-
spec.version = "0.0.9.1"
|
4
|
-
spec.summary = "
|
3
|
+
spec.version = "0.0.11.1"
|
4
|
+
spec.summary = "obtains from any kind of hyperlink a link to an image, its format and resolution"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
7
7
|
spec.email = "nakilon@gmail.com"
|
@@ -9,21 +9,23 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.homepage = "https://github.com/nakilon/directlink"
|
10
10
|
spec.metadata = {"source_code_uri" => "https://github.com/nakilon/directlink"}
|
11
11
|
|
12
|
-
spec.
|
13
|
-
|
14
|
-
spec.add_dependency "
|
15
|
-
spec.add_dependency "
|
12
|
+
spec.required_ruby_version = ">=2.3" # because <<~ heredocs in tests
|
13
|
+
|
14
|
+
spec.add_dependency "fastimage", "~>2.2.0"
|
15
|
+
spec.add_dependency "nokogiri", "<1.11" # 1.11 requires ruby 2.5 # TODO: switch to Oga?
|
16
|
+
# spec.add_dependency "nethttputils", "~>0.4.2.0"
|
17
|
+
spec.add_dependency "reddit_bot", "~>1.10.0"
|
16
18
|
spec.add_dependency "kramdown"
|
17
19
|
spec.add_dependency "addressable"
|
18
|
-
spec.add_development_dependency "minitest"
|
19
|
-
spec.add_development_dependency "
|
20
|
+
spec.add_development_dependency "minitest-around"
|
21
|
+
spec.add_development_dependency "webmock"
|
20
22
|
|
21
23
|
spec.require_path = "lib"
|
22
24
|
spec.bindir = "bin"
|
23
25
|
spec.executable = "directlink"
|
24
|
-
spec.test_file = "test.rb"
|
25
|
-
spec.files =
|
26
|
+
spec.test_file = "unit.test.rb"
|
27
|
+
spec.files = %w{ LICENSE directlink.gemspec lib/directlink.rb bin/directlink }
|
26
28
|
|
27
|
-
spec.requirements << "you may
|
29
|
+
spec.requirements << "you may want to create apps and provide API tokens:"
|
28
30
|
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS"
|
29
31
|
end
|
data/lib/directlink.rb
CHANGED
@@ -2,17 +2,16 @@ module DirectLink
|
|
2
2
|
|
3
3
|
class << self
|
4
4
|
attr_accessor :silent
|
5
|
-
end
|
6
|
-
self.silent = false
|
7
|
-
class << self
|
8
5
|
attr_accessor :logger
|
6
|
+
attr_accessor :timeout
|
9
7
|
end
|
8
|
+
self.silent = false
|
10
9
|
self.logger = Object.new
|
11
10
|
self.logger.define_singleton_method :error do |str|
|
12
11
|
puts str unless Module.nesting.first.silent
|
13
12
|
end
|
14
13
|
|
15
|
-
class ErrorAssert < RuntimeError
|
14
|
+
class ErrorAssert < RuntimeError # gem user should not face this error
|
16
15
|
def initialize msg
|
17
16
|
super "#{msg} -- consider reporting this issue to GitHub"
|
18
17
|
end
|
@@ -35,6 +34,22 @@ module DirectLink
|
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
37
|
+
require "nethttputils"
|
38
|
+
require "fastimage"
|
39
|
+
NORMAL_EXCEPTIONS = [
|
40
|
+
SocketError,
|
41
|
+
Net::OpenTimeout,
|
42
|
+
Errno::ECONNRESET,
|
43
|
+
Errno::ECONNREFUSED,
|
44
|
+
Errno::ETIMEDOUT, # from FastImage
|
45
|
+
NetHTTPUtils::Error,
|
46
|
+
NetHTTPUtils::EOFError_from_rbuf_fill,
|
47
|
+
FastImage::UnknownImageType,
|
48
|
+
FastImage::ImageFetchFailure,
|
49
|
+
DirectLink::ErrorNotFound,
|
50
|
+
DirectLink::ErrorBadLink,
|
51
|
+
] # all known exceptions that can be raised while using Directlink but not as its fault
|
52
|
+
|
38
53
|
|
39
54
|
def self.google src, width = 0
|
40
55
|
# this can handle links without schema because it's used for parsing community HTML pages
|
@@ -44,7 +59,7 @@ module DirectLink
|
|
44
59
|
"#{$1}s#{width}/"
|
45
60
|
when /\A(\/\/lh3\.googleusercontent\.com\/proxy\/[a-zA-Z0-9_-]{66,523}=)(?:w(?:[45]\d\d)-h\d\d\d-[np]|s530-p|s110-p-k)\z/
|
46
61
|
"https:#{$1}s#{width}/"
|
47
|
-
when /\A(\/\/lh3\.googleusercontent\.com\/
|
62
|
+
when /\A(\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{24}_[a-zA-Z]{30}7zGIDTJfkc1YZFX2MhgKnjA=)w530-h398-p\z/
|
48
63
|
"https:#{$1}s#{width}/"
|
49
64
|
when /\A(\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/[VW][a-zA-Z0-9_-]{9}I\/AAAAAAA[AC][a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{32}[gwAQ]CJoC\/)w530-h[23]\d\d-p\/[^\/]+\z/,
|
50
65
|
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs(?:YHQ)?)\/)(?:s640|w\d{2,4}-h\d\d\d?-p(?:-k-no-nu)?)\/[^\/]+\z/,
|
@@ -65,18 +80,24 @@ module DirectLink
|
|
65
80
|
/\A(https:\/\/lh[356]\.googleusercontent\.com\/-[a-zA-Z0-9]{11}\/AAAAAAAAAAI\/AAAAAAAAAAA\/[a-zA-Z0-9_]{34}\/)s(?:46|64)-c(?:-k(?:-no)?)?-mo\/photo\.jpg\z/
|
66
81
|
"#{$1}s#{width}/"
|
67
82
|
# Google Keep
|
68
|
-
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\
|
83
|
+
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\z/
|
69
84
|
"#{$1}#{width}"
|
85
|
+
# opensea
|
86
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{78}-nGx_jf_XGqqiVANe_Jr8u2g=)w1400-k\z/
|
87
|
+
"#{$1}s#{width}"
|
70
88
|
# mp4
|
71
89
|
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z]{11}\/W[a-zA-Z0-9]{9}I\/AAAAAAAAODw\/[a-zA-Z0-9]{32}QCJoC\/)w530-h883-n-k-no\/[^\/]+\.mp4\z/
|
72
90
|
"#{$1}s#{width}/"
|
91
|
+
# something else
|
92
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/X-[a-zA-Z0-9]{8}I\/AAAAAAAAALE\/[a-zA-Z0-9]{23}_[a-zA-Z0-9]{19}\/)w1200-h630-p-k-no-nu\/[\d-]+\.png\z/
|
93
|
+
"#{$1}s#{width}/"
|
73
94
|
else
|
74
95
|
raise ErrorBadLink.new src
|
75
96
|
end
|
76
97
|
end
|
77
98
|
|
99
|
+
|
78
100
|
require "json"
|
79
|
-
require "nethttputils"
|
80
101
|
|
81
102
|
# TODO make the timeout handling respect the way the Directlink method works with timeouts
|
82
103
|
def self.imgur link, timeout = 1000
|
@@ -88,7 +109,7 @@ module DirectLink
|
|
88
109
|
NetHTTPUtils.request_data url, header: { Authorization: "Client-ID #{ENV["IMGUR_CLIENT_ID"]}" }
|
89
110
|
rescue NetHTTPUtils::Error => e
|
90
111
|
raise ErrorNotFound.new url.inspect if 404 == e.code
|
91
|
-
if t < timeout && [400, 500, 503].include?(e.code)
|
112
|
+
if t < timeout && [400, 500, 502, 503].include?(e.code)
|
92
113
|
logger.error "retrying in #{t} seconds because of Imgur HTTP ERROR #{e.code}"
|
93
114
|
sleep t
|
94
115
|
t *= 2
|
@@ -107,16 +128,16 @@ module DirectLink
|
|
107
128
|
elsif data["images"]
|
108
129
|
raise ErrorNotFound.new link.inspect if data["images"].empty?
|
109
130
|
data["images"]
|
110
|
-
elsif data["type"] && data["type"]
|
131
|
+
elsif data["type"] && %w{ image/jpeg image/png image/gif video/mp4 }.include?(data["type"])
|
111
132
|
# TODO check if this branch is possible at all
|
112
133
|
[ data ]
|
113
134
|
# elsif data["comment"]
|
114
135
|
# fi["https://imgur.com/" + data["image_id"]]
|
115
136
|
else
|
116
137
|
# one day single-video item should hit this but somehow it didn't yet
|
117
|
-
raise ErrorAssert.new "unknown data format #{
|
138
|
+
raise ErrorAssert.new "unknown data format #{json} for #{link}"
|
118
139
|
end
|
119
|
-
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|
|
140
|
+
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|jpe?g(?:\?fb)?|png))?\z/,
|
120
141
|
/\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{5})\.mp4\z/,
|
121
142
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{5}(?:[a-zA-Z0-9]{2})?)\z/,
|
122
143
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{7})(?:\?\S+)?\z/,
|
@@ -128,7 +149,7 @@ module DirectLink
|
|
128
149
|
raise ErrorBadLink.new link
|
129
150
|
end.map do |image|
|
130
151
|
case image["type"]
|
131
|
-
when
|
152
|
+
when *%w{ image/jpeg image/png image/gif video/mp4 }
|
132
153
|
image.values_at "link", "width", "height", "type"
|
133
154
|
else
|
134
155
|
raise ErrorAssert.new "unknown type of #{link}: #{image}"
|
@@ -139,7 +160,6 @@ module DirectLink
|
|
139
160
|
def self._500px link
|
140
161
|
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[-[a-zA-Z0-9]%]+\/?\z} =~ link
|
141
162
|
require "nokogiri"
|
142
|
-
resp = NetHTTPUtils.request_data link
|
143
163
|
f = lambda do |form|
|
144
164
|
JSON.load(NetHTTPUtils.request_data "https://api.500px.com/v1/photos", form: form).fetch("photos").values.first
|
145
165
|
end
|
@@ -191,11 +211,11 @@ module DirectLink
|
|
191
211
|
attr_accessor :reddit_bot
|
192
212
|
end
|
193
213
|
def self.reddit link, timeout = 1000
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
214
|
+
return [true, link] if URI(link).host &&
|
215
|
+
URI(link).host.split(?.) == %w{ i redd it } &&
|
216
|
+
URI(link).path[/\A\/[a-z0-9]{12,13}\.(gif|jpg)\z/]
|
217
|
+
unless id = link[/\Ahttps:\/\/www\.reddit\.com\/gallery\/([0-9a-z]{5,6})\z/, 1]
|
218
|
+
raise DirectLink::ErrorBadLink.new link unless id = URI(link).path[/\A(?:\/r\/[0-9a-zA-Z_]+)?(?:\/comments|\/duplicates)?\/([0-9a-z]{5,6})(?:\/|\z)/, 1]
|
199
219
|
end
|
200
220
|
retry_on_json_parseerror = lambda do |&b|
|
201
221
|
t = 1
|
@@ -223,17 +243,20 @@ module DirectLink
|
|
223
243
|
end
|
224
244
|
# TODO: do we handle linking Imgur albums?
|
225
245
|
data = json["data"]["children"].first["data"]
|
226
|
-
if data["media"]
|
227
|
-
return [true, data["media"]["reddit_video"]["fallback_url"]]
|
228
|
-
else
|
246
|
+
if data["media"]
|
247
|
+
return [true, data["media"]["reddit_video"]["fallback_url"]] if data["media"]["reddit_video"]
|
229
248
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless data["media"].keys.sort == %w{ oembed type } && %w{ youtube.com gfycat.com imgur.com }.include?(data["media"]["type"])
|
230
249
|
return [true, data["media"]["oembed"]["thumbnail_url"]]
|
231
|
-
end
|
232
|
-
|
233
|
-
[
|
234
|
-
|
235
|
-
|
236
|
-
|
250
|
+
end
|
251
|
+
if data["media_metadata"]
|
252
|
+
return [true, data["media_metadata"].values.map do |media|
|
253
|
+
next if media == {"status"=>"failed"} || media == {"status"=>"unprocessed"}
|
254
|
+
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated (media == #{media.inspect})" unless media["status"] == "valid"
|
255
|
+
[media["m"], *media["s"].values_at("x", "y"), CGI.unescapeHTML(media["s"][media["m"]=="image/gif" ? "gif" : "u"])]
|
256
|
+
end.compact]
|
257
|
+
end
|
258
|
+
return [true, "#{"https://www.reddit.com" if /\A\/r\/[0-9a-zA-Z_]+\/comments\/[0-9a-z]{5,6}\// =~ data["url"]}#{data["url"]}"] if data["crosspost_parent"]
|
259
|
+
return [true, CGI.unescapeHTML(data["url"])] unless data["is_self"]
|
237
260
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" if data["url"] != "https://www.reddit.com" + data["permalink"]
|
238
261
|
return [false, data["selftext"]]
|
239
262
|
end
|
@@ -242,7 +265,7 @@ module DirectLink
|
|
242
265
|
id, mtd, field, f = case link
|
243
266
|
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))?\z},
|
244
267
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>\d+)_\d+)%2Fphotos\k<user_id>\z},
|
245
|
-
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?all=1)?\z},
|
268
|
+
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(?:all|rev)=1)?\z},
|
246
269
|
%r{\Ahttps://vk\.com/feed\?section=likes&z=photo(?<_>)(?<id>-(?<user_id>\d+)_\d+)%2F(liked\d+|album\k<user_id>_0)\z},
|
247
270
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>-\d+)_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_0)\z},
|
248
271
|
%r{\Ahttps://vk\.com/wall(?<user_id>-\d+)_\d+\?z=photo(?<id>\k<user_id>_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_00%2Frev|\d+)\z}
|
@@ -251,7 +274,7 @@ module DirectLink
|
|
251
274
|
t
|
252
275
|
end ]
|
253
276
|
when %r{\Ahttps://vk\.com/wall(?<id>-?\d+_\d+)\z},
|
254
|
-
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id
|
277
|
+
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id>-?\d+_\d+)\z}
|
255
278
|
[$1, :wall, :posts, lambda do |t|
|
256
279
|
t.first.fetch("attachments").select do |item|
|
257
280
|
case item.keys
|
@@ -269,14 +292,14 @@ module DirectLink
|
|
269
292
|
raise ErrorBadLink.new link
|
270
293
|
end
|
271
294
|
raise ErrorMissingEnvVar.new "define VK_ACCESS_TOKEN and VK_CLIENT_SECRET env vars" unless ENV["VK_ACCESS_TOKEN"] && ENV["VK_CLIENT_SECRET"]
|
272
|
-
sleep 0.25 # "error_msg"=>"Too many requests per second"
|
295
|
+
sleep 0.25 unless ENV["CI"] # "error_msg"=>"Too many requests per second"
|
273
296
|
f.call( JSON.load( NetHTTPUtils.request_data "https://api.vk.com/method/#{mtd}.getById",
|
274
297
|
:POST, form: { field => id, :access_token => ENV["VK_ACCESS_TOKEN"], :client_secret => ENV["VK_CLIENT_SECRET"], :v => "5.101" }
|
275
298
|
).fetch("response") ).map do |photos|
|
276
299
|
photos.fetch("sizes").map do |size|
|
277
300
|
size.values_at("width", "height", "url").tap do |whu|
|
278
301
|
w, h, u = whu
|
279
|
-
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0
|
302
|
+
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0 # wtf?
|
280
303
|
end
|
281
304
|
end.max_by{ |w, h, u| w * h }
|
282
305
|
end
|
@@ -286,9 +309,8 @@ module DirectLink
|
|
286
309
|
end
|
287
310
|
|
288
311
|
|
289
|
-
|
290
|
-
|
291
|
-
def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
312
|
+
def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: false
|
313
|
+
timeout ||= DirectLink.timeout
|
292
314
|
ArgumentError.new("link should be a <String>, not <#{link.class}>") unless link.is_a? String
|
293
315
|
begin
|
294
316
|
URI link
|
@@ -326,15 +348,16 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
326
348
|
**( %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
327
349
|
%w{ redd it } == URI(link).host.split(?.) ? {Cookie: "over18=1"} : {} ),
|
328
350
|
}
|
329
|
-
head = NetHTTPUtils.request_data link, :
|
351
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: header, **(proxy ? {proxy: proxy} : {}), **(timeout ? {
|
330
352
|
timeout: timeout,
|
331
353
|
max_start_http_retry_delay: timeout,
|
332
|
-
max_read_retry_delay: timeout
|
354
|
+
max_read_retry_delay: timeout,
|
333
355
|
} : {})
|
334
|
-
rescue Net::ReadTimeout
|
356
|
+
rescue Net::ReadTimeout, Errno::ETIMEDOUT
|
335
357
|
rescue NetHTTPUtils::Error => e
|
336
358
|
raise unless 418 == e.code
|
337
359
|
else
|
360
|
+
raise DirectLink::ErrorAssert.new "last_response.uri is not set" unless head.instance_variable_get(:@last_response).uri
|
338
361
|
link = head.instance_variable_get(:@last_response).uri.to_s
|
339
362
|
end
|
340
363
|
|
@@ -342,10 +365,10 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
342
365
|
# because they can be hidden behind URL shorteners
|
343
366
|
# also it can resolve NetHTTPUtils::Error(404) before trying the adapter
|
344
367
|
|
345
|
-
t = google_without_schema_crutch[] and return t
|
368
|
+
t = google_without_schema_crutch[] and return t # TODO: why again?
|
346
369
|
|
347
370
|
begin
|
348
|
-
imgur = DirectLink.imgur(link).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
371
|
+
imgur = DirectLink.imgur(link, timeout).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
349
372
|
struct.new u, w, h, t
|
350
373
|
end
|
351
374
|
# `DirectLink.imgur` return value is always an Array
|
@@ -383,7 +406,7 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
383
406
|
f = ->_{ _.type == :a ? _.attr["href"] : _.children.flat_map(&f) }
|
384
407
|
require "kramdown"
|
385
408
|
return f[Kramdown::Document.new(u).root].flat_map do |sublink|
|
386
|
-
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup
|
409
|
+
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup # TODO: maybe subtract from timeout the time we've already wasted
|
387
410
|
end
|
388
411
|
end
|
389
412
|
if u.is_a? Hash
|
@@ -393,8 +416,8 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
393
416
|
struct.new u, x, y, t
|
394
417
|
end
|
395
418
|
end
|
396
|
-
|
397
|
-
|
419
|
+
raise DirectLink::ErrorNotFound.new link.inspect if link == u
|
420
|
+
return DirectLink u, timeout, giveup: giveup
|
398
421
|
rescue DirectLink::ErrorMissingEnvVar
|
399
422
|
end if %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
400
423
|
%w{ redd it } == URI(link).host.split(?.)
|
@@ -407,23 +430,27 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
407
430
|
end if %w{ vk com } == URI(link).host.split(?.)
|
408
431
|
|
409
432
|
begin
|
410
|
-
f = FastImage.new
|
433
|
+
f = FastImage.new link,
|
434
|
+
raise_on_failure: true,
|
435
|
+
timeout: timeout,
|
436
|
+
**(proxy ? {proxy: "http://#{proxy}"} : {}),
|
437
|
+
http_header: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"}
|
411
438
|
rescue FastImage::UnknownImageType
|
412
439
|
raise if giveup
|
413
440
|
require "nokogiri"
|
414
|
-
head = NetHTTPUtils.request_data link, :
|
441
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: {"User-Agent" => "Mozilla"},
|
415
442
|
max_start_http_retry_delay: timeout,
|
416
443
|
timeout: timeout, # NetHTTPUtils passes this as read_timeout to Net::HTTP.start
|
417
444
|
max_read_retry_delay: timeout # and then compares accumulated delay to this
|
418
445
|
# if we use :get here we will download megabytes of files just to giveup on content_type we can't process
|
419
|
-
case head.instance_variable_get(:@last_response).content_type
|
446
|
+
case head.instance_variable_get(:@last_response).content_type # webmock should provide this
|
420
447
|
when "text/html" ; nil
|
421
448
|
else ; raise
|
422
449
|
end
|
423
|
-
html = Nokogiri::HTML NetHTTPUtils.request_data link, header: {"User-Agent" => "Mozilla"}
|
450
|
+
html = Nokogiri::HTML NetHTTPUtils.request_data link, :GET, header: {"User-Agent" => "Mozilla"}
|
424
451
|
if t = html.at_css("meta[@property='og:image']")
|
425
452
|
begin
|
426
|
-
return DirectLink URI.join(link, t[:content]).to_s, nil, giveup: true
|
453
|
+
return DirectLink URI.join(link, t[:content]).to_s, nil, *proxy, giveup: true
|
427
454
|
rescue URI::InvalidURIError
|
428
455
|
end
|
429
456
|
end unless ignore_meta
|