directlink 0.0.9.1 → 0.0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/directlink +5 -10
- data/directlink.gemspec +13 -11
- data/lib/directlink.rb +74 -47
- data/unit.test.rb +1548 -0
- metadata +17 -40
- data/.bashrc +0 -5
- data/.travis.yml +0 -39
- data/Gemfile +0 -3
- data/README.md +0 -198
- data/Rakefile +0 -1
- data/api_tokens_for_travis.sh +0 -8
- data/gplus.txt +0 -1454
- data/reddit_token_for_travis.yaml +0 -4
- data/test.rb +0 -832
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0cadc1fae7bcad2314224fa404ca05dff9988e6
|
4
|
+
data.tar.gz: e5c4c32ac6e03efa4633b9dc323c8a44d8f52c4e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c96f80c6d75a232d282112cc2aad8a085e7c324df5bce7951cde142a11ec94346a53ce2a1f34ad073215307220540402ea64442ea9f55ee9ad2910aa9eac349e
|
7
|
+
data.tar.gz: 00f67755c1df4fab9aa7f2819b7b633f3d2462e3b55b767f38d5d3e458f20ce50feb45e50735d7fd06900d97bb2201a80425aa3852a82c569cebfe40fcedd6ca
|
data/bin/directlink
CHANGED
@@ -68,18 +68,13 @@ begin
|
|
68
68
|
(t.is_a?(Array) ? t : [t]).each{ |s| puts "=> #{s.url}\n #{s.type} #{s.width}x#{s.height}" }
|
69
69
|
end
|
70
70
|
end
|
71
|
-
rescue
|
72
|
-
Net::OpenTimeout,
|
73
|
-
Errno::ECONNRESET,
|
74
|
-
NetHTTPUtils::Error,
|
75
|
-
FastImage::UnknownImageType,
|
76
|
-
FastImage::ImageFetchFailure,
|
77
|
-
# DirectLink::ErrorMissingEnvVar,
|
78
|
-
# DirectLink::ErrorAssert,
|
79
|
-
DirectLink::ErrorNotFound,
|
80
|
-
DirectLink::ErrorBadLink => e
|
71
|
+
rescue *DirectLink::NORMAL_EXCEPTIONS => e
|
81
72
|
puts e.backtrace if debug
|
82
73
|
cause = e.cause if e.cause if e.respond_to? :cause
|
83
74
|
c = e.class.to_s
|
84
75
|
abort "#{c}#{": #{e}" if c != e.to_s}#{": #{cause}" if cause && c != cause.to_s}"
|
76
|
+
rescue
|
77
|
+
raise unless $!.cause
|
78
|
+
raise $!.cause unless $!.cause.cause
|
79
|
+
raise $!.cause.cause
|
85
80
|
end
|
data/directlink.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "directlink"
|
3
|
-
spec.version = "0.0.
|
4
|
-
spec.summary = "
|
3
|
+
spec.version = "0.0.11.1"
|
4
|
+
spec.summary = "obtains from any kind of hyperlink a link to an image, its format and resolution"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
7
7
|
spec.email = "nakilon@gmail.com"
|
@@ -9,21 +9,23 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.homepage = "https://github.com/nakilon/directlink"
|
10
10
|
spec.metadata = {"source_code_uri" => "https://github.com/nakilon/directlink"}
|
11
11
|
|
12
|
-
spec.
|
13
|
-
|
14
|
-
spec.add_dependency "
|
15
|
-
spec.add_dependency "
|
12
|
+
spec.required_ruby_version = ">=2.3" # because <<~ heredocs in tests
|
13
|
+
|
14
|
+
spec.add_dependency "fastimage", "~>2.2.0"
|
15
|
+
spec.add_dependency "nokogiri", "<1.11" # 1.11 requires ruby 2.5 # TODO: switch to Oga?
|
16
|
+
# spec.add_dependency "nethttputils", "~>0.4.2.0"
|
17
|
+
spec.add_dependency "reddit_bot", "~>1.10.0"
|
16
18
|
spec.add_dependency "kramdown"
|
17
19
|
spec.add_dependency "addressable"
|
18
|
-
spec.add_development_dependency "minitest"
|
19
|
-
spec.add_development_dependency "
|
20
|
+
spec.add_development_dependency "minitest-around"
|
21
|
+
spec.add_development_dependency "webmock"
|
20
22
|
|
21
23
|
spec.require_path = "lib"
|
22
24
|
spec.bindir = "bin"
|
23
25
|
spec.executable = "directlink"
|
24
|
-
spec.test_file = "test.rb"
|
25
|
-
spec.files =
|
26
|
+
spec.test_file = "unit.test.rb"
|
27
|
+
spec.files = %w{ LICENSE directlink.gemspec lib/directlink.rb bin/directlink }
|
26
28
|
|
27
|
-
spec.requirements << "you may
|
29
|
+
spec.requirements << "you may want to create apps and provide API tokens:"
|
28
30
|
spec.requirements << "IMGUR_CLIENT_ID, FLICKR_API_KEY, REDDIT_SECRETS"
|
29
31
|
end
|
data/lib/directlink.rb
CHANGED
@@ -2,17 +2,16 @@ module DirectLink
|
|
2
2
|
|
3
3
|
class << self
|
4
4
|
attr_accessor :silent
|
5
|
-
end
|
6
|
-
self.silent = false
|
7
|
-
class << self
|
8
5
|
attr_accessor :logger
|
6
|
+
attr_accessor :timeout
|
9
7
|
end
|
8
|
+
self.silent = false
|
10
9
|
self.logger = Object.new
|
11
10
|
self.logger.define_singleton_method :error do |str|
|
12
11
|
puts str unless Module.nesting.first.silent
|
13
12
|
end
|
14
13
|
|
15
|
-
class ErrorAssert < RuntimeError
|
14
|
+
class ErrorAssert < RuntimeError # gem user should not face this error
|
16
15
|
def initialize msg
|
17
16
|
super "#{msg} -- consider reporting this issue to GitHub"
|
18
17
|
end
|
@@ -35,6 +34,22 @@ module DirectLink
|
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
37
|
+
require "nethttputils"
|
38
|
+
require "fastimage"
|
39
|
+
NORMAL_EXCEPTIONS = [
|
40
|
+
SocketError,
|
41
|
+
Net::OpenTimeout,
|
42
|
+
Errno::ECONNRESET,
|
43
|
+
Errno::ECONNREFUSED,
|
44
|
+
Errno::ETIMEDOUT, # from FastImage
|
45
|
+
NetHTTPUtils::Error,
|
46
|
+
NetHTTPUtils::EOFError_from_rbuf_fill,
|
47
|
+
FastImage::UnknownImageType,
|
48
|
+
FastImage::ImageFetchFailure,
|
49
|
+
DirectLink::ErrorNotFound,
|
50
|
+
DirectLink::ErrorBadLink,
|
51
|
+
] # all known exceptions that can be raised while using Directlink but not as its fault
|
52
|
+
|
38
53
|
|
39
54
|
def self.google src, width = 0
|
40
55
|
# this can handle links without schema because it's used for parsing community HTML pages
|
@@ -44,7 +59,7 @@ module DirectLink
|
|
44
59
|
"#{$1}s#{width}/"
|
45
60
|
when /\A(\/\/lh3\.googleusercontent\.com\/proxy\/[a-zA-Z0-9_-]{66,523}=)(?:w(?:[45]\d\d)-h\d\d\d-[np]|s530-p|s110-p-k)\z/
|
46
61
|
"https:#{$1}s#{width}/"
|
47
|
-
when /\A(\/\/lh3\.googleusercontent\.com\/
|
62
|
+
when /\A(\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{24}_[a-zA-Z]{30}7zGIDTJfkc1YZFX2MhgKnjA=)w530-h398-p\z/
|
48
63
|
"https:#{$1}s#{width}/"
|
49
64
|
when /\A(\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/[VW][a-zA-Z0-9_-]{9}I\/AAAAAAA[AC][a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{32}[gwAQ]CJoC\/)w530-h[23]\d\d-p\/[^\/]+\z/,
|
50
65
|
/\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs(?:YHQ)?)\/)(?:s640|w\d{2,4}-h\d\d\d?-p(?:-k-no-nu)?)\/[^\/]+\z/,
|
@@ -65,18 +80,24 @@ module DirectLink
|
|
65
80
|
/\A(https:\/\/lh[356]\.googleusercontent\.com\/-[a-zA-Z0-9]{11}\/AAAAAAAAAAI\/AAAAAAAAAAA\/[a-zA-Z0-9_]{34}\/)s(?:46|64)-c(?:-k(?:-no)?)?-mo\/photo\.jpg\z/
|
66
81
|
"#{$1}s#{width}/"
|
67
82
|
# Google Keep
|
68
|
-
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\
|
83
|
+
when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\z/
|
69
84
|
"#{$1}#{width}"
|
85
|
+
# opensea
|
86
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{78}-nGx_jf_XGqqiVANe_Jr8u2g=)w1400-k\z/
|
87
|
+
"#{$1}s#{width}"
|
70
88
|
# mp4
|
71
89
|
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z]{11}\/W[a-zA-Z0-9]{9}I\/AAAAAAAAODw\/[a-zA-Z0-9]{32}QCJoC\/)w530-h883-n-k-no\/[^\/]+\.mp4\z/
|
72
90
|
"#{$1}s#{width}/"
|
91
|
+
# something else
|
92
|
+
when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/X-[a-zA-Z0-9]{8}I\/AAAAAAAAALE\/[a-zA-Z0-9]{23}_[a-zA-Z0-9]{19}\/)w1200-h630-p-k-no-nu\/[\d-]+\.png\z/
|
93
|
+
"#{$1}s#{width}/"
|
73
94
|
else
|
74
95
|
raise ErrorBadLink.new src
|
75
96
|
end
|
76
97
|
end
|
77
98
|
|
99
|
+
|
78
100
|
require "json"
|
79
|
-
require "nethttputils"
|
80
101
|
|
81
102
|
# TODO make the timeout handling respect the way the Directlink method works with timeouts
|
82
103
|
def self.imgur link, timeout = 1000
|
@@ -88,7 +109,7 @@ module DirectLink
|
|
88
109
|
NetHTTPUtils.request_data url, header: { Authorization: "Client-ID #{ENV["IMGUR_CLIENT_ID"]}" }
|
89
110
|
rescue NetHTTPUtils::Error => e
|
90
111
|
raise ErrorNotFound.new url.inspect if 404 == e.code
|
91
|
-
if t < timeout && [400, 500, 503].include?(e.code)
|
112
|
+
if t < timeout && [400, 500, 502, 503].include?(e.code)
|
92
113
|
logger.error "retrying in #{t} seconds because of Imgur HTTP ERROR #{e.code}"
|
93
114
|
sleep t
|
94
115
|
t *= 2
|
@@ -107,16 +128,16 @@ module DirectLink
|
|
107
128
|
elsif data["images"]
|
108
129
|
raise ErrorNotFound.new link.inspect if data["images"].empty?
|
109
130
|
data["images"]
|
110
|
-
elsif data["type"] && data["type"]
|
131
|
+
elsif data["type"] && %w{ image/jpeg image/png image/gif video/mp4 }.include?(data["type"])
|
111
132
|
# TODO check if this branch is possible at all
|
112
133
|
[ data ]
|
113
134
|
# elsif data["comment"]
|
114
135
|
# fi["https://imgur.com/" + data["image_id"]]
|
115
136
|
else
|
116
137
|
# one day single-video item should hit this but somehow it didn't yet
|
117
|
-
raise ErrorAssert.new "unknown data format #{
|
138
|
+
raise ErrorAssert.new "unknown data format #{json} for #{link}"
|
118
139
|
end
|
119
|
-
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|
|
140
|
+
when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|jpe?g(?:\?fb)?|png))?\z/,
|
120
141
|
/\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{5})\.mp4\z/,
|
121
142
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{5}(?:[a-zA-Z0-9]{2})?)\z/,
|
122
143
|
/\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{7})(?:\?\S+)?\z/,
|
@@ -128,7 +149,7 @@ module DirectLink
|
|
128
149
|
raise ErrorBadLink.new link
|
129
150
|
end.map do |image|
|
130
151
|
case image["type"]
|
131
|
-
when
|
152
|
+
when *%w{ image/jpeg image/png image/gif video/mp4 }
|
132
153
|
image.values_at "link", "width", "height", "type"
|
133
154
|
else
|
134
155
|
raise ErrorAssert.new "unknown type of #{link}: #{image}"
|
@@ -139,7 +160,6 @@ module DirectLink
|
|
139
160
|
def self._500px link
|
140
161
|
raise ErrorBadLink.new link unless %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[-[a-zA-Z0-9]%]+\/?\z} =~ link
|
141
162
|
require "nokogiri"
|
142
|
-
resp = NetHTTPUtils.request_data link
|
143
163
|
f = lambda do |form|
|
144
164
|
JSON.load(NetHTTPUtils.request_data "https://api.500px.com/v1/photos", form: form).fetch("photos").values.first
|
145
165
|
end
|
@@ -191,11 +211,11 @@ module DirectLink
|
|
191
211
|
attr_accessor :reddit_bot
|
192
212
|
end
|
193
213
|
def self.reddit link, timeout = 1000
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
214
|
+
return [true, link] if URI(link).host &&
|
215
|
+
URI(link).host.split(?.) == %w{ i redd it } &&
|
216
|
+
URI(link).path[/\A\/[a-z0-9]{12,13}\.(gif|jpg)\z/]
|
217
|
+
unless id = link[/\Ahttps:\/\/www\.reddit\.com\/gallery\/([0-9a-z]{5,6})\z/, 1]
|
218
|
+
raise DirectLink::ErrorBadLink.new link unless id = URI(link).path[/\A(?:\/r\/[0-9a-zA-Z_]+)?(?:\/comments|\/duplicates)?\/([0-9a-z]{5,6})(?:\/|\z)/, 1]
|
199
219
|
end
|
200
220
|
retry_on_json_parseerror = lambda do |&b|
|
201
221
|
t = 1
|
@@ -223,17 +243,20 @@ module DirectLink
|
|
223
243
|
end
|
224
244
|
# TODO: do we handle linking Imgur albums?
|
225
245
|
data = json["data"]["children"].first["data"]
|
226
|
-
if data["media"]
|
227
|
-
return [true, data["media"]["reddit_video"]["fallback_url"]]
|
228
|
-
else
|
246
|
+
if data["media"]
|
247
|
+
return [true, data["media"]["reddit_video"]["fallback_url"]] if data["media"]["reddit_video"]
|
229
248
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" unless data["media"].keys.sort == %w{ oembed type } && %w{ youtube.com gfycat.com imgur.com }.include?(data["media"]["type"])
|
230
249
|
return [true, data["media"]["oembed"]["thumbnail_url"]]
|
231
|
-
end
|
232
|
-
|
233
|
-
[
|
234
|
-
|
235
|
-
|
236
|
-
|
250
|
+
end
|
251
|
+
if data["media_metadata"]
|
252
|
+
return [true, data["media_metadata"].values.map do |media|
|
253
|
+
next if media == {"status"=>"failed"} || media == {"status"=>"unprocessed"}
|
254
|
+
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated (media == #{media.inspect})" unless media["status"] == "valid"
|
255
|
+
[media["m"], *media["s"].values_at("x", "y"), CGI.unescapeHTML(media["s"][media["m"]=="image/gif" ? "gif" : "u"])]
|
256
|
+
end.compact]
|
257
|
+
end
|
258
|
+
return [true, "#{"https://www.reddit.com" if /\A\/r\/[0-9a-zA-Z_]+\/comments\/[0-9a-z]{5,6}\// =~ data["url"]}#{data["url"]}"] if data["crosspost_parent"]
|
259
|
+
return [true, CGI.unescapeHTML(data["url"])] unless data["is_self"]
|
237
260
|
raise ErrorAssert.new "our knowledge about Reddit API seems to be outdated" if data["url"] != "https://www.reddit.com" + data["permalink"]
|
238
261
|
return [false, data["selftext"]]
|
239
262
|
end
|
@@ -242,7 +265,7 @@ module DirectLink
|
|
242
265
|
id, mtd, field, f = case link
|
243
266
|
when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))?\z},
|
244
267
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>\d+)_\d+)%2Fphotos\k<user_id>\z},
|
245
|
-
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?all=1)?\z},
|
268
|
+
%r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(?:all|rev)=1)?\z},
|
246
269
|
%r{\Ahttps://vk\.com/feed\?section=likes&z=photo(?<_>)(?<id>-(?<user_id>\d+)_\d+)%2F(liked\d+|album\k<user_id>_0)\z},
|
247
270
|
%r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>-\d+)_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_0)\z},
|
248
271
|
%r{\Ahttps://vk\.com/wall(?<user_id>-\d+)_\d+\?z=photo(?<id>\k<user_id>_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_00%2Frev|\d+)\z}
|
@@ -251,7 +274,7 @@ module DirectLink
|
|
251
274
|
t
|
252
275
|
end ]
|
253
276
|
when %r{\Ahttps://vk\.com/wall(?<id>-?\d+_\d+)\z},
|
254
|
-
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id
|
277
|
+
%r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id>-?\d+_\d+)\z}
|
255
278
|
[$1, :wall, :posts, lambda do |t|
|
256
279
|
t.first.fetch("attachments").select do |item|
|
257
280
|
case item.keys
|
@@ -269,14 +292,14 @@ module DirectLink
|
|
269
292
|
raise ErrorBadLink.new link
|
270
293
|
end
|
271
294
|
raise ErrorMissingEnvVar.new "define VK_ACCESS_TOKEN and VK_CLIENT_SECRET env vars" unless ENV["VK_ACCESS_TOKEN"] && ENV["VK_CLIENT_SECRET"]
|
272
|
-
sleep 0.25 # "error_msg"=>"Too many requests per second"
|
295
|
+
sleep 0.25 unless ENV["CI"] # "error_msg"=>"Too many requests per second"
|
273
296
|
f.call( JSON.load( NetHTTPUtils.request_data "https://api.vk.com/method/#{mtd}.getById",
|
274
297
|
:POST, form: { field => id, :access_token => ENV["VK_ACCESS_TOKEN"], :client_secret => ENV["VK_CLIENT_SECRET"], :v => "5.101" }
|
275
298
|
).fetch("response") ).map do |photos|
|
276
299
|
photos.fetch("sizes").map do |size|
|
277
300
|
size.values_at("width", "height", "url").tap do |whu|
|
278
301
|
w, h, u = whu
|
279
|
-
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0
|
302
|
+
whu[0, 2] = FastImage.new(u, raise_on_failure: true).size if [w, h].include? 0 # wtf?
|
280
303
|
end
|
281
304
|
end.max_by{ |w, h, u| w * h }
|
282
305
|
end
|
@@ -286,9 +309,8 @@ module DirectLink
|
|
286
309
|
end
|
287
310
|
|
288
311
|
|
289
|
-
|
290
|
-
|
291
|
-
def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
312
|
+
def DirectLink link, timeout = nil, proxy = nil, giveup: false, ignore_meta: false
|
313
|
+
timeout ||= DirectLink.timeout
|
292
314
|
ArgumentError.new("link should be a <String>, not <#{link.class}>") unless link.is_a? String
|
293
315
|
begin
|
294
316
|
URI link
|
@@ -326,15 +348,16 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
326
348
|
**( %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
327
349
|
%w{ redd it } == URI(link).host.split(?.) ? {Cookie: "over18=1"} : {} ),
|
328
350
|
}
|
329
|
-
head = NetHTTPUtils.request_data link, :
|
351
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: header, **(proxy ? {proxy: proxy} : {}), **(timeout ? {
|
330
352
|
timeout: timeout,
|
331
353
|
max_start_http_retry_delay: timeout,
|
332
|
-
max_read_retry_delay: timeout
|
354
|
+
max_read_retry_delay: timeout,
|
333
355
|
} : {})
|
334
|
-
rescue Net::ReadTimeout
|
356
|
+
rescue Net::ReadTimeout, Errno::ETIMEDOUT
|
335
357
|
rescue NetHTTPUtils::Error => e
|
336
358
|
raise unless 418 == e.code
|
337
359
|
else
|
360
|
+
raise DirectLink::ErrorAssert.new "last_response.uri is not set" unless head.instance_variable_get(:@last_response).uri
|
338
361
|
link = head.instance_variable_get(:@last_response).uri.to_s
|
339
362
|
end
|
340
363
|
|
@@ -342,10 +365,10 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
342
365
|
# because they can be hidden behind URL shorteners
|
343
366
|
# also it can resolve NetHTTPUtils::Error(404) before trying the adapter
|
344
367
|
|
345
|
-
t = google_without_schema_crutch[] and return t
|
368
|
+
t = google_without_schema_crutch[] and return t # TODO: why again?
|
346
369
|
|
347
370
|
begin
|
348
|
-
imgur = DirectLink.imgur(link).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
371
|
+
imgur = DirectLink.imgur(link, timeout).sort_by{ |u, w, h, t| - w * h }.map do |u, w, h, t|
|
349
372
|
struct.new u, w, h, t
|
350
373
|
end
|
351
374
|
# `DirectLink.imgur` return value is always an Array
|
@@ -383,7 +406,7 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
383
406
|
f = ->_{ _.type == :a ? _.attr["href"] : _.children.flat_map(&f) }
|
384
407
|
require "kramdown"
|
385
408
|
return f[Kramdown::Document.new(u).root].flat_map do |sublink|
|
386
|
-
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup
|
409
|
+
DirectLink URI.join(link, sublink).to_s, timeout, giveup: giveup # TODO: maybe subtract from timeout the time we've already wasted
|
387
410
|
end
|
388
411
|
end
|
389
412
|
if u.is_a? Hash
|
@@ -393,8 +416,8 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
393
416
|
struct.new u, x, y, t
|
394
417
|
end
|
395
418
|
end
|
396
|
-
|
397
|
-
|
419
|
+
raise DirectLink::ErrorNotFound.new link.inspect if link == u
|
420
|
+
return DirectLink u, timeout, giveup: giveup
|
398
421
|
rescue DirectLink::ErrorMissingEnvVar
|
399
422
|
end if %w{ reddit com } == URI(link).host.split(?.).last(2) ||
|
400
423
|
%w{ redd it } == URI(link).host.split(?.)
|
@@ -407,23 +430,27 @@ def DirectLink link, timeout = nil, giveup: false, ignore_meta: false
|
|
407
430
|
end if %w{ vk com } == URI(link).host.split(?.)
|
408
431
|
|
409
432
|
begin
|
410
|
-
f = FastImage.new
|
433
|
+
f = FastImage.new link,
|
434
|
+
raise_on_failure: true,
|
435
|
+
timeout: timeout,
|
436
|
+
**(proxy ? {proxy: "http://#{proxy}"} : {}),
|
437
|
+
http_header: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"}
|
411
438
|
rescue FastImage::UnknownImageType
|
412
439
|
raise if giveup
|
413
440
|
require "nokogiri"
|
414
|
-
head = NetHTTPUtils.request_data link, :
|
441
|
+
head = NetHTTPUtils.request_data link, :HEAD, header: {"User-Agent" => "Mozilla"},
|
415
442
|
max_start_http_retry_delay: timeout,
|
416
443
|
timeout: timeout, # NetHTTPUtild passes this as read_timeout to Net::HTTP.start
|
417
444
|
max_read_retry_delay: timeout # and then compares accumulated delay to this
|
418
445
|
# if we use :get here we will download megabytes of files just to giveup on content_type we can't process
|
419
|
-
case head.instance_variable_get(:@last_response).content_type
|
446
|
+
case head.instance_variable_get(:@last_response).content_type # webmock should provide this
|
420
447
|
when "text/html" ; nil
|
421
448
|
else ; raise
|
422
449
|
end
|
423
|
-
html = Nokogiri::HTML NetHTTPUtils.request_data link, header: {"User-Agent" => "Mozilla"}
|
450
|
+
html = Nokogiri::HTML NetHTTPUtils.request_data link, :GET, header: {"User-Agent" => "Mozilla"}
|
424
451
|
if t = html.at_css("meta[@property='og:image']")
|
425
452
|
begin
|
426
|
-
return DirectLink URI.join(link, t[:content]).to_s, nil, giveup: true
|
453
|
+
return DirectLink URI.join(link, t[:content]).to_s, nil, *proxy, giveup: true
|
427
454
|
rescue URI::InvalidURIError
|
428
455
|
end
|
429
456
|
end unless ignore_meta
|