down 2.5.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,216 +1,4 @@
1
- require "down/version"
2
- require "down/chunked_io"
3
-
4
- require "open-uri"
5
- require "net/http"
6
- require "tempfile"
7
- require "fileutils"
8
- require "cgi"
9
-
10
- module Down
11
- class Error < StandardError; end
12
- class TooLarge < Error; end
13
- class NotFound < Error; end
14
-
15
- module_function
16
-
17
- def download(uri, options = {})
18
- warn "Passing :timeout option to `Down.download` is deprecated and will be removed in Down 3. You should use open-uri's :open_timeout and/or :read_timeout." if options.key?(:timeout)
19
- warn "Passing :progress option to `Down.download` is deprecated and will be removed in Down 3. You should use open-uri's :progress_proc." if options.key?(:progress)
20
-
21
- max_size = options.delete(:max_size)
22
- max_redirects = options.delete(:max_redirects) || 2
23
- progress_proc = options.delete(:progress_proc) || options.delete(:progress)
24
- content_length_proc = options.delete(:content_length_proc)
25
- timeout = options.delete(:timeout)
26
-
27
- if options[:proxy]
28
- proxy = URI(options[:proxy])
29
- user = proxy.user
30
- password = proxy.password
31
-
32
- if user || password
33
- proxy.user = nil
34
- proxy.password = nil
35
-
36
- options[:proxy_http_basic_authentication] = [proxy.to_s, user, password]
37
- options.delete(:proxy)
38
- end
39
- end
40
-
41
- tries = max_redirects + 1
42
-
43
- begin
44
- uri = URI(uri)
45
-
46
- open_uri_options = {
47
- "User-Agent" => "Down/#{VERSION}",
48
- content_length_proc: proc { |size|
49
- if size && max_size && size > max_size
50
- raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
51
- end
52
- content_length_proc.call(size) if content_length_proc
53
- },
54
- progress_proc: proc { |current_size|
55
- if max_size && current_size > max_size
56
- raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
57
- end
58
- progress_proc.call(current_size) if progress_proc
59
- },
60
- read_timeout: timeout,
61
- redirect: false,
62
- }
63
-
64
- if uri.user || uri.password
65
- open_uri_options[:http_basic_authentication] = [uri.user, uri.password]
66
- uri.user = nil
67
- uri.password = nil
68
- end
69
-
70
- open_uri_options.update(options)
71
-
72
- downloaded_file = uri.open(open_uri_options)
73
- rescue OpenURI::HTTPRedirect => redirect
74
- uri = redirect.uri
75
- retry if (tries -= 1) > 0
76
- raise Down::NotFound, "too many redirects"
77
- rescue => error
78
- raise if error.is_a?(Down::Error)
79
- raise Down::NotFound, "file not found"
80
- end
81
-
82
- # open-uri will return a StringIO instead of a Tempfile if the filesize is
83
- # less than 10 KB, so if it happens we convert it back to Tempfile. We want
84
- # to do this with a Tempfile as well, because open-uri doesn't preserve the
85
- # file extension, so we want to run it against #copy_to_tempfile which
86
- # does.
87
- open_uri_file = downloaded_file
88
- downloaded_file = copy_to_tempfile(uri.path, open_uri_file)
89
- OpenURI::Meta.init downloaded_file, open_uri_file
90
-
91
- downloaded_file.extend DownloadedFile
92
- downloaded_file
93
- end
94
-
95
- def stream(url, options = {})
96
- warn "Down.stream is deprecated and will be removed in Down 3. Use Down.open instead."
97
- io = open(url, options)
98
- io.each_chunk { |chunk| yield chunk, io.size }
99
- io.close
100
- end
101
-
102
- def open(uri, options = {})
103
- uri = URI(uri)
104
- http_class = Net::HTTP
1
+ # frozen-string-literal: true
105
2
 
106
- if options[:proxy]
107
- proxy = URI.parse(options[:proxy])
108
- http_class = Net::HTTP::Proxy(proxy.hostname, proxy.port, proxy.user, proxy.password)
109
- end
110
-
111
- http = http_class.new(uri.host, uri.port)
112
-
113
- # taken from open-uri implementation
114
- if uri.is_a?(URI::HTTPS)
115
- require "net/https"
116
- http.use_ssl = true
117
- http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
118
- store = OpenSSL::X509::Store.new
119
- if options[:ssl_ca_cert]
120
- Array(options[:ssl_ca_cert]).each do |cert|
121
- File.directory?(cert) ? store.add_path(cert) : store.add_file(cert)
122
- end
123
- else
124
- store.set_default_paths
125
- end
126
- http.cert_store = store
127
- end
128
-
129
- request_headers = options.select { |key, value| key.is_a?(String) }
130
- get = Net::HTTP::Get.new(uri.request_uri, request_headers)
131
- get.basic_auth(uri.user, uri.password) if uri.user || uri.password
132
-
133
- request = Fiber.new do
134
- http.start do
135
- http.request(get) do |response|
136
- Fiber.yield response
137
- response.instance_variable_set("@read", true)
138
- end
139
- end
140
- end
141
-
142
- response = request.resume
143
-
144
- raise Down::NotFound, "request to #{uri.to_s} returned status #{response.code} and body:\n#{response.body}" if response.code.to_i.between?(400, 599)
145
-
146
- if response.chunked?
147
- # Net::HTTP's implementation of reading "Transfer-Encoding: chunked"
148
- # raises a Fiber error, so we work around it by downloading the whole
149
- # response body without Enumerators (which internally use Fibers).
150
- warn "Response from #{uri.to_s} returned as \"Transfer-Encoding: chunked\", which Down cannot partially download, so the whole response body will be downloaded instead."
151
-
152
- tempfile = Tempfile.new("down", binmode: true)
153
- response.read_body { |chunk| tempfile << chunk }
154
- tempfile.rewind
155
-
156
- request.resume # close HTTP connection
157
-
158
- chunked_io = ChunkedIO.new(
159
- chunks: Enumerator.new { |y| y << tempfile.read(16*1024) until tempfile.eof? },
160
- size: tempfile.size,
161
- on_close: -> { tempfile.close! },
162
- )
163
- else
164
- chunked_io = ChunkedIO.new(
165
- chunks: response.enum_for(:read_body),
166
- size: response["Content-Length"] && response["Content-Length"].to_i,
167
- on_close: -> { request.resume }, # close HTTP connnection
168
- )
169
- end
170
-
171
- chunked_io.data[:status] = response.code.to_i
172
- chunked_io.data[:headers] = {}
173
-
174
- response.each_header do |downcased_name, value|
175
- name = downcased_name.split("-").map(&:capitalize).join("-")
176
- chunked_io.data[:headers].merge!(name => value)
177
- end
178
-
179
- chunked_io
180
- end
181
-
182
- def copy_to_tempfile(basename, io)
183
- tempfile = Tempfile.new(["down", File.extname(basename)], binmode: true)
184
- if io.is_a?(OpenURI::Meta) && io.is_a?(Tempfile)
185
- io.close
186
- tempfile.close
187
- FileUtils.mv io.path, tempfile.path
188
- else
189
- IO.copy_stream(io, tempfile)
190
- io.rewind
191
- end
192
- tempfile.open
193
- tempfile
194
- end
195
-
196
- module DownloadedFile
197
- def original_filename
198
- filename_from_content_disposition || filename_from_uri
199
- end
200
-
201
- private
202
-
203
- def filename_from_content_disposition
204
- content_disposition = meta["content-disposition"].to_s
205
- filename = content_disposition[/filename="([^"]*)"/, 1] || content_disposition[/filename=(.+)/, 1]
206
- filename = CGI.unescape(filename.to_s.strip)
207
- filename unless filename.empty?
208
- end
209
-
210
- def filename_from_uri
211
- path = base_uri.path
212
- filename = path.split("/").last
213
- CGI.unescape(filename) if filename
214
- end
215
- end
216
- end
3
+ require "down/version"
4
+ require "down/net_http" unless Down.respond_to?(:download)
@@ -1,82 +1,117 @@
1
+ # frozen-string-literal: true
2
+
1
3
  require "tempfile"
4
+ require "fiber"
2
5
 
3
6
  module Down
4
7
  class ChunkedIO
5
- attr_reader :tempfile, :data
8
+ attr_accessor :size, :data, :encoding
9
+
10
+ def initialize(chunks:, size: nil, on_close: ->{}, data: {}, rewindable: true, encoding: Encoding::BINARY)
11
+ @chunks = chunks
12
+ @size = size
13
+ @on_close = on_close
14
+ @data = data
15
+ @encoding = find_encoding(encoding)
6
16
 
7
- def initialize(options)
8
- @size = options.fetch(:size)
9
- @chunks = options.fetch(:chunks)
10
- @on_close = options.fetch(:on_close, ->{})
11
- @data = options.fetch(:data, {})
12
- @tempfile = Tempfile.new("down", binmode: true)
17
+ @buffer = String.new("").force_encoding(@encoding)
18
+ @tempfile = Tempfile.new("down-chunked_io", binmode: true) if rewindable
13
19
 
14
- peek_chunk
20
+ retrieve_chunk
15
21
  end
16
22
 
17
- def size
18
- @size
23
+ def each_chunk
24
+ raise IOError, "closed stream" if @closed
25
+
26
+ return enum_for(__method__) if !block_given?
27
+ yield retrieve_chunk until chunks_depleted?
19
28
  end
20
29
 
21
30
  def read(length = nil, outbuf = nil)
22
- download_chunk until enough_downloaded?(length) || download_finished?
23
- @tempfile.read(length, outbuf)
24
- end
31
+ raise IOError, "closed stream" if @closed
25
32
 
26
- def each_chunk
27
- return enum_for(__method__) if !block_given?
28
- yield retrieve_chunk until download_finished?
33
+ outbuf = outbuf.to_s.replace("").force_encoding(@encoding)
34
+
35
+ if @tempfile && !@tempfile.eof?
36
+ @tempfile.read(length, outbuf)
37
+ outbuf.force_encoding(@encoding)
38
+ end
39
+
40
+ until outbuf.bytesize == length || chunks_depleted?
41
+ @buffer << retrieve_chunk if @buffer.empty?
42
+
43
+ buffered_data = if length && length - outbuf.bytesize < @buffer.bytesize
44
+ @buffer.byteslice(0, length - outbuf.bytesize)
45
+ else
46
+ @buffer
47
+ end
48
+
49
+ @tempfile.write(buffered_data) if @tempfile
50
+
51
+ outbuf << buffered_data
52
+
53
+ if buffered_data.bytesize < @buffer.bytesize
54
+ @buffer.replace @buffer.byteslice(buffered_data.bytesize..-1)
55
+ else
56
+ @buffer.clear
57
+ end
58
+ end
59
+
60
+ outbuf unless length && outbuf.empty?
29
61
  end
30
62
 
31
63
  def eof?
32
- @tempfile.eof? && download_finished?
64
+ raise IOError, "closed stream" if @closed
65
+
66
+ return false if @tempfile && !@tempfile.eof?
67
+ @buffer.empty? && chunks_depleted?
33
68
  end
34
69
 
35
70
  def rewind
71
+ raise IOError, "closed stream" if @closed
72
+ raise IOError, "this Down::ChunkedIO is not rewindable" if !@tempfile
73
+
36
74
  @tempfile.rewind
37
75
  end
38
76
 
39
77
  def close
40
- terminate_download
41
- @tempfile.close!
78
+ return if @closed
79
+
80
+ chunks_fiber.resume(:terminate) if chunks_fiber.alive?
81
+ @buffer.clear
82
+ @tempfile.close! if @tempfile
83
+ @closed = true
42
84
  end
43
85
 
44
86
  private
45
87
 
46
- def download_chunk
47
- write(retrieve_chunk)
48
- end
49
-
50
88
  def retrieve_chunk
51
- chunk = @chunks.next
52
- peek_chunk
53
- chunk
54
- end
55
-
56
- def peek_chunk
57
- @chunks.peek
58
- rescue StopIteration
59
- terminate_download
60
- end
61
-
62
- def enough_downloaded?(length)
63
- length && (@tempfile.pos + length <= @tempfile.size)
89
+ chunk = @next_chunk
90
+ @next_chunk = chunks_fiber.resume
91
+ chunk.force_encoding(@encoding) if chunk
64
92
  end
65
93
 
66
- def download_finished?
67
- !@on_close
94
+ def chunks_depleted?
95
+ !chunks_fiber.alive?
68
96
  end
69
97
 
70
- def terminate_download
71
- @on_close.call if @on_close
72
- @on_close = nil
98
+ def chunks_fiber
99
+ @chunks_fiber ||= Fiber.new do
100
+ begin
101
+ @chunks.each do |chunk|
102
+ action = Fiber.yield chunk
103
+ break if action == :terminate
104
+ end
105
+ ensure
106
+ @on_close.call
107
+ end
108
+ end
73
109
  end
74
110
 
75
- def write(chunk)
76
- current_pos = @tempfile.pos
77
- @tempfile.pos = @tempfile.size
78
- @tempfile.write(chunk)
79
- @tempfile.pos = current_pos
111
+ def find_encoding(encoding)
112
+ Encoding.find(encoding)
113
+ rescue ArgumentError
114
+ Encoding::BINARY
80
115
  end
81
116
  end
82
117
  end
@@ -0,0 +1,16 @@
1
module Down
  # Base class for every error raised by Down.
  class Error < StandardError
  end

  # Raised when a file exceeds the allowed maximum size.
  class TooLarge < Error
  end

  # Raised when a file could not be retrieved.
  class NotFound < Error
    # @return [Object, nil] the response that caused the error, when one
    #   was supplied
    attr_reader :response

    # @param message [String, nil] error message; optional so that a bare
    #   `raise Down::NotFound` works instead of failing with ArgumentError
    # @param response [Object, nil] response object to attach to the error
    def initialize(message = nil, response: nil)
      super(message)
      @response = response
    end
  end
end
@@ -0,0 +1,150 @@
1
+ # frozen-string-literal: true
2
+
3
+ require "http"
4
+
5
+ require "down/version"
6
+ require "down/chunked_io"
7
+ require "down/errors"
8
+
9
+ require "tempfile"
10
+ require "cgi"
11
+ require "base64"
12
+
13
# Abort at load time when the loaded HTTP.rb is older than 2.1.0. The
# versions are compared as Gem::Version objects rather than as plain strings.
if Gem::Version.new(HTTP::VERSION) < Gem::Version.new("2.1.0")
  fail "Down requires HTTP.rb version 2.1.0 or higher"
end
16
+
17
module Down
  module_function

  # Downloads +url+ to a Tempfile; delegates to Down::Http.download.
  def download(url, **options, &block)
    Http.download(url, **options, &block)
  end

  # Opens +url+ as a Down::ChunkedIO; delegates to Down::Http.open.
  def open(url, **options, &block)
    Http.open(url, **options, &block)
  end

  # Backend built on top of the HTTP.rb client.
  module Http
    module_function

    # Downloads the remote file into a Tempfile extended with DownloadedFile.
    #
    # @param url [String] location of the remote file
    # @param options [Hash] :max_size (bytes) is consumed here, everything
    #   else is forwarded to #open
    # @yield [client] optional hook for customising the HTTP client
    # @return [Tempfile] the downloaded file
    # @raise [Down::TooLarge] when the file exceeds :max_size
    # @raise [Down::NotFound] see #open
    def download(url, **options, &block)
      max_size = options.delete(:max_size)

      # The content is streamed straight into our own Tempfile, so the
      # ChunkedIO does not need to keep a second, rewindable copy.
      io = open(url, **options, rewindable: false, &block)

      # Reject early when the advertised size is already over the limit.
      if max_size && io.size && io.size > max_size
        raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
      end

      extname  = File.extname(io.data[:response].uri.path)
      tempfile = Tempfile.new(["down", extname], binmode: true)

      io.each_chunk do |chunk|
        tempfile.write(chunk)

        # The size may be unknown or wrong upfront, so check while writing.
        if max_size && tempfile.size > max_size
          raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
        end
      end

      tempfile.open # flush written content

      tempfile.extend DownloadedFile
      tempfile.url     = io.data[:response].uri.to_s
      tempfile.headers = io.data[:headers]

      tempfile
    rescue
      # Do not leave a partially written file behind on failure.
      tempfile.close! if tempfile
      raise
    ensure
      io.close if io
    end

    # Performs the request and wraps the response body in a ChunkedIO.
    #
    # @param url [String] location of the remote file
    # @param options [Hash] :rewindable is passed to ChunkedIO when given,
    #   everything else is forwarded to the HTTP client
    # @yield [client] optional hook for customising the HTTP client
    # @return [Down::ChunkedIO]
    # @raise [Down::NotFound] on a 4xx/5xx response, connection failure,
    #   timeout, unsupported scheme or too many redirects
    def open(url, **options, &block)
      rewindable = options.delete(:rewindable)

      response = get(url, **options, &block)

      if response.code.between?(400, 599)
        raise Down::NotFound.new("file not found", response: response)
      end

      down_options = {
        chunks: response.body.enum_for(:each),
        size:   response.content_length,
        data:   { status: response.status, headers: response.headers.to_h, response: response },
      }
      charset = response.content_type.charset
      down_options[:encoding] = charset if charset
      # NOTE(review): this inspects the default client, not a client returned
      # by the customisation block passed to #get -- confirm that is intended.
      down_options[:on_close] = -> { response.connection.close } unless client.persistent?
      down_options[:rewindable] = rewindable unless rewindable.nil?

      # ChunkedIO#initialize takes keyword arguments only; the hash has to be
      # splatted, because Ruby 3 no longer converts a positional Hash.
      Down::ChunkedIO.new(**down_options)
    rescue HTTP::ConnectionError,
           HTTP::Request::UnsupportedSchemeError,
           HTTP::TimeoutError
      raise Down::NotFound, "file not found"
    rescue HTTP::Redirector::TooManyRedirectsError
      raise Down::NotFound, "too many redirects"
    end

    # Issues the GET request, turning credentials embedded in the URL into a
    # Basic "Authorization" header.
    #
    # @param url [String] location of the remote file
    # @param options [Hash] options forwarded to the HTTP client
    # @yield [client] optional hook; its return value is used as the client
    # @return [Object] whatever the client's #get returns
    def get(url, **options, &block)
      uri = HTTP::URI.parse(url)

      if uri.user || uri.password
        user, pass = uri.user, uri.password
        # pack("m0") is strict (newline-free) Base64 from Ruby core, so no
        # separate base64 library is needed for this.
        authorization = "Basic #{["#{user}:#{pass}"].pack("m0")}"
        # Build a new headers collection rather than merging in place, so the
        # caller's own :headers object is never modified.
        options[:headers] = (options[:headers] || {}).merge("Authorization" => authorization)
        # NOTE(review): the credential-free uri is not what gets requested
        # below (the original url is) -- confirm whether that is intended.
        uri.user = uri.password = nil
      end

      client = self.client
      client = block.call(client) if block
      client.get(url, options)
    end

    # Default HTTP client, memoized in Thread.current.
    def client
      Thread.current[:down_client] ||= ::HTTP.headers("User-Agent" => "Down/#{VERSION}").follow(max_hops: 2)
    end

    # Replaces the client stored in Thread.current.
    def client=(value)
      Thread.current[:down_client] = value
    end

    # Mixed into downloaded Tempfiles to expose information about the
    # originating request.
    module DownloadedFile
      attr_accessor :url, :headers

      # @return [String, nil] filename from the "Content-Disposition" header,
      #   otherwise the last path segment of the URL
      def original_filename
        filename_from_content_disposition || filename_from_url
      end

      # MIME type parsed from the "Content-Type" header.
      def content_type
        content_type_header.mime_type
      end

      # Charset parsed from the "Content-Type" header.
      def charset
        content_type_header.charset
      end

      private

      def content_type_header
        ::HTTP::ContentType.parse(headers["Content-Type"])
      end

      # Prefers the quoted form (filename="...") and falls back to the
      # unquoted one; returns nil when neither yields a non-empty name.
      def filename_from_content_disposition
        content_disposition = headers["Content-Disposition"].to_s
        filename = content_disposition[/filename="([^"]*)"/, 1] || content_disposition[/filename=(.+)/, 1]
        filename = CGI.unescape(filename.to_s.strip)
        filename unless filename.empty?
      end

      # Unescaped last segment of the URL path, or nil when the path has none.
      def filename_from_url
        path = URI(url).path
        filename = path.split("/").last
        CGI.unescape(filename) if filename
      end
    end
  end
end