dratools 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +120 -0
  4. data/bin/dratools +8 -0
  5. data/docs/design.md +80 -0
  6. data/docs/development.md +39 -0
  7. data/docs/environment.md +71 -0
  8. data/docs/usage.md +289 -0
  9. data/lib/dratools/accession_input_collector.rb +53 -0
  10. data/lib/dratools/accession_resolver.rb +104 -0
  11. data/lib/dratools/accession_resource_type_classifier.rb +34 -0
  12. data/lib/dratools/byte_formatter.rb +25 -0
  13. data/lib/dratools/checksum_verifier.rb +34 -0
  14. data/lib/dratools/command_line_interface.rb +138 -0
  15. data/lib/dratools/commands/base_command.rb +189 -0
  16. data/lib/dratools/commands/get_command.rb +87 -0
  17. data/lib/dratools/commands/meta_command.rb +123 -0
  18. data/lib/dratools/commands/probe_command.rb +55 -0
  19. data/lib/dratools/commands/runs_command.rb +70 -0
  20. data/lib/dratools/commands/size_command.rb +163 -0
  21. data/lib/dratools/commands/tree_command.rb +45 -0
  22. data/lib/dratools/commands/url_command.rb +118 -0
  23. data/lib/dratools/config.rb +114 -0
  24. data/lib/dratools/ddbj_record_fields.rb +56 -0
  25. data/lib/dratools/ddbj_resource_client.rb +78 -0
  26. data/lib/dratools/download_candidate.rb +45 -0
  27. data/lib/dratools/download_candidate_builder.rb +90 -0
  28. data/lib/dratools/download_service.rb +221 -0
  29. data/lib/dratools/errors.rb +39 -0
  30. data/lib/dratools/external_command_runner.rb +115 -0
  31. data/lib/dratools/run_record_collector.rb +198 -0
  32. data/lib/dratools/traversal_node.rb +68 -0
  33. data/lib/dratools/tree_renderer.rb +83 -0
  34. data/lib/dratools/version.rb +6 -0
  35. data/lib/dratools.rb +19 -0
  36. metadata +76 -0
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Dratools
  # Collects the resource type names and key names used in DDBJ Search
  # resource JSON.
  module DdbjRecordFields
    # Resource type names.
    SRA_RUN_RESOURCE_TYPE = 'sra-run'
    SRA_EXPERIMENT_RESOURCE_TYPE = 'sra-experiment'
    SRA_SAMPLE_RESOURCE_TYPE = 'sra-sample'
    SRA_STUDY_RESOURCE_TYPE = 'sra-study'
    SRA_SUBMISSION_RESOURCE_TYPE = 'sra-submission'
    BIOPROJECT_RESOURCE_TYPE = 'bioproject'
    BIOSAMPLE_RESOURCE_TYPE = 'biosample'

    # File type names.
    FILE_TYPE_SRA = 'sra'
    FILE_TYPE_FASTQ = 'fastq'
    FILE_TYPE_ALL = 'all'

    # Key names: cross references and record typing.
    DB_XREFS_KEY = 'dbXrefs'
    CHILD_BIOPROJECTS_KEY = 'childBioProjects'
    TYPE_KEY = 'type'

    # Key names: locations.
    URL_KEY = 'url'
    FTP_URL_KEY = 'ftpUrl'

    # Key names: identifiers.
    ID_KEY = 'id'
    IDENTIFIER_KEY = 'identifier'
    ACCESSION_KEY = 'accession'
    PRIMARY_ID_KEY = 'primaryId'

    # Key names: download descriptions.
    DOWNLOAD_URL_KEY = 'downloadUrl'
    DISTRIBUTION_KEY = 'distribution'
    CONTENT_URL_KEY = 'contentUrl'
    CONTENT_SIZE_KEY = 'contentSize'
    SIZE_KEY = 'size'
    FILE_SIZE_KEY = 'fileSize'
    MD5_KEY = 'md5'
    MD5_SUM_KEY = 'md5sum'
    ENCODING_FORMAT_KEY = 'encodingFormat'

    # Ordered list of record keys: identifier and type first, followed by the
    # descriptive fields.
    INFO_FIELD_KEYS = [
      IDENTIFIER_KEY,
      TYPE_KEY,
      *%w[
        title description organism platform instrumentModel
        libraryStrategy librarySource librarySelection libraryLayout libraryName
        dateCreated dateModified datePublished status
      ]
    ].freeze
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'net/http'
5
+ require 'timeout'
6
+ require 'uri'
7
+
8
+ require_relative 'errors'
9
+ require_relative 'version'
10
+
11
module Dratools
  # Thin HTTP client that calls the DDBJ resource API.
  #
  # Every failure is reported as a Dratools error: NotFoundError for HTTP 404
  # and NetworkError for everything else (transport failures, TLS failures,
  # malformed URLs, unexpected status codes, redirect problems, invalid JSON).
  class DdbjResourceClient
    DDBJ_RESOURCE_BASE_URL = 'https://ddbj.nig.ac.jp/resource'
    RESOURCE_RECORD_EXTENSION = '.json'
    HTTPS_SCHEME = 'https'
    HTTP_LOCATION_HEADER = 'location'
    USER_AGENT_HEADER = 'User-Agent'
    DEFAULT_REDIRECT_LIMIT = 5
    DEFAULT_OPEN_TIMEOUT_SECONDS = 10
    DEFAULT_READ_TIMEOUT_SECONDS = 30

    # @param base_url [String] API root; one trailing slash is removed
    # @param open_timeout [Numeric] connection timeout passed to Net::HTTP
    # @param read_timeout [Numeric] read timeout passed to Net::HTTP
    def initialize(base_url: DDBJ_RESOURCE_BASE_URL, open_timeout: DEFAULT_OPEN_TIMEOUT_SECONDS,
                   read_timeout: DEFAULT_READ_TIMEOUT_SECONDS)
      @base_url = base_url.delete_suffix('/')
      @open_timeout = open_timeout
      @read_timeout = read_timeout
    end

    # Fetches "<base_url>/<type>/<accession>.json" and returns the parsed JSON.
    #
    # @param type [String] resource type path segment
    # @param accession [String] accession path segment
    # @return [Object] the parsed JSON document
    # @raise [NotFoundError] when the server answers 404
    # @raise [NetworkError] for every other failure
    def fetch_resource_record(type, accession)
      fetch_json("#{@base_url}/#{type}/#{accession}#{RESOURCE_RECORD_EXTENSION}")
    end

    private

    # Performs a GET, following at most +redirects_remaining+ redirects, and
    # parses the response body as JSON.
    def fetch_json(request_url, redirects_remaining = DEFAULT_REDIRECT_LIMIT)
      request_uri = URI(request_url)
      response = get_http_response(request_uri)

      case response
      when Net::HTTPSuccess
        # A success response without a body has a nil body; coercing it to ''
        # turns that case into a JSON::ParserError (handled below) instead of
        # an unhandled TypeError.
        JSON.parse(response.body.to_s)
      when Net::HTTPRedirection
        raise NetworkError, "too many redirects: #{request_url}" if redirects_remaining <= 0

        location = response[HTTP_LOCATION_HEADER]
        raise NetworkError, "redirect without location: #{request_url}" if location.to_s.empty?

        # URI.join resolves relative Location values against the current URL.
        fetch_json(URI.join(request_uri, location).to_s, redirects_remaining - 1)
      when Net::HTTPNotFound
        raise NotFoundError, "not found: #{request_url}"
      else
        raise NetworkError, "HTTP #{response.code}: #{request_url}"
      end
    rescue JSON::ParserError => error
      raise NetworkError, "invalid JSON from #{request_url}: #{error.message}", cause: error
    rescue Timeout::Error, IOError, SocketError, SystemCallError, URI::InvalidURIError,
           OpenSSL::SSL::SSLError => error
      # URI::InvalidURIError covers malformed request URLs and Location headers;
      # OpenSSL::SSL::SSLError covers TLS handshake/certificate failures, which
      # do not descend from any of the other listed classes. The OpenSSL
      # constant is listed last so it is only resolved when nothing else matched.
      message = "failed to fetch #{request_url}: #{error.class}: #{error.message}"
      raise NetworkError, message, cause: error
    end

    # Issues one GET request (no redirect handling) and returns the response.
    def get_http_response(request_uri)
      Net::HTTP.start(
        request_uri.host,
        request_uri.port,
        use_ssl: request_uri.scheme == HTTPS_SCHEME,
        open_timeout: @open_timeout,
        read_timeout: @read_timeout
      ) do |http|
        http.get(request_uri.request_uri, USER_AGENT_HEADER => user_agent)
      end
    end

    # User-Agent value built from the NAME and VERSION constants.
    def user_agent
      "#{NAME}/#{VERSION}"
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+
5
+ require_relative 'errors'
6
+
7
module Dratools
  # Value object representing a single download candidate.
  class DownloadCandidate
    FTP_PROTOCOL = 'ftp'
    HTTPS_PROTOCOL = 'https'
    HTTP_PROTOCOL = 'http'
    HTTP_BASED_PROTOCOLS = [HTTPS_PROTOCOL, HTTP_PROTOCOL].freeze

    attr_reader :run_accession, :type, :url, :ftp_url, :size, :md5

    # @param type [String] file type of the candidate
    # @param run_accession [String, nil] accession the candidate belongs to
    # @param url [String, nil] primary URL
    # @param ftp_url [String, nil] FTP URL
    # @param size [Object, nil] size as given by the record
    # @param md5 [String, nil] md5 checksum as given by the record
    def initialize(type:, run_accession: nil, url: nil, ftp_url: nil, size: nil, md5: nil)
      @run_accession = run_accession
      @type = type
      @url = url
      @ftp_url = ftp_url
      @size = size
      @md5 = md5
    end

    # Picks the URL for +protocol+, falling back to the other URL when the
    # preferred one is absent.
    #
    # @param protocol [#to_s] 'ftp', 'https' or 'http'
    # @return [String, nil] nil when the candidate carries no URL at all
    # @raise [InvalidProtocolError] for any other protocol
    def url_for_protocol(protocol)
      case protocol.to_s
      when FTP_PROTOCOL
        ftp_url || url
      when *HTTP_BASED_PROTOCOLS
        url || ftp_url
      else
        raise InvalidProtocolError, "unknown protocol: #{protocol}"
      end
    end

    # Last path component of the selected URL.
    #
    # @raise [InvalidRecordError] when the candidate has no URL
    def filename_for_protocol(protocol = HTTPS_PROTOCOL)
      File.basename(url_path_for_protocol(protocol))
    end

    # Whether the path of the selected URL ends with '/'.
    #
    # @raise [InvalidRecordError] when the candidate has no URL
    def directory_url_for_protocol?(protocol = HTTPS_PROTOCOL)
      url_path_for_protocol(protocol).end_with?('/')
    end

    private

    # Path component of the selected URL, always a String.
    #
    # A missing URL is reported as InvalidRecordError: Kernel#URI(nil) would
    # otherwise raise a bare ArgumentError. A URI without a path component
    # yields '' instead of nil.
    def url_path_for_protocol(protocol)
      selected_url = url_for_protocol(protocol)
      if selected_url.to_s.strip.empty?
        raise InvalidRecordError,
              "download URL is missing: #{run_accession || 'unknown accession'} (#{type})"
      end

      URI(selected_url).path.to_s
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ddbj_record_fields'
4
+ require_relative 'download_candidate'
5
+
6
module Dratools
  # Builds DownloadCandidate objects from the downloadUrl/distribution entries
  # of a DDBJ run record.
  class DownloadCandidateBuilder
    # @param run_record [Hash] parsed run record
    # @return [Array<DownloadCandidate>] candidates with a recognised file
    #   type, de-duplicated by (type, url, ftp_url)
    def build_from_run_record(run_record)
      run_accession = run_accession_from(run_record)
      downloads = download_items_from(run_record).filter_map do |download_item|
        build_from_download_item(run_accession, download_item)
      end
      downloads.uniq { |download| download_key(download) }
    end

    private

    # First identifier-like value present in the record.
    def run_accession_from(run_record)
      run_record[DdbjRecordFields::ACCESSION_KEY] ||
        run_record[DdbjRecordFields::IDENTIFIER_KEY] ||
        run_record[DdbjRecordFields::ID_KEY] ||
        run_record[DdbjRecordFields::PRIMARY_ID_KEY]
    end

    # All downloadUrl entries followed by all distribution entries.
    def download_items_from(ddbj_record)
      [DdbjRecordFields::DOWNLOAD_URL_KEY, DdbjRecordFields::DISTRIBUTION_KEY].flat_map do |key|
        items_under(ddbj_record[key])
      end
    end

    # Normalises a field value to an array of items. A JSON null becomes an
    # empty list and a single object is wrapped, so concatenating the two
    # fields can never fail. Kernel#Array is deliberately not used: it would
    # turn a Hash into key/value pairs.
    def items_under(value)
      case value
      when nil then []
      when Array then value
      else [value]
      end
    end

    # Dispatches on the presence of contentUrl; non-Hash items are ignored.
    def build_from_download_item(run_accession, download_item)
      return unless download_item.is_a?(Hash)

      if download_item[DdbjRecordFields::CONTENT_URL_KEY]
        build_from_distribution_item(run_accession, download_item)
      else
        build_from_download_url_item(run_accession, download_item)
      end
    end

    # Candidate from a distribution-style item (contentUrl/encodingFormat).
    def build_from_distribution_item(run_accession, download_item)
      file_type = file_type_from_distribution(download_item)
      return unless file_type

      DownloadCandidate.new(
        run_accession: run_accession,
        type: file_type,
        url: download_item[DdbjRecordFields::CONTENT_URL_KEY],
        ftp_url: nil,
        size: download_item[DdbjRecordFields::CONTENT_SIZE_KEY],
        md5: md5_from(download_item)
      )
    end

    # Candidate from a downloadUrl-style item (url/ftpUrl/type).
    def build_from_download_url_item(run_accession, download_item)
      file_type = file_type_from_download_url(download_item)
      return unless file_type

      DownloadCandidate.new(
        run_accession: run_accession,
        type: file_type,
        url: download_item[DdbjRecordFields::URL_KEY],
        ftp_url: download_item[DdbjRecordFields::FTP_URL_KEY],
        size: download_item[DdbjRecordFields::SIZE_KEY] || download_item[DdbjRecordFields::FILE_SIZE_KEY],
        md5: md5_from(download_item)
      )
    end

    # Checksum stored under either md5 key.
    def md5_from(download_item)
      download_item[DdbjRecordFields::MD5_KEY] || download_item[DdbjRecordFields::MD5_SUM_KEY]
    end

    def file_type_from_distribution(download_item)
      file_type_from(download_item[DdbjRecordFields::ENCODING_FORMAT_KEY])
    end

    def file_type_from_download_url(download_item)
      file_type_from(download_item[DdbjRecordFields::TYPE_KEY])
    end

    # Case-insensitive match against the known file types; nil otherwise.
    def file_type_from(value)
      case value.to_s.downcase
      when DdbjRecordFields::FILE_TYPE_SRA
        DdbjRecordFields::FILE_TYPE_SRA
      when DdbjRecordFields::FILE_TYPE_FASTQ
        DdbjRecordFields::FILE_TYPE_FASTQ
      end
    end

    # Identity used to drop duplicate candidates.
    def download_key(download)
      [download.type, download.url, download.ftp_url]
    end
  end
end
@@ -0,0 +1,221 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi/escape'
4
+ require 'fileutils'
5
+ require 'net/http'
6
+ require 'timeout'
7
+ require 'uri'
8
+
9
+ require_relative 'download_candidate'
10
+ require_relative 'external_command_runner'
11
+ require_relative 'checksum_verifier'
12
+ require_relative 'errors'
13
+ require_relative 'version'
14
+
15
module Dratools
  # Handles fetching resolved downloads, probing them, md5 verification and
  # existing-file checks in one place.
  class DownloadService
    # Outcome of #save_download: the output path and whether the transfer was
    # skipped because a usable file was already present.
    DownloadResult = Struct.new(:path, :skipped, keyword_init: true) do
      def skipped?
        skipped
      end

      def to_s
        path
      end
    end

    DEFAULT_OUTPUT_DIRECTORY = '.'
    DEFAULT_PROTOCOL = DownloadCandidate::HTTPS_PROTOCOL
    DEFAULT_PROBE_TIMEOUT_SECONDS = 5
    DEFAULT_SIZE_TIMEOUT_SECONDS = 10
    DEFAULT_REDIRECT_LIMIT = 5
    HTTP_LOCATION_HEADER = 'location'
    HTTPS_SCHEME = 'https'
    USER_AGENT_HEADER = 'User-Agent'
    # Quoted href attributes whose value contains ".fastq".
    FASTQ_HREF_PATTERN = /href=(?<quote>["'])(?<href>[^"']*\.fastq[^"']*)\k<quote>/i

    # @param runner [ExternalCommandRunner] performs probes and transfers
    # @param checksum_verifier [ChecksumVerifier] performs md5 checks
    def initialize(runner: ExternalCommandRunner.new, checksum_verifier: ChecksumVerifier.new)
      @runner = runner
      @checksum_verifier = checksum_verifier
    end

    # Probes the candidate's URL for +protocol+ through the runner.
    def probe_download(download, protocol: DEFAULT_PROTOCOL, timeout: DEFAULT_PROBE_TIMEOUT_SECONDS)
      @runner.probe_url(download.url_for_protocol(protocol), timeout: timeout)
    end

    # Content lengths reported by HEAD requests.
    #
    # For a directory URL the listing is fetched and one length per linked
    # fastq file is returned; otherwise a single-element array is returned.
    # Unknown lengths are nil, and [nil] is returned when nothing can be
    # determined (non-HTTP URL, missing URL, empty listing, any Dratools error).
    #
    # @return [Array<Integer, nil>]
    def content_lengths(download, protocol: DEFAULT_PROTOCOL, timeout: DEFAULT_SIZE_TIMEOUT_SECONDS)
      download_url = download.url_for_protocol(protocol)
      return [nil] unless http_url?(download_url)

      if download.directory_url_for_protocol?(protocol)
        file_urls = directory_file_urls(download_url, timeout: timeout)
        return [nil] if file_urls.empty?

        return file_urls.map { |file_url| safe_head_content_length(file_url, timeout: timeout) }
      end

      [safe_head_content_length(download_url, timeout: timeout)]
    rescue Error
      [nil]
    end

    # Downloads the candidate into +outdir+.
    #
    # @param download [DownloadCandidate]
    # @param outdir [String] created when missing
    # @param protocol [String] protocol used to pick the URL
    # @param verify [Boolean] verify md5 after the transfer when one is known
    # @param force [Boolean] delete an existing output file first
    # @param skip_existing [Boolean] skip whenever the output file exists
    # @return [DownloadResult]
    # @raise [InvalidRecordError] when the candidate has no URL, the URL points
    #   to a directory, or an existing file is larger than the remote file
    def save_download(
      download,
      outdir: DEFAULT_OUTPUT_DIRECTORY,
      protocol: DEFAULT_PROTOCOL,
      verify: true,
      force: false,
      skip_existing: false
    )
      FileUtils.mkdir_p(outdir)
      download_url = download.url_for_protocol(protocol)
      # Without this guard a candidate that carries no URL fails further down
      # with a bare ArgumentError from Kernel#URI(nil).
      if download_url.to_s.strip.empty?
        raise InvalidRecordError, "download URL is missing: #{download.run_accession} (#{download.type})"
      end
      if download.directory_url_for_protocol?(protocol)
        raise InvalidRecordError, "download URL points to a directory: #{download_url}"
      end

      output_path = File.join(outdir, download.filename_for_protocol(protocol))
      FileUtils.rm_f(output_path) if force && File.file?(output_path)
      if should_skip_existing?(
        output_path,
        download,
        download_url: download_url,
        skip_existing: skip_existing
      )
        return DownloadResult.new(path: output_path, skipped: true)
      end

      @runner.download_url(download_url, output_path)
      if verify && checksum_available?(download)
        @checksum_verifier.verify_md5!(output_path, download.md5)
      end
      DownloadResult.new(path: output_path, skipped: false)
    end

    private

    # Decides whether an existing output file makes the transfer unnecessary:
    # always when +skip_existing+ is set, otherwise by md5 when one is known,
    # otherwise by comparing sizes with the remote file.
    def should_skip_existing?(output_path, download, download_url:, skip_existing:)
      return false unless File.file?(output_path)
      return true if skip_existing
      if checksum_available?(download)
        return @checksum_verifier.md5_matches?(output_path, download.md5)
      end

      existing_file_complete?(output_path, download_url)
    end

    # true when local and remote sizes match; false when the remote size is
    # unknown or the local file is shorter.
    #
    # @raise [InvalidRecordError] when the local file is larger than the remote
    def existing_file_complete?(output_path, download_url)
      remote_size = safe_head_content_length(download_url, timeout: DEFAULT_SIZE_TIMEOUT_SECONDS)
      return false unless remote_size

      local_size = File.size(output_path)
      return true if local_size == remote_size
      return false if local_size < remote_size

      raise InvalidRecordError,
            "existing file is larger than remote file: #{output_path} " \
            "(local=#{local_size}, remote=#{remote_size}); use --force to re-download"
    end

    def checksum_available?(download)
      !download.md5.to_s.strip.empty?
    end

    # Whether +url+ parses as an http/https URL. nil and other non-String
    # values make Kernel#URI raise ArgumentError (not TypeError), so that is
    # rescued as well.
    def http_url?(url)
      return false if url.nil?

      DownloadCandidate::HTTP_BASED_PROTOCOLS.include?(URI(url).scheme)
    rescue ArgumentError, TypeError, URI::InvalidURIError
      false
    end

    # Absolute URLs of the fastq links found in the directory listing at
    # +directory_url+, following redirects.
    #
    # @raise [NetworkError] on transport, TLS, URL, status or redirect problems
    def directory_file_urls(directory_url, timeout:, redirects_remaining: DEFAULT_REDIRECT_LIMIT)
      request_uri = URI(directory_url)
      response = get_http_response(request_uri, timeout: timeout)

      case response
      when Net::HTTPSuccess
        fastq_urls_in(response.body.to_s, request_uri)
      when Net::HTTPRedirection
        directory_file_urls(
          redirect_target(directory_url, request_uri, response, redirects_remaining),
          timeout: timeout,
          redirects_remaining: redirects_remaining - 1
        )
      else
        raise NetworkError, "HTTP #{response.code}: #{directory_url}"
      end
    rescue Timeout::Error, IOError, SocketError, SystemCallError, URI::InvalidURIError,
           OpenSSL::SSL::SSLError => error
      raise NetworkError, fetch_failure_message(directory_url, error), cause: error
    end

    # Content-Length reported by a HEAD request, following redirects.
    #
    # @return [Integer, nil] nil when the header is absent
    # @raise [NetworkError] on transport, TLS, URL, status or redirect problems
    def head_content_length(request_url, timeout:, redirects_remaining: DEFAULT_REDIRECT_LIMIT)
      request_uri = URI(request_url)
      response = head_http_response(request_uri, timeout: timeout)

      case response
      when Net::HTTPSuccess
        response.content_length
      when Net::HTTPRedirection
        head_content_length(
          redirect_target(request_url, request_uri, response, redirects_remaining),
          timeout: timeout,
          redirects_remaining: redirects_remaining - 1
        )
      else
        raise NetworkError, "HTTP #{response.code}: #{request_url}"
      end
    rescue Timeout::Error, IOError, SocketError, SystemCallError, URI::InvalidURIError,
           OpenSSL::SSL::SSLError => error
      # OpenSSL::SSL::SSLError does not descend from the other listed classes;
      # wrapping it keeps safe_head_content_length's "rescue Error" effective
      # for TLS failures. It is listed last so the constant is only resolved
      # when nothing else matched.
      raise NetworkError, fetch_failure_message(request_url, error), cause: error
    end

    # Best-effort variant of #head_content_length: nil on any Dratools error.
    def safe_head_content_length(request_url, timeout:)
      head_content_length(request_url, timeout: timeout)
    rescue Error
      nil
    end

    # Extracts fastq hrefs from an HTML listing and resolves them against
    # +base_uri+, dropping duplicates.
    def fastq_urls_in(listing_html, base_uri)
      listing_html.scan(FASTQ_HREF_PATTERN).map do |match|
        # With named groups, scan yields [quote, href]; the href is last.
        href = CGI.unescapeHTML(match.last)
        URI.join(base_uri, href).to_s
      end.uniq
    end

    # Validates a redirect response and returns the absolute URL to follow.
    #
    # @raise [NetworkError] when the redirect budget is exhausted or the
    #   Location header is missing
    def redirect_target(request_url, request_uri, response, redirects_remaining)
      raise NetworkError, "too many redirects: #{request_url}" if redirects_remaining <= 0

      location = response[HTTP_LOCATION_HEADER]
      raise NetworkError, "redirect without location: #{request_url}" if location.to_s.empty?

      URI.join(request_uri, location).to_s
    end

    def get_http_response(request_uri, timeout:)
      with_http_session(request_uri, timeout: timeout) do |http|
        http.get(request_uri.request_uri, USER_AGENT_HEADER => user_agent)
      end
    end

    def head_http_response(request_uri, timeout:)
      with_http_session(request_uri, timeout: timeout) do |http|
        http.head(request_uri.request_uri, USER_AGENT_HEADER => user_agent)
      end
    end

    # Opens a Net::HTTP session for +request_uri+ (TLS for https) using
    # +timeout+ for both connect and read, and returns the block's value.
    def with_http_session(request_uri, timeout:, &block)
      Net::HTTP.start(
        request_uri.host,
        request_uri.port,
        use_ssl: request_uri.scheme == HTTPS_SCHEME,
        open_timeout: timeout,
        read_timeout: timeout,
        &block
      )
    end

    # User-Agent value built from the NAME and VERSION constants.
    def user_agent
      "#{NAME}/#{VERSION}"
    end

    def fetch_failure_message(url, error)
      "failed to fetch #{url}: #{error.class}: #{error.message}"
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Dratools
  # Root of the dratools exception hierarchy; rescuing it catches every error
  # class defined below.
  class Error < StandardError; end

  # Input errors and their specialisations.
  class InputError < Error; end
  class MissingAccessionError < InputError; end
  class InputFileError < InputError; end

  # Errors that derive directly from Error.
  class InvalidOptionError < Error; end
  class InvalidProtocolError < Error; end
  class NotFoundError < Error; end
  class InvalidRecordError < Error; end
  class NetworkError < Error; end
  class CommandError < Error; end
  class ChecksumError < Error; end
  class UnsupportedAccessionError < Error; end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'open3'
4
+ require 'shellwords'
5
+ require 'English'
6
+
7
+ require_relative 'config'
8
+ require_relative 'errors'
9
+
10
module Dratools
  # Wrapper that checks and downloads URLs using curl or wget.
  #
  # Probes are kept short and silent; downloads let the external command's
  # progress flow to the terminal. Because huge files are handled, downloads
  # rely on stall detection rather than a total time limit.
  class ExternalCommandRunner
    CURL_COMMAND = 'curl'
    WGET_COMMAND = 'wget'
    SUPPORTED_COMMANDS = [CURL_COMMAND, WGET_COMMAND].freeze
    COMMAND_NOT_FOUND_MESSAGE = 'curl または wget が見つかりません'
    DEFAULT_PROBE_TIMEOUT_SECONDS = 5
    PROBE_BYTE_RANGE = '0-0'
    SINGLE_ATTEMPT_COUNT = 1

    CURL_PROBE_OPTIONS = ['--location', '--fail', '--silent', '--show-error', '--range'].freeze
    CURL_TIMEOUT_OPTION = '--max-time'
    CURL_CONNECT_TIMEOUT_OPTION = '--connect-timeout'
    CURL_SPEED_LIMIT_OPTION = '--speed-limit'
    CURL_SPEED_TIME_OPTION = '--speed-time'
    CURL_RETRY_OPTION = '--retry'
    CURL_OUTPUT_OPTION = '--output'
    CURL_DOWNLOAD_OPTIONS = ['--location', '--fail', '--continue-at', '-'].freeze

    WGET_PROBE_OPTIONS = ['--spider'].freeze
    WGET_TIMEOUT_OPTION = '--timeout'
    WGET_CONNECT_TIMEOUT_OPTION = '--connect-timeout'
    WGET_READ_TIMEOUT_OPTION = '--read-timeout'
    WGET_TRIES_OPTION = '--tries'
    WGET_WAITRETRY_OPTION = '--waitretry'
    WGET_CONTINUE_OPTION = '--continue'
    WGET_OUTPUT_OPTION = '--output-document'

    # @param preferred [String, nil] command tried before the supported ones;
    #   either a bare command name looked up on PATH or a path to an executable
    def initialize(preferred: nil)
      @preferred = preferred
    end

    # First usable command among the preferred one, curl and wget.
    #
    # @return [String, nil] nil when none is executable
    def available_command
      candidates = [@preferred, *SUPPORTED_COMMANDS].compact
      candidates.find { |command_name| executable_command?(command_name) }
    end

    # Checks that +url+ is reachable without downloading it.
    #
    # @return [true]
    # @raise [CommandError] when no tool is available or the command fails
    def probe_url(url, timeout: DEFAULT_PROBE_TIMEOUT_SECONDS)
      tool = available_command || raise(CommandError, COMMAND_NOT_FOUND_MESSAGE)
      # Keep the check short and restricted to a minimal byte range so a huge
      # file is never pulled down.
      command =
        if File.basename(tool) == CURL_COMMAND
          [tool, *CURL_PROBE_OPTIONS, PROBE_BYTE_RANGE, CURL_TIMEOUT_OPTION, timeout.to_s,
           CURL_OUTPUT_OPTION, null_device, url]
        else
          [tool, *WGET_PROBE_OPTIONS, "#{WGET_TIMEOUT_OPTION}=#{timeout}",
           "#{WGET_TRIES_OPTION}=#{SINGLE_ATTEMPT_COUNT}", url]
        end
      run_quietly(command)
    end

    # Downloads +url+ to +output_path+, resuming a partial file, with the
    # timeouts and retry settings read from Config.
    #
    # @return [true]
    # @raise [CommandError] when no tool is available or the command fails
    def download_url(url, output_path)
      tool = available_command || raise(CommandError, COMMAND_NOT_FOUND_MESSAGE)
      command =
        if File.basename(tool) == CURL_COMMAND
          [tool, *CURL_DOWNLOAD_OPTIONS,
           CURL_CONNECT_TIMEOUT_OPTION, Config.download_connect_timeout_seconds.to_s,
           CURL_SPEED_LIMIT_OPTION, Config.download_stall_speed_bytes_per_second.to_s,
           CURL_SPEED_TIME_OPTION, Config.download_stall_timeout_seconds.to_s,
           CURL_RETRY_OPTION, Config.download_retry_count.to_s,
           CURL_OUTPUT_OPTION, output_path, url]
        else
          [tool, WGET_CONTINUE_OPTION,
           "#{WGET_CONNECT_TIMEOUT_OPTION}=#{Config.download_connect_timeout_seconds}",
           "#{WGET_READ_TIMEOUT_OPTION}=#{Config.download_stall_timeout_seconds}",
           "#{WGET_TRIES_OPTION}=#{Config.download_retry_count}",
           "#{WGET_WAITRETRY_OPTION}=#{Config.download_retry_wait_seconds}",
           WGET_OUTPUT_OPTION, output_path, url]
        end
      run_streaming(command)
    end

    private

    # Runs +command+ capturing its output; the captured stdout/stderr are put
    # in the error message on failure.
    def run_quietly(command)
      out, err, status = Open3.capture3(*command)
      return true if status.success?

      raise CommandError, "#{command.shelljoin}\n#{out}#{err}"
    end

    # Runs +command+ with inherited stdio.
    def run_streaming(command)
      # Passing an argument array bypasses the shell and lets curl/wget's
      # stderr progress output through unchanged.
      return true if system(*command)

      status = $CHILD_STATUS
      detail = status ? "exit status: #{status.exitstatus}" : 'command failed'
      raise CommandError, "#{command.shelljoin}\n#{detail}"
    end

    # Whether +command_name+ refers to an executable regular file.
    #
    # A name containing a directory part is checked as given; joining it onto
    # every PATH entry would never match, so a preferred tool configured as a
    # path would be silently ignored. Bare names are searched on PATH.
    def executable_command?(command_name)
      return executable_file?(command_name) if File.basename(command_name) != command_name

      ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).any? do |directory|
        executable_file?(File.join(directory, command_name))
      end
    end

    def executable_file?(command_path)
      File.file?(command_path) && File.executable?(command_path)
    end

    def null_device
      File::NULL
    end
  end
end