dratools 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +120 -0
- data/bin/dratools +8 -0
- data/docs/design.md +80 -0
- data/docs/development.md +39 -0
- data/docs/environment.md +71 -0
- data/docs/usage.md +289 -0
- data/lib/dratools/accession_input_collector.rb +53 -0
- data/lib/dratools/accession_resolver.rb +104 -0
- data/lib/dratools/accession_resource_type_classifier.rb +34 -0
- data/lib/dratools/byte_formatter.rb +25 -0
- data/lib/dratools/checksum_verifier.rb +34 -0
- data/lib/dratools/command_line_interface.rb +138 -0
- data/lib/dratools/commands/base_command.rb +189 -0
- data/lib/dratools/commands/get_command.rb +87 -0
- data/lib/dratools/commands/meta_command.rb +123 -0
- data/lib/dratools/commands/probe_command.rb +55 -0
- data/lib/dratools/commands/runs_command.rb +70 -0
- data/lib/dratools/commands/size_command.rb +163 -0
- data/lib/dratools/commands/tree_command.rb +45 -0
- data/lib/dratools/commands/url_command.rb +118 -0
- data/lib/dratools/config.rb +114 -0
- data/lib/dratools/ddbj_record_fields.rb +56 -0
- data/lib/dratools/ddbj_resource_client.rb +78 -0
- data/lib/dratools/download_candidate.rb +45 -0
- data/lib/dratools/download_candidate_builder.rb +90 -0
- data/lib/dratools/download_service.rb +221 -0
- data/lib/dratools/errors.rb +39 -0
- data/lib/dratools/external_command_runner.rb +115 -0
- data/lib/dratools/run_record_collector.rb +198 -0
- data/lib/dratools/traversal_node.rb +68 -0
- data/lib/dratools/tree_renderer.rb +83 -0
- data/lib/dratools/version.rb +6 -0
- data/lib/dratools.rb +19 -0
- metadata +76 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
|
|
5
|
+
module Dratools
|
|
6
|
+
# Gathers accessions from positional arguments, an input file, and standard input.
#
# Every value is stripped and upcased, blank values are dropped, and the merged
# list is de-duplicated while keeping first-seen order (positional values first).
class AccessionInputCollector
  STANDARD_INPUT_PATH = '-'
  MISSING_ACCESSION_ARGUMENT = 'ACCESSION'
  INPUT_OPTION_NAME = '--input'

  # argv:       positional values left over after option parsing.
  # input_path: path given to --input; '-' selects standard input, nil means "not given".
  # stdin:      stream used for standard input (injectable for tests).
  def initialize(argv:, input_path: nil, stdin: $stdin)
    @argv = argv
    @input_path = input_path
    @stdin = stdin
  end

  # Returns the unique, normalized accessions from every source.
  # Raises MissingAccessionError when nothing usable was supplied.
  def collect_accessions
    merged = [*positional_accessions, *streamed_accessions].uniq
    return merged unless merged.empty?

    raise MissingAccessionError, "#{MISSING_ACCESSION_ARGUMENT} is required"
  end

  private

  # Accessions passed directly on the command line.
  def positional_accessions
    normalized_non_blank(@argv)
  end

  # Accessions read from --input (file or '-') or from piped standard input.
  #
  # When --input is not given, standard input is still read whenever it is not
  # a TTY, even if positional accessions were supplied as well.
  # System-level read failures are re-raised as InputFileError.
  def streamed_accessions
    reads_stdin = @input_path == STANDARD_INPUT_PATH || (!@input_path && !stdin_tty?)

    if reads_stdin
      parse_accessions(@stdin.read)
    elsif @input_path
      parse_accessions(File.read(@input_path))
    else
      []
    end
  rescue SystemCallError => error
    raise InputFileError, "#{INPUT_OPTION_NAME}: #{error.message}"
  end

  # Treats each line of the given text as one accession.
  def parse_accessions(content)
    normalized_non_blank(content.each_line)
  end

  # Normalizes every value and discards the ones that end up empty.
  def normalized_non_blank(values)
    values.map { |raw| normalize_accession(raw) }.reject(&:empty?)
  end

  def normalize_accession(value)
    value.to_s.strip.upcase
  end

  # True only when the stream can report TTY status and reports being one.
  def stdin_tty?
    return false unless @stdin.respond_to?(:tty?)

    @stdin.tty?
  end
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'accession_resource_type_classifier'
|
|
4
|
+
require_relative 'ddbj_record_fields'
|
|
5
|
+
require_relative 'ddbj_resource_client'
|
|
6
|
+
require_relative 'download_candidate_builder'
|
|
7
|
+
require_relative 'errors'
|
|
8
|
+
require_relative 'run_record_collector'
|
|
9
|
+
require_relative 'traversal_node'
|
|
10
|
+
|
|
11
|
+
module Dratools
|
|
12
|
+
# Mediator that takes an accession and resolves it to the actual file candidates on DDBJ.
class AccessionResolver
  FILE_TYPE_SRA = DdbjRecordFields::FILE_TYPE_SRA
  FILE_TYPE_FASTQ = DdbjRecordFields::FILE_TYPE_FASTQ
  FILE_TYPE_ALL = DdbjRecordFields::FILE_TYPE_ALL

  # All collaborators are injectable. When no run_record_collector is given,
  # one is built around the same client.
  def initialize(
    client: DdbjResourceClient.new,
    resource_type_classifier: AccessionResourceTypeClassifier.new,
    run_record_collector: nil,
    download_candidate_builder: DownloadCandidateBuilder.new
  )
    @client = client
    @resource_type_classifier = resource_type_classifier
    @download_candidate_builder = download_candidate_builder
    @run_record_collector = run_record_collector || RunRecordCollector.new(client: client)
  end

  # Fetches the record for the (upcased) accession and returns its downloads.
  # Raises NotFoundError when no download matches file_type.
  def resolve_downloads(accession, file_type: FILE_TYPE_SRA)
    normalized = accession.to_s.upcase
    record = fetch_record_for(normalized)
    resolve_downloads_from_record(normalized, record, file_type: file_type)
  end

  # Same as resolve_downloads, but starts from an already fetched record.
  # The tree is explored with tolerant: false.
  def resolve_downloads_from_record(accession, ddbj_record, file_type: FILE_TYPE_SRA)
    candidates = resolve_tree_from_record(
      accession, ddbj_record, file_type: file_type, tolerant: false
    ).downloads
    return candidates unless candidates.empty?

    raise NotFoundError, "download URL not found: #{accession.to_s.upcase} (type=#{file_type})"
  end

  # Fetches the record for the (upcased) accession and returns the traversal
  # tree with download nodes attached.
  def resolve_tree(accession, file_type: FILE_TYPE_SRA, tolerant: true,
                   direct_run_fetch_limit: nil)
    normalized = accession.to_s.upcase
    record = fetch_record_for(normalized)
    resolve_tree_from_record(
      normalized,
      record,
      file_type: file_type,
      tolerant: tolerant,
      direct_run_fetch_limit: direct_run_fetch_limit
    )
  end

  # Explores the record via the run record collector, attaches download nodes
  # for the requested file type, and returns the tree. The accession argument
  # is accepted but not used.
  def resolve_tree_from_record(_accession, ddbj_record, file_type: FILE_TYPE_SRA, tolerant: true,
                               direct_run_fetch_limit: nil)
    explored = @run_record_collector.explore(
      ddbj_record,
      tolerant: tolerant,
      direct_run_fetch_limit: direct_run_fetch_limit
    )
    explored.tap { |tree| attach_downloads(tree, file_type: file_type) }
  end

  # Classifies the accession, then asks the client for the matching record.
  def fetch_record_for(accession)
    @client.fetch_resource_record(resource_type_for(accession), accession)
  end

  def resource_type_for(accession)
    @resource_type_classifier.resource_type_for(accession)
  end

  private

  # Walks the tree depth-first. Run nodes that carry a record receive one
  # child per matching download; download nodes themselves are not descended into.
  def attach_downloads(node, file_type:)
    append_download_children(node, file_type) if node.run? && node.record

    node.children.each do |child|
      next if child.download?

      attach_downloads(child, file_type: file_type)
    end
    node
  end

  # Builds the candidates for a run node, filters them in place by file type
  # (FILE_TYPE_ALL keeps everything), and appends them as download nodes.
  def append_download_children(node, file_type)
    candidates = @download_candidate_builder.build_from_run_record(node.record)
    candidates.select! { |candidate| file_type == FILE_TYPE_ALL || candidate.type == file_type }
    node.children.concat(candidates.map { |candidate| download_node(candidate) })
  end

  # Wraps a download candidate in a traversal node.
  def download_node(download)
    TraversalNode.new(
      relation: TraversalNode::DOWNLOAD_RELATION,
      type: download.type,
      accession: download.run_accession,
      url: download.url,
      download: download
    )
  end
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'ddbj_record_fields'
|
|
4
|
+
require_relative 'errors'
|
|
5
|
+
|
|
6
|
+
module Dratools
|
|
7
|
+
# Determines the DDBJ resource API type from the prefix of an accession.
class AccessionResourceTypeClassifier
  RUN_PREFIXES = /\A[DES]RR\d+\z/
  EXPERIMENT_PREFIXES = /\A[DES]RX\d+\z/
  SAMPLE_PREFIXES = /\A[DES]RS\d+\z/
  STUDY_PREFIXES = /\A[DES]RP\d+\z/
  SUBMISSION_PREFIXES = /\A[DES]RA\d+\z/
  BIOPROJECT_PREFIXES = /\APRJ(?:DA|DB|NA|EB)\d+\z/
  BIOSAMPLE_PREFIXES = /\ASAM(?:D|N|EA|EG)?\d+\z/

  # Ordered [pattern, resource type] rules; the first matching pattern wins.
  TYPE_BY_ACCESSION = [
    [RUN_PREFIXES, DdbjRecordFields::SRA_RUN_RESOURCE_TYPE],
    [EXPERIMENT_PREFIXES, DdbjRecordFields::SRA_EXPERIMENT_RESOURCE_TYPE],
    [SAMPLE_PREFIXES, DdbjRecordFields::SRA_SAMPLE_RESOURCE_TYPE],
    [STUDY_PREFIXES, DdbjRecordFields::SRA_STUDY_RESOURCE_TYPE],
    [SUBMISSION_PREFIXES, DdbjRecordFields::SRA_SUBMISSION_RESOURCE_TYPE],
    [BIOPROJECT_PREFIXES, DdbjRecordFields::BIOPROJECT_RESOURCE_TYPE],
    [BIOSAMPLE_PREFIXES, DdbjRecordFields::BIOSAMPLE_RESOURCE_TYPE]
  ].freeze

  # Returns the resource type of the first rule whose pattern matches.
  # Matching is case-sensitive, so callers are expected to pass an upcased accession.
  # Raises UnsupportedAccessionError when no rule matches.
  def resource_type_for(accession)
    TYPE_BY_ACCESSION.each do |pattern, resource_type|
      return resource_type if pattern.match?(accession)
    end

    raise UnsupportedAccessionError, "unsupported accession: #{accession}"
  end
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Dratools
|
|
4
|
+
# Byte count formatter for human-readable IEC units.
module ByteFormatter
  UNITS = %w[B KiB MiB GiB TiB PiB].freeze
  UNIT_BASE = 1024.0

  module_function

  # Formats a byte count using the largest IEC unit that keeps the value below 1024.
  #
  # Counts below 1024 are printed as whole bytes ("512 B"); larger counts are
  # printed with one decimal place ("1.5 KiB"). Values beyond the largest unit
  # stay in that unit (PiB).
  #
  # bytes - anything responding to to_f / to_i (nil formats as "0 B").
  #
  # Returns the formatted String.
  def format(bytes)
    value = bytes.to_f
    unit_index = 0

    while value >= UNIT_BASE && unit_index < UNITS.length - 1
      value /= UNIT_BASE
      unit_index += 1
    end

    return "#{bytes.to_i} B" if unit_index.zero?

    rendered = Kernel.format('%.1f', value)

    # Rounding to one decimal can push a value just under the boundary up to
    # "1024.0" (e.g. 1_048_575 bytes -> "1024.0 KiB"). Promote it to the next
    # unit so the output reads "1.0 MiB" instead.
    if rendered.to_f >= UNIT_BASE && unit_index < UNITS.length - 1
      unit_index += 1
      rendered = Kernel.format('%.1f', value / UNIT_BASE)
    end

    "#{rendered} #{UNITS[unit_index]}"
  end
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest/md5'
|
|
4
|
+
|
|
5
|
+
require_relative 'errors'
|
|
6
|
+
|
|
7
|
+
module Dratools
|
|
8
|
+
# Verifies the checksum of a downloaded file.
#
# Digest::MD5.file processes the file as a stream, so even huge files are not
# loaded into memory as a whole.
class ChecksumVerifier
  # Returns true when the file's MD5 equals expected_md5, ignoring letter case
  # and surrounding whitespace in the expected value.
  def md5_matches?(path, expected_md5)
    wanted = normalize_md5(expected_md5)
    md5_for(path).casecmp?(wanted)
  end

  # Returns true when the file's MD5 matches; raises ChecksumError otherwise.
  # The file is hashed once, and the digest is reused in the error message.
  def verify_md5!(path, expected_md5)
    wanted = normalize_md5(expected_md5)
    computed = md5_for(path)

    unless computed.casecmp?(wanted)
      raise ChecksumError, "MD5 mismatch for #{path}: expected #{wanted}, got #{computed}"
    end

    true
  end

  private

  # Coerces the expected digest to a String without surrounding whitespace.
  def normalize_md5(md5)
    md5.to_s.strip
  end

  # Hex digest of the file at path.
  def md5_for(path)
    Digest::MD5.file(path).hexdigest
  end
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'version'
|
|
4
|
+
require_relative 'accession_resolver'
|
|
5
|
+
require_relative 'download_service'
|
|
6
|
+
require_relative 'commands/url_command'
|
|
7
|
+
require_relative 'commands/get_command'
|
|
8
|
+
require_relative 'commands/probe_command'
|
|
9
|
+
require_relative 'commands/tree_command'
|
|
10
|
+
require_relative 'commands/meta_command'
|
|
11
|
+
require_relative 'commands/runs_command'
|
|
12
|
+
require_relative 'commands/size_command'
|
|
13
|
+
|
|
14
|
+
module Dratools
|
|
15
|
+
# CLI entry point that dispatches to the subcommands.
class CommandLineInterface
  COMMAND_NAME = Dratools::NAME
  SUCCESS_EXIT_STATUS = 0
  FAILURE_EXIT_STATUS = 1

  SUBCOMMANDS = {
    'url' => Commands::UrlCommand,
    'get' => Commands::GetCommand,
    'probe' => Commands::ProbeCommand,
    'tree' => Commands::TreeCommand,
    'meta' => Commands::MetaCommand,
    'runs' => Commands::RunsCommand,
    'size' => Commands::SizeCommand
  }.freeze

  # Aliases that forgive singular/plural typos: typing the left-hand name runs
  # the canonical command on the right. Help, errors and banners always show
  # the canonical (right-hand) name.
  SUBCOMMAND_ALIASES = {
    'run' => 'runs',
    'urls' => 'url',
    'sizes' => 'size',
    'trees' => 'tree'
  }.freeze

  HELP_FLAGS = %w[-h --help help].freeze
  VERSION_FLAGS = %w[-v --version version].freeze

  SUBCOMMAND_SUMMARIES = {
    'url' => 'ダウンロード URL を表示する (--json で JSON)',
    'get' => 'ファイルをダウンロードする',
    'probe' => '短時間の接続確認だけ行う',
    'tree' => '探索ツリーを表示する',
    'meta' => 'レコードのメタ情報を表示する (--json で生 JSON)',
    'runs' => 'run accession の一覧を出力する',
    'size' => 'ダウンロード合計サイズを集計する'
  }.freeze

  USAGE_EXAMPLES = [
    "#{COMMAND_NAME} url DRR000001",
    "#{COMMAND_NAME} meta DRR000001",
    "#{COMMAND_NAME} runs PRJNA341783",
    "#{COMMAND_NAME} size PRJNA341783",
    "#{COMMAND_NAME} get -O ~/Downloads DRR000001",
    "#{COMMAND_NAME} tree PRJNA341783",
    "#{COMMAND_NAME} url --input accessions.txt",
    "printf 'DRR000001\\nDRR000002\\n' | #{COMMAND_NAME} url"
  ].freeze

  # Convenience entry point: builds an interface for argv and runs it.
  def self.start(argv)
    new(argv).run
  end

  # The resolver, downloader and all three streams are injectable and are
  # handed on to the selected subcommand.
  def initialize(
    argv,
    resolver: AccessionResolver.new,
    downloader: DownloadService.new,
    stdout: $stdout,
    stderr: $stderr,
    stdin: $stdin
  )
    @argv = argv
    @resolver = resolver
    @downloader = downloader
    @stdout = stdout
    @stderr = stderr
    @stdin = stdin
  end

  # Interprets the first argument and returns an exit status.
  #
  # - no argument:  help on stderr, failure
  # - help flag:    help on stdout, success
  # - version flag: version on stdout, success
  # - anything else is treated as a subcommand name (or alias)
  def run
    requested = @argv.first

    if requested.nil?
      print_help(@stderr)
      FAILURE_EXIT_STATUS
    elsif HELP_FLAGS.include?(requested)
      print_help(@stdout)
      SUCCESS_EXIT_STATUS
    elsif VERSION_FLAGS.include?(requested)
      @stdout.puts VERSION
      SUCCESS_EXIT_STATUS
    else
      dispatch(requested)
    end
  end

  private

  # Runs the subcommand registered under the given name or alias with the
  # remaining arguments; reports unknown names on stderr and fails.
  def dispatch(requested)
    command_class = SUBCOMMANDS.fetch(requested) { SUBCOMMANDS[SUBCOMMAND_ALIASES[requested]] }

    if command_class.nil?
      expected = SUBCOMMANDS.keys.join(', ')
      @stderr.puts "#{COMMAND_NAME}: unknown command '#{requested}' (expected: #{expected})"
      return FAILURE_EXIT_STATUS
    end

    command = command_class.new(
      @argv.drop(1),
      resolver: @resolver,
      downloader: @downloader,
      stdout: @stdout,
      stderr: @stderr,
      stdin: @stdin
    )
    command.run
  end

  # Writes the top-level usage text (commands, aliases, examples) to stream.
  def print_help(stream)
    command_lines = SUBCOMMAND_SUMMARIES.map do |name, summary|
      format(' %-7<name>s %<summary>s', name: name, summary: summary)
    end
    alias_lines = SUBCOMMAND_ALIASES.map do |alias_name, canonical|
      format(' %-7<a>s -> %<c>s', a: alias_name, c: canonical)
    end
    example_lines = USAGE_EXAMPLES.map { |example| " #{example}" }

    help_lines = [
      "Usage: #{COMMAND_NAME} <command> [options] [ACCESSION ...]",
      '',
      'Commands:',
      *command_lines,
      '',
      'Aliases:',
      *alias_lines,
      '',
      "Run '#{COMMAND_NAME} <command> --help' for command options.",
      '',
      'Examples:',
      *example_lines
    ]
    help_lines.each { |line| stream.puts line }
  end
end
|
|
138
|
+
end
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'optparse'
|
|
4
|
+
|
|
5
|
+
require_relative '../errors'
|
|
6
|
+
require_relative '../download_candidate'
|
|
7
|
+
require_relative '../accession_resolver'
|
|
8
|
+
require_relative '../download_service'
|
|
9
|
+
require_relative '../accession_input_collector'
|
|
10
|
+
|
|
11
|
+
module Dratools
|
|
12
|
+
module Commands
|
|
13
|
+
# Shared foundation for the subcommands: option parsing, accession collection,
# error handling, and exit codes.
class BaseCommand
  SUCCESS_EXIT_STATUS = 0
  FAILURE_EXIT_STATUS = 1

  DEFAULT_FILE_TYPE = AccessionResolver::FILE_TYPE_SRA
  VALID_FILE_TYPES = [
    AccessionResolver::FILE_TYPE_SRA,
    AccessionResolver::FILE_TYPE_FASTQ,
    AccessionResolver::FILE_TYPE_ALL
  ].freeze
  DEFAULT_PROTOCOL = DownloadCandidate::HTTPS_PROTOCOL
  VALID_PROTOCOLS = [DownloadCandidate::HTTPS_PROTOCOL, DownloadCandidate::FTP_PROTOCOL].freeze
  # TSV placeholder that stands for a missing value.
  MISSING_VALUE = 'NA'

  def initialize(argv, resolver:, downloader:, stdout:, stderr:, stdin:)
    @argv = argv
    @resolver = resolver
    @downloader = downloader
    @stdout = stdout
    @stderr = stderr
    @stdin = stdin
    @options = default_options
    @failed_count = 0
  end

  # Parses options, collects accessions, processes each one, and returns an
  # exit status.
  #
  # - When an option handler set @halt (e.g. --help), that value is returned.
  # - When no accession was supplied, the usage text goes to stderr and the
  #   command fails.
  # - A Dratools Error raised while processing one accession is reported and
  #   counted, and the remaining accessions are still processed.
  # - Option parse errors and any other Dratools Error are reported once and
  #   make the command fail.
  def run
    parse_options
    return @halt unless @halt.nil?

    accessions =
      begin
        collect_accessions
      rescue MissingAccessionError
        @stderr.puts build_option_parser
        return FAILURE_EXIT_STATUS
      end

    accessions.each { |accession| process_and_count_failure(accession) }
    finalize

    if @failed_count.zero?
      SUCCESS_EXIT_STATUS
    else
      FAILURE_EXIT_STATUS
    end
  rescue OptionParser::ParseError, Error => error
    report_error(error.message)
    FAILURE_EXIT_STATUS
  end

  private

  attr_reader :options, :resolver, :downloader, :stdout, :stderr

  # --- Hooks overridden by subclasses -------------------------------------

  # Subcommand name (used in the banner and as the error prefix).
  def command_name
    raise NotImplementedError
  end

  # Default options; subclasses extend this with their own.
  def default_options
    { file_type: DEFAULT_FILE_TYPE, input: nil }
  end

  # Adds subclass-specific options to the parser.
  def configure_parser(parser); end

  # Subclass-specific usage examples (one per line).
  def usage_examples
    []
  end

  # Processing body for a single accession.
  def process(accession)
    raise NotImplementedError
  end

  # Post-processing after all accessions (JSON output, summary, and so on).
  def finalize; end

  # --- Shared behaviour ----------------------------------------------------

  # Runs process for one accession; a Dratools Error is reported against that
  # accession and counted instead of aborting the run.
  def process_and_count_failure(accession)
    process(accession)
  rescue Error => error
    report_error(error.message, accession: accession)
    @failed_count += 1
  end

  # Parses @argv in place, then validates unless a handler requested a halt.
  def parse_options
    build_option_parser.parse!(@argv)
    validate_options if @halt.nil?
  end

  # Builds a parser with the common options, subclass options, and examples.
  def build_option_parser
    parser = OptionParser.new
    parser.summary_width = 24
    parser.banner = "Usage: #{Dratools::NAME} #{command_name} [options] [ACCESSION ...]"
    add_common_options(parser)
    configure_parser(parser)
    add_examples(parser)
    parser
  end

  # Registers --type, -i/--input and -h/--help.
  def add_common_options(parser)
    parser.on(
      '--type TYPE',
      "取得対象を指定する。sra, fastq, all (default: #{DEFAULT_FILE_TYPE})"
    ) { |value| @options[:file_type] = value }

    parser.on('-i', '--input FILE', 'accession 一覧をファイルまたは標準入力(-)から読む') do |value|
      @options[:input] = value
    end

    # --help prints the usage to stdout and asks run to stop with success.
    parser.on('-h', '--help', 'ヘルプを表示する') do
      @stdout.puts parser
      @halt = SUCCESS_EXIT_STATUS
    end
  end

  # Registers --protocol; intended for subclasses that download or list URLs.
  def add_protocol_option(parser)
    parser.on(
      '--protocol PROTOCOL',
      "優先する URL 種別を指定する。https, ftp (default: #{DEFAULT_PROTOCOL})"
    ) do |value|
      @options[:protocol] = value
    end
  end

  # Appends an "Examples:" section when the subclass provides examples.
  def add_examples(parser)
    lines = usage_examples
    return if lines.empty?

    ['', 'Examples:', *lines.map { |line| " #{line}" }].each do |text|
      parser.separator text
    end
  end

  # Raises InvalidOptionError unless --type is one of VALID_FILE_TYPES.
  def validate_options
    ensure_allowed_value('--type', @options[:file_type], VALID_FILE_TYPES)
  end

  # Raises InvalidOptionError unless --protocol is one of VALID_PROTOCOLS.
  def validate_protocol
    ensure_allowed_value('--protocol', @options[:protocol], VALID_PROTOCOLS)
  end

  # Shared membership check behind the option validators.
  def ensure_allowed_value(option_name, value, allowed)
    return if allowed.include?(value)

    raise InvalidOptionError, invalid_message(option_name, value, allowed)
  end

  def invalid_message(name, value, expected)
    "invalid #{name} '#{value}' (expected: #{expected.join(', ')})"
  end

  # Collects accessions from the remaining argv, --input, and stdin.
  def collect_accessions
    collector = AccessionInputCollector.new(
      argv: @argv,
      input_path: @options[:input],
      stdin: @stdin
    )
    collector.collect_accessions
  end

  # Resolves the downloads of an accession for the selected file type.
  def resolve_downloads(accession)
    @resolver.resolve_downloads(accession, file_type: @options[:file_type])
  end

  # Prints the column names once as a header line starting with `#`.
  def emit_tsv_header(columns)
    unless @tsv_header_printed
      @stdout.puts "##{columns.join("\t")}"
      @tsv_header_printed = true
    end
  end

  # Writes "<tool> <command>[: <accession>]: <message>" to stderr.
  def report_error(message, accession: nil)
    segments = ["#{Dratools::NAME} #{command_name}"]
    segments << accession.to_s if accession
    segments << message.to_s
    @stderr.puts segments.join(': ')
  end
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_command'
|
|
4
|
+
|
|
5
|
+
module Dratools
|
|
6
|
+
module Commands
|
|
7
|
+
# Actually downloads the files.
class GetCommand < BaseCommand
  DEFAULT_OUTPUT_DIRECTORY = DownloadService::DEFAULT_OUTPUT_DIRECTORY
  DOWNLOAD_SUCCESS_PREFIX = 'Downloaded'
  DOWNLOAD_SKIPPED_PREFIX = 'Skipped'

  private

  def command_name
    'get'
  end

  # Base defaults plus the download-specific ones.
  def default_options
    download_defaults = {
      protocol: DEFAULT_PROTOCOL,
      outdir: DEFAULT_OUTPUT_DIRECTORY,
      verify: true,
      force: false,
      skip_existing: false
    }
    super.merge(download_defaults)
  end

  # Registers -O/--outdir, --protocol, --no-verify, --force and --skip-existing.
  def configure_parser(parser)
    outdir_description = "ダウンロード先ディレクトリ (default: #{DEFAULT_OUTPUT_DIRECTORY})"
    parser.on('-O', '--outdir DIR', outdir_description) { |value| @options[:outdir] = value }

    add_protocol_option(parser)

    parser.on('--no-verify', 'md5 がある場合のダウンロード後検証を省略する') do
      @options[:verify] = false
    end
    parser.on('--force', '既存ファイルがあっても再取得する') do
      @options[:force] = true
    end
    parser.on('--skip-existing', '既存ファイルがあれば検証せず再取得しない') do
      @options[:skip_existing] = true
    end
  end

  def usage_examples
    [
      'get DRR000001',
      'get -O ~/Downloads DRR000001 DRR000002',
      'get --skip-existing -O ~/Downloads DRR000001'
    ].map { |arguments| "#{Dratools::NAME} #{arguments}" }
  end

  # Validates the common options first, then --protocol.
  def validate_options
    super
    validate_protocol
  end

  # Saves every download resolved for the accession.
  def process(accession)
    resolve_downloads(accession).each { |download| save_one_download(accession, download) }
  end

  # Prints a one-line summary to stderr; the failed count is shown only when
  # at least one failure occurred.
  def finalize
    failed_part = ("#{@failed_count} failed" if @failed_count.positive?)
    summary = [
      "#{@downloaded_count.to_i} downloaded",
      "#{@skipped_count.to_i} skipped",
      failed_part
    ].compact
    @stderr.puts "#{Dratools::NAME} #{command_name}: #{summary.join(', ')}"
  end

  # Saves one download with the current options and logs the outcome to stderr.
  # A Dratools Error is reported against the accession and counted as a
  # failure, so the remaining downloads of the same accession still run.
  def save_one_download(accession, download)
    result = @downloader.save_download(download, **save_settings)

    # The counters start out unset (nil), hence the to_i before incrementing.
    if result.skipped?
      @skipped_count = @skipped_count.to_i.succ
      @stderr.puts "#{DOWNLOAD_SKIPPED_PREFIX}\t#{result.path}"
    else
      @downloaded_count = @downloaded_count.to_i.succ
      @stderr.puts "#{DOWNLOAD_SUCCESS_PREFIX}\t#{result.path}"
    end
  rescue Error => error
    report_error(error.message, accession: accession)
    @failed_count += 1
  end

  # Keyword arguments forwarded to the downloader for every file.
  def save_settings
    {
      outdir: @options[:outdir],
      protocol: @options[:protocol],
      verify: @options[:verify],
      force: @options[:force],
      skip_existing: @options[:skip_existing]
    }
  end
end
|
|
86
|
+
end
|
|
87
|
+
end
|