dratools 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +120 -0
- data/bin/dratools +8 -0
- data/docs/design.md +80 -0
- data/docs/development.md +39 -0
- data/docs/environment.md +71 -0
- data/docs/usage.md +289 -0
- data/lib/dratools/accession_input_collector.rb +53 -0
- data/lib/dratools/accession_resolver.rb +104 -0
- data/lib/dratools/accession_resource_type_classifier.rb +34 -0
- data/lib/dratools/byte_formatter.rb +25 -0
- data/lib/dratools/checksum_verifier.rb +34 -0
- data/lib/dratools/command_line_interface.rb +138 -0
- data/lib/dratools/commands/base_command.rb +189 -0
- data/lib/dratools/commands/get_command.rb +87 -0
- data/lib/dratools/commands/meta_command.rb +123 -0
- data/lib/dratools/commands/probe_command.rb +55 -0
- data/lib/dratools/commands/runs_command.rb +70 -0
- data/lib/dratools/commands/size_command.rb +163 -0
- data/lib/dratools/commands/tree_command.rb +45 -0
- data/lib/dratools/commands/url_command.rb +118 -0
- data/lib/dratools/config.rb +114 -0
- data/lib/dratools/ddbj_record_fields.rb +56 -0
- data/lib/dratools/ddbj_resource_client.rb +78 -0
- data/lib/dratools/download_candidate.rb +45 -0
- data/lib/dratools/download_candidate_builder.rb +90 -0
- data/lib/dratools/download_service.rb +221 -0
- data/lib/dratools/errors.rb +39 -0
- data/lib/dratools/external_command_runner.rb +115 -0
- data/lib/dratools/run_record_collector.rb +198 -0
- data/lib/dratools/traversal_node.rb +68 -0
- data/lib/dratools/tree_renderer.rb +83 -0
- data/lib/dratools/version.rb +6 -0
- data/lib/dratools.rb +19 -0
- metadata +76 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
require_relative 'base_command'
|
|
6
|
+
|
|
7
|
+
module Dratools
|
|
8
|
+
module Commands
|
|
9
|
+
# Prints a summary of the metadata held in a DDBJ resource JSON record.
#
# With --json the fetched records are buffered and pretty-printed once in
# #finalize; otherwise one "label: value" line is printed per non-empty field
# listed in DdbjRecordFields::INFO_FIELD_KEYS.
class MetaCommand < BaseCommand
  # Width of the left-aligned "label:" column in the text summary.
  LABEL_WIDTH = 18

  private

  # Name of this sub-command.
  def command_name
    'meta'
  end

  # Inherited defaults plus the --json flag (off by default).
  def default_options
    super.merge(json: false)
  end

  # Registers the command-specific --json switch on +parser+.
  def configure_parser(parser)
    parser.on('--json', '生の resource JSON を整形して表示する') { @options[:json] = true }
  end

  # Example invocations (presumably shown in the help text — confirm in BaseCommand).
  def usage_examples
    [
      "#{Dratools::NAME} meta DRR300000",
      "#{Dratools::NAME} meta --json DRR300000"
    ]
  end

  # Fetches the record for +accession+ and either buffers it (--json) or
  # prints its text summary right away.
  def process(accession)
    record = @resolver.fetch_record_for(accession)
    if @options[:json]
      json_buffer << record
      return
    end

    print_summary(accession, record)
  end

  # Emits the buffered records in --json mode: a bare object when exactly one
  # record was collected, otherwise an array of records.
  def finalize
    return unless @options[:json]

    payload = json_buffer.length == 1 ? json_buffer.first : json_buffer
    @stdout.puts JSON.pretty_generate(payload)
  end

  # Lazily created buffer of raw records for --json output.
  def json_buffer
    @json_buffer ||= []
  end

  # Prints every non-empty info field followed by a "runs" line when a run
  # count can be determined for the record type.
  def print_summary(accession, record)
    summary_pairs(record).each { |label, value| print_field(label, value) }
    run_count = run_count_for(accession, record)
    print_field('runs', run_count) if run_count
  end

  # Returns [label, text] pairs for each info field that has a printable value.
  def summary_pairs(record)
    DdbjRecordFields::INFO_FIELD_KEYS.filter_map do |key|
      value = normalized_value(record_value(record, key))
      next if value.nil?

      [field_label(key), value]
    end
  end

  # Looks up +key+ in +record+; the identifier field falls back to the
  # accession field when the identifier itself is missing.
  def record_value(record, key)
    return record[key] unless key == DdbjRecordFields::IDENTIFIER_KEY

    record[DdbjRecordFields::IDENTIFIER_KEY] || record[DdbjRecordFields::ACCESSION_KEY]
  end

  # Converts an arbitrary JSON value into a single display string.
  # Returns nil when there is nothing printable, so callers can skip the field.
  def normalized_value(value)
    case value
    when Array
      values = value.filter_map { |item| normalized_value(item) }
      values.join(', ') unless values.empty?
    when Hash
      present = value.reject { |_key, item| blank?(item) }
      normalized_hash_value(present) unless present.empty?
    else
      normalized_scalar_value(value)
    end
  end

  # Collapses all whitespace runs in a scalar to single spaces.
  # Returns nil for blank input, including whitespace-only strings, so that
  # the summary never prints a label with an empty value.
  def normalized_scalar_value(value)
    return nil if blank?(value)

    text = value.to_s.gsub(/[[:space:]]+/, ' ').strip
    text unless text.empty?
  end

  # Display text for a hash: its "name" entry (string or symbol key) when that
  # is printable, otherwise every printable value joined by ", ".
  # Name and values go through #normalized_value so nested arrays/hashes and
  # stray whitespace are rendered as text instead of in inspect form.
  def normalized_hash_value(value)
    name = normalized_value(value['name'] || value[:name])
    return name if name

    parts = value.values.filter_map { |item| normalized_value(item) }
    parts.join(', ') unless parts.empty?
  end

  # Number of runs behind the record: 1 for a run record, the count of unique
  # run accessions in the resolved tree for an experiment record, and nil
  # (no "runs" line) for every other record type.
  def run_count_for(accession, record)
    case record[DdbjRecordFields::TYPE_KEY]
    when DdbjRecordFields::SRA_RUN_RESOURCE_TYPE
      1
    when DdbjRecordFields::SRA_EXPERIMENT_RESOURCE_TYPE
      tree = @resolver.resolve_tree(accession, file_type: @options[:file_type])
      tree.run_accessions.uniq.length
    end
  end

  # True for nil and for anything that responds to #empty? and is empty.
  def blank?(value)
    value.nil? || (value.respond_to?(:empty?) && value.empty?)
  end

  # The identifier field is shown as "accession"; other keys are shown as-is.
  def field_label(key)
    key == DdbjRecordFields::IDENTIFIER_KEY ? 'accession' : key
  end

  # Writes one "label: value" line with the label padded to LABEL_WIDTH.
  def print_field(label, value)
    @stdout.puts format("%-#{LABEL_WIDTH}s %s", "#{label}:", value)
  end
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_command'
|
|
4
|
+
|
|
5
|
+
module Dratools
|
|
6
|
+
module Commands
|
|
7
|
+
# Performs only a short connectivity check (no full download is made).
class ProbeCommand < BaseCommand
  DEFAULT_PROBE_TIMEOUT_SECONDS = DownloadService::DEFAULT_PROBE_TIMEOUT_SECONDS
  PROBE_SUCCESS_PREFIX = 'OK'

  private

  # Name of this sub-command.
  def command_name
    'probe'
  end

  # Inherited defaults extended with the protocol and probe timeout.
  def default_options
    probe_defaults = {
      protocol: DEFAULT_PROTOCOL,
      timeout: DEFAULT_PROBE_TIMEOUT_SECONDS
    }
    super.merge(probe_defaults)
  end

  # Registers the protocol option and the --timeout option on +parser+.
  def configure_parser(parser)
    add_protocol_option(parser)
    description = "接続確認の秒数 (default: #{DEFAULT_PROBE_TIMEOUT_SECONDS})"
    parser.on('--timeout SEC', Integer, description) { |value| @options[:timeout] = value }
  end

  # Example invocations of the command.
  def usage_examples
    name = Dratools::NAME
    ["#{name} probe DRR000001", "#{name} probe --timeout 10 DRR000001"]
  end

  # Runs the inherited checks, validates the protocol, and rejects a
  # --timeout that is not strictly positive.
  def validate_options
    super
    validate_protocol

    seconds = @options[:timeout]
    raise InvalidOptionError, "invalid --timeout '#{seconds}' (expected: positive integer)" unless seconds.positive?
  end

  # Probes every download resolved for +accession+ and prints one
  # "OK<TAB>url" line after each successful probe.
  def process(accession)
    chosen_protocol = @options[:protocol]
    candidates = resolve_downloads(accession)
    candidates.each do |candidate|
      @downloader.probe_download(candidate, protocol: chosen_protocol, timeout: @options[:timeout])
      probed_url = candidate.url_for_protocol(chosen_protocol)
      @stdout.puts "#{PROBE_SUCCESS_PREFIX}\t#{probed_url}"
    end
  end
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_command'
|
|
4
|
+
|
|
5
|
+
module Dratools
|
|
6
|
+
module Commands
|
|
7
|
+
# Expands accessions into a flat list of run accessions.
class RunsCommand < BaseCommand
  XREF_URL_PATTERN = %r{/(?:resource|search/entry)/sra-run/([^/?#.]+)}

  private

  # Name of this sub-command.
  def command_name
    'runs'
  end

  # Example invocations of the command.
  def usage_examples
    name = Dratools::NAME
    [
      "#{name} runs PRJNA341783",
      "#{name} runs PRJNA341783 | #{name} get -O ~/Downloads"
    ]
  end

  # Collects the runs for +accession+: the runs named directly by its record
  # when there are any, otherwise the runs found by resolving the full tree.
  def process(accession)
    found_directly = direct_run_accessions_for(accession)
    if found_directly.any?
      run_accessions.concat(found_directly)
      nil
    else
      resolved = @resolver.resolve_tree(accession, file_type: @options[:file_type])
      run_accessions.concat(resolved.run_accessions)
    end
  end

  # Prints each collected run accession once, in first-seen order.
  def finalize
    run_accessions.uniq.each do |run_accession|
      @stdout.puts run_accession
    end
  end

  # Lazily created accumulator shared by all #process calls.
  def run_accessions
    @run_accessions ||= []
  end

  # Run accessions obtainable from the record itself: the record's own
  # accession when it is a run, otherwise the run-typed cross references.
  def direct_run_accessions_for(accession)
    record = @resolver.fetch_record_for(accession)
    run_type = DdbjRecordFields::SRA_RUN_RESOURCE_TYPE
    return [record_accession(record)].compact if record[DdbjRecordFields::TYPE_KEY] == run_type

    xrefs = record.fetch(DdbjRecordFields::DB_XREFS_KEY, [])
    run_xrefs = xrefs.select { |xref| xref[DdbjRecordFields::TYPE_KEY] == run_type }
    run_xrefs.filter_map do |xref|
      xref[DdbjRecordFields::IDENTIFIER_KEY] ||
        xref[DdbjRecordFields::ID_KEY] ||
        run_accession_from_url(xref[DdbjRecordFields::URL_KEY])
    end
  end

  # First truthy value among the accession-like keys of +record+, tried in
  # order: accession, identifier, id, primary id.
  def record_accession(record)
    lookup_order = [
      DdbjRecordFields::ACCESSION_KEY,
      DdbjRecordFields::IDENTIFIER_KEY,
      DdbjRecordFields::ID_KEY,
      DdbjRecordFields::PRIMARY_ID_KEY
    ]
    found = nil
    lookup_order.each do |key|
      found = record[key]
      break if found
    end
    found
  end

  # Extracts the run accession from an sra-run URL, or nil when +url+ does
  # not match XREF_URL_PATTERN.
  def run_accession_from_url(url)
    url.to_s[XREF_URL_PATTERN, 1]
  end
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
require_relative '../byte_formatter'
|
|
6
|
+
require_relative '../config'
|
|
7
|
+
require_relative '../ddbj_record_fields'
|
|
8
|
+
require_relative 'base_command'
|
|
9
|
+
|
|
10
|
+
module Dratools
|
|
11
|
+
module Commands
|
|
12
|
+
# Totals the Content-Length of the real files, obtained via HEAD requests.
#
# Results are printed as TSV rows (accession, files, size, unresolved) as they
# are computed, or collected and emitted as one JSON array with --json.
class SizeCommand < BaseCommand
  DEFAULT_SIZE_TIMEOUT_SECONDS = DownloadService::DEFAULT_SIZE_TIMEOUT_SECONDS
  TSV_COLUMNS = %w[accession files size unresolved].freeze

  private

  # Name of this sub-command.
  def command_name
    'size'
  end

  # Inherited defaults plus protocol, timeout and the three output flags.
  def default_options
    super.merge(
      protocol: DEFAULT_PROTOCOL,
      timeout: DEFAULT_SIZE_TIMEOUT_SECONDS,
      bytes: false,
      json: false,
      per_run: false
    )
  end

  # Registers --protocol, --timeout, --bytes, -r/--per-run and --json.
  def configure_parser(parser)
    add_protocol_option(parser)
    parser.on('--timeout SEC', Integer,
              "HEAD/ディレクトリ取得の秒数 (default: #{DEFAULT_SIZE_TIMEOUT_SECONDS})") do |value|
      @options[:timeout] = value
    end
    parser.on('--bytes', 'サイズをバイト数で表示する') { @options[:bytes] = true }
    parser.on('-r', '--per-run', '親 accession を run accession ごとに分けて集計する') do
      @options[:per_run] = true
    end
    parser.on('--json', 'サイズ集計を JSON で表示する') { @options[:json] = true }
  end

  # Example invocations of the command.
  def usage_examples
    [
      "#{Dratools::NAME} size DRR000001",
      "#{Dratools::NAME} size --type fastq PRJNA341783",
      "#{Dratools::NAME} size --per-run DRX000001",
      "#{Dratools::NAME} size --json DRR000001 DRR000002"
    ]
  end

  # Runs the inherited checks, validates the protocol, and rejects a
  # --timeout that is not strictly positive.
  def validate_options
    super
    validate_protocol
    timeout = @options[:timeout]
    return if timeout.positive?

    raise InvalidOptionError, "invalid --timeout '#{timeout}' (expected: positive integer)"
  end

  # Resolves the downloads for +accession+ and records one result for the
  # whole accession, or one per run accession with --per-run.
  def process(accession)
    ddbj_record = fetch_record_for_size(accession)
    downloads = @resolver.resolve_downloads_from_record(
      accession,
      ddbj_record,
      file_type: @options[:file_type]
    )
    if @options[:per_run]
      downloads.group_by(&:run_accession).each do |run_accession, group|
        record_result(run_accession, group)
      end
    else
      record_result(accession, downloads)
    end
  end

  # Queries the sizes of +downloads+, stores the aggregate under +label+ and,
  # unless --json is set, prints it immediately.
  # A nil entry in the returned lengths is counted as unresolved and
  # contributes nothing to the total.
  def record_result(label, downloads)
    lengths = downloads.flat_map do |download|
      @downloader.content_lengths(
        download,
        protocol: @options[:protocol],
        timeout: @options[:timeout]
      )
    end
    result = {
      accession: label,
      file_count: lengths.length,
      total_size: lengths.compact.sum,
      unresolved_count: lengths.count(&:nil?)
    }
    results << result
    print_text_result(result) unless @options[:json]
  end

  # Emits the JSON array in --json mode; in text mode prints a "total" row
  # when two or more results were recorded.
  def finalize
    if @options[:json]
      @stdout.puts JSON.pretty_generate(results)
      return
    end

    return if results.length < 2

    # With --per-run the total row goes to stderr instead of being mixed in
    # with the data rows, which keeps stdout pure TSV.
    print_text_result(total_result, io: @options[:per_run] ? @stderr : @stdout)
  end

  # Lazily created list of per-label result hashes.
  def results
    @results ||= []
  end

  # Sum of all recorded results, labelled "total".
  def total_result
    {
      accession: 'total',
      file_count: results.sum { |result| result[:file_count] },
      total_size: results.sum { |result| result[:total_size] },
      unresolved_count: results.sum { |result| result[:unresolved_count] }
    }
  end

  # Writes +result+ as one TAB-separated row to +io+, after asking
  # emit_tsv_header (defined outside this class) to emit the column header.
  def print_text_result(result, io: @stdout)
    emit_tsv_header(TSV_COLUMNS)
    io.puts [
      result[:accession],
      result[:file_count],
      formatted_size(result),
      result[:unresolved_count]
    ].join("\t")
  end

  # Size column text: MISSING_VALUE when nothing at all could be resolved,
  # the raw byte count with --bytes, otherwise ByteFormatter's rendering.
  def formatted_size(result)
    size = result[:total_size]
    return MISSING_VALUE if size.zero? && result[:unresolved_count].positive?
    return size.to_s if @options[:bytes]

    ByteFormatter.format(size)
  end

  # Fetches the record for +accession+ and enforces the direct-run limit
  # before any size request is made.
  def fetch_record_for_size(accession)
    max_direct_runs = Config.size_max_direct_runs
    ddbj_record = @resolver.fetch_record_for(accession)
    validate_direct_run_expansion_size!(accession, ddbj_record, max_direct_runs)
    ddbj_record
  end

  # Raises InvalidRecordError when the record lists more run-typed cross
  # references than +max_direct_runs+. A nil limit (unlimited) disables the check.
  # The message names the environment variable that lifts the limit, matching
  # the hint given by the url command for its own limit.
  def validate_direct_run_expansion_size!(accession, ddbj_record, max_direct_runs)
    return unless max_direct_runs

    direct_run_count = ddbj_record.fetch(DdbjRecordFields::DB_XREFS_KEY, []).count do |xref|
      xref[DdbjRecordFields::TYPE_KEY] == DdbjRecordFields::SRA_RUN_RESOURCE_TYPE
    end
    return if direct_run_count <= max_direct_runs

    raise InvalidRecordError,
          "#{accession.to_s.upcase} has #{direct_run_count} direct runs; " \
          "size expands at most #{max_direct_runs} direct runs from one parent accession. " \
          "Use `#{Dratools::NAME} runs #{accession}` and pass narrower accessions, " \
          "or set #{Config::SIZE_MAX_DIRECT_RUNS_ENV}=unlimited."
  end
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base_command'
|
|
4
|
+
require_relative '../config'
|
|
5
|
+
require_relative '../tree_renderer'
|
|
6
|
+
|
|
7
|
+
module Dratools
|
|
8
|
+
module Commands
|
|
9
|
+
# Displays the traversal tree that leads from an accession down to its runs.
class TreeCommand < BaseCommand
  private

  # Name of this sub-command.
  def command_name
    'tree'
  end

  # Example invocations of the command.
  def usage_examples
    name = Dratools::NAME
    ["#{name} tree PRJNA341783", "#{name} tree --type fastq PRJNA341783"]
  end

  # Resolves the tree for +accession+ under the configured direct-run fetch
  # limit and prints its rendering.
  def process(accession)
    fetch_limit = Config.tree_max_direct_runs
    wanted_type = @options[:file_type]
    tree = @resolver.resolve_tree(accession, file_type: wanted_type, direct_run_fetch_limit: fetch_limit)
    renderer = TreeRenderer.new(file_type: wanted_type, summary_threshold: summary_threshold(fetch_limit))
    @stdout.puts renderer.render(tree)
  end

  # Smaller of the renderer's default threshold and the fetch limit,
  # ignoring whichever of the two is nil.
  def summary_threshold(direct_run_fetch_limit)
    candidates = [TreeRenderer::DEFAULT_SUMMARY_THRESHOLD, direct_run_fetch_limit]
    candidates.compact.min
  end
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
require_relative '../config'
|
|
6
|
+
require_relative '../ddbj_record_fields'
|
|
7
|
+
require_relative 'base_command'
|
|
8
|
+
|
|
9
|
+
module Dratools
|
|
10
|
+
module Commands
|
|
11
|
+
# Displays download URLs as plain text, TSV, or JSON.
class UrlCommand < BaseCommand
  TSV_COLUMNS = %w[run_accession type url size md5].freeze

  private

  # Name of this sub-command.
  def command_name
    'url'
  end

  # Inherited defaults extended with the protocol and both format flags.
  def default_options
    url_defaults = { protocol: DEFAULT_PROTOCOL, json: false, tsv: false }
    super.merge(url_defaults)
  end

  # Registers the protocol option plus the --tsv and --json switches.
  def configure_parser(parser)
    add_protocol_option(parser)
    parser.on('--tsv', 'run_accession, type, url, size, md5 を TAB 区切りで表示する') { @options[:tsv] = true }
    parser.on('--json', 'URL 情報を JSON で表示する') do
      @options[:json] = true
    end
  end

  # Example invocations of the command.
  def usage_examples
    name = Dratools::NAME
    [
      "#{name} url DRR000001",
      "#{name} url --protocol ftp DRR000001",
      "#{name} url --tsv DRR000001 | grep -v '^#' | cut -f3",
      "#{name} url --json DRR000001 DRR000002"
    ]
  end

  # Runs the inherited checks, then validates the protocol.
  def validate_options
    super
    validate_protocol
  end

  # Resolves the downloads for +accession+ and either buffers them for JSON
  # output or prints one line per download (a TSV row or a bare URL).
  def process(accession)
    record = fetch_record_for_url(accession)
    wanted_type = @options[:file_type]
    downloads = @resolver.resolve_downloads_from_record(accession, record, file_type: wanted_type)
    return json_buffer.concat(downloads.map { |item| download_to_hash(item) }) if @options[:json]

    emit_tsv_header(TSV_COLUMNS) if @options[:tsv]
    downloads.each do |item|
      @stdout.puts(@options[:tsv] ? tsv_row(item) : item.url_for_protocol(@options[:protocol]))
    end
  end

  # Emits the buffered download hashes as one JSON array in --json mode.
  def finalize
    @stdout.puts JSON.pretty_generate(json_buffer) if @options[:json]
  end

  # Lazily created buffer of download hashes for --json output.
  def json_buffer
    @json_buffer ||= []
  end

  # Fetches the record for +accession+ after checking it against the
  # configured direct-run limit.
  def fetch_record_for_url(accession)
    limit = Config.url_max_direct_runs
    @resolver.fetch_record_for(accession).tap do |record|
      validate_direct_run_expansion_size!(accession, record, limit)
    end
  end

  # Raises InvalidRecordError when the record lists more run-typed cross
  # references than +max_direct_runs+; a nil limit disables the check.
  def validate_direct_run_expansion_size!(accession, ddbj_record, max_direct_runs)
    return unless max_direct_runs

    xrefs = ddbj_record.fetch(DdbjRecordFields::DB_XREFS_KEY, [])
    direct_run_count = xrefs.count do |xref|
      xref[DdbjRecordFields::TYPE_KEY] == DdbjRecordFields::SRA_RUN_RESOURCE_TYPE
    end
    return unless direct_run_count > max_direct_runs

    message = [
      "#{accession.to_s.upcase} has #{direct_run_count} direct runs; ",
      "url expands at most #{max_direct_runs} direct runs from one parent accession. ",
      "Use `#{Dratools::NAME} runs #{accession}` and pass narrower accessions, ",
      "or set #{Config::URL_MAX_DIRECT_RUNS_ENV}=unlimited."
    ].join
    raise InvalidRecordError, message
  end

  # One TAB-separated row for +download+; a missing size or md5 is shown as
  # MISSING_VALUE.
  def tsv_row(download)
    columns = [
      download.run_accession,
      download.type,
      download.url_for_protocol(@options[:protocol]),
      download.size || MISSING_VALUE,
      download.md5 || MISSING_VALUE
    ]
    columns.join("\t")
  end

  # Hash form of +download+ used for JSON output, with keys in a fixed order.
  def download_to_hash(download)
    %i[run_accession type url ftp_url size md5].to_h do |field|
      [field, download.public_send(field)]
    end
  end
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
|
|
5
|
+
module Dratools
|
|
6
|
+
# Reads advanced configuration from environment variables.
#
# Every reader returns its built-in default when the variable is unset or
# blank, and raises InvalidOptionError when the value cannot be accepted.
module Config
  MAX_RECURSIVE_NON_RUN_XREFS_ENV = 'DRATOOLS_MAX_RECURSIVE_NON_RUN_XREFS'
  TREE_MAX_DIRECT_RUNS_ENV = 'DRATOOLS_TREE_MAX_DIRECT_RUNS'
  URL_MAX_DIRECT_RUNS_ENV = 'DRATOOLS_URL_MAX_DIRECT_RUNS'
  SIZE_MAX_DIRECT_RUNS_ENV = 'DRATOOLS_SIZE_MAX_DIRECT_RUNS'
  DOWNLOAD_CONNECT_TIMEOUT_ENV = 'DRATOOLS_DOWNLOAD_CONNECT_TIMEOUT'
  DOWNLOAD_STALL_TIMEOUT_ENV = 'DRATOOLS_DOWNLOAD_STALL_TIMEOUT'
  DOWNLOAD_STALL_SPEED_ENV = 'DRATOOLS_DOWNLOAD_STALL_SPEED'
  DOWNLOAD_RETRY_COUNT_ENV = 'DRATOOLS_DOWNLOAD_RETRY_COUNT'
  DOWNLOAD_RETRY_WAIT_ENV = 'DRATOOLS_DOWNLOAD_RETRY_WAIT'

  DEFAULT_MAX_RECURSIVE_NON_RUN_XREFS = 100
  DEFAULT_TREE_MAX_DIRECT_RUNS = 50
  DEFAULT_URL_MAX_DIRECT_RUNS = 50
  DEFAULT_SIZE_MAX_DIRECT_RUNS = 50
  DEFAULT_DOWNLOAD_CONNECT_TIMEOUT_SECONDS = 30
  DEFAULT_DOWNLOAD_STALL_TIMEOUT_SECONDS = 60
  DEFAULT_DOWNLOAD_STALL_SPEED_BYTES_PER_SECOND = 1024
  DEFAULT_DOWNLOAD_RETRY_COUNT = 3
  DEFAULT_DOWNLOAD_RETRY_WAIT_SECONDS = 5
  # Case-insensitive keyword that switches a limit off (the reader returns nil).
  UNLIMITED_VALUE = 'unlimited'

  module_function

  # Limit read from DRATOOLS_MAX_RECURSIVE_NON_RUN_XREFS; nil means unlimited.
  def max_recursive_non_run_xrefs
    positive_integer_or_unlimited(
      MAX_RECURSIVE_NON_RUN_XREFS_ENV,
      DEFAULT_MAX_RECURSIVE_NON_RUN_XREFS
    )
  end

  # Limit read from DRATOOLS_TREE_MAX_DIRECT_RUNS; nil means unlimited.
  def tree_max_direct_runs
    positive_integer_or_unlimited(
      TREE_MAX_DIRECT_RUNS_ENV,
      DEFAULT_TREE_MAX_DIRECT_RUNS
    )
  end

  # Limit read from DRATOOLS_URL_MAX_DIRECT_RUNS; nil means unlimited.
  def url_max_direct_runs
    positive_integer_or_unlimited(URL_MAX_DIRECT_RUNS_ENV, DEFAULT_URL_MAX_DIRECT_RUNS)
  end

  # Limit read from DRATOOLS_SIZE_MAX_DIRECT_RUNS; nil means unlimited.
  def size_max_direct_runs
    positive_integer_or_unlimited(SIZE_MAX_DIRECT_RUNS_ENV, DEFAULT_SIZE_MAX_DIRECT_RUNS)
  end

  # Positive integer read from DRATOOLS_DOWNLOAD_CONNECT_TIMEOUT.
  def download_connect_timeout_seconds
    positive_integer(DOWNLOAD_CONNECT_TIMEOUT_ENV, DEFAULT_DOWNLOAD_CONNECT_TIMEOUT_SECONDS)
  end

  # Positive integer read from DRATOOLS_DOWNLOAD_STALL_TIMEOUT.
  def download_stall_timeout_seconds
    positive_integer(DOWNLOAD_STALL_TIMEOUT_ENV, DEFAULT_DOWNLOAD_STALL_TIMEOUT_SECONDS)
  end

  # Positive integer read from DRATOOLS_DOWNLOAD_STALL_SPEED.
  def download_stall_speed_bytes_per_second
    positive_integer(DOWNLOAD_STALL_SPEED_ENV, DEFAULT_DOWNLOAD_STALL_SPEED_BYTES_PER_SECOND)
  end

  # Non-negative integer read from DRATOOLS_DOWNLOAD_RETRY_COUNT (0 is accepted).
  def download_retry_count
    non_negative_integer(DOWNLOAD_RETRY_COUNT_ENV, DEFAULT_DOWNLOAD_RETRY_COUNT)
  end

  # Positive integer read from DRATOOLS_DOWNLOAD_RETRY_WAIT.
  def download_retry_wait_seconds
    positive_integer(DOWNLOAD_RETRY_WAIT_ENV, DEFAULT_DOWNLOAD_RETRY_WAIT_SECONDS)
  end

  # Reads +name+ as a positive integer, or as the "unlimited" keyword.
  #
  # @param name [String] environment variable name
  # @param default [Integer] value returned when the variable is unset or blank
  # @return [Integer, nil] the parsed value, or nil for "unlimited"
  # @raise [InvalidOptionError] when the value is neither
  def positive_integer_or_unlimited(name, default)
    expected = "positive integer or #{UNLIMITED_VALUE}"
    integer_setting(name, default, expected, allow_unlimited: true, &:positive?)
  end

  # Reads +name+ as a strictly positive integer.
  #
  # @param name [String] environment variable name
  # @param default [Integer] value returned when the variable is unset or blank
  # @return [Integer]
  # @raise [InvalidOptionError] when the value is not a positive integer
  def positive_integer(name, default)
    integer_setting(name, default, 'positive integer', &:positive?)
  end

  # Reads +name+ as an integer that is zero or greater.
  #
  # @param name [String] environment variable name
  # @param default [Integer] value returned when the variable is unset or blank
  # @return [Integer]
  # @raise [InvalidOptionError] when the value is not a non-negative integer
  def non_negative_integer(name, default)
    integer_setting(name, default, 'non-negative integer') { |integer| !integer.negative? }
  end

  # Shared parser behind the three readers above.
  #
  # The stripped value is parsed as base 10. It is returned only when the
  # given block accepts it; otherwise InvalidOptionError is raised with
  # +expected+ in the message.
  #
  # @param name [String] environment variable name
  # @param default [Object] value returned when the variable is unset or blank
  # @param expected [String] description of valid input for the error message
  # @param allow_unlimited [Boolean] whether UNLIMITED_VALUE is accepted (returns nil)
  # @yieldparam integer [Integer] the parsed value
  # @yieldreturn [Boolean] true when the value is acceptable
  def integer_setting(name, default, expected, allow_unlimited: false)
    value = ENV.fetch(name, '').strip
    return default if value.empty?
    return nil if allow_unlimited && value.casecmp?(UNLIMITED_VALUE)

    # exception: false yields nil for unparsable text, so no rescue is needed.
    integer = Integer(value, 10, exception: false)
    return integer if integer && yield(integer)

    invalid_environment_value!(name, value, expected)
  end

  # Raises InvalidOptionError naming the variable, the rejected value and
  # what was expected instead.
  def invalid_environment_value!(name, value, expected)
    raise InvalidOptionError, "invalid #{name} '#{value}' (expected: #{expected})"
  end
end
|
|
114
|
+
end
|