miga-base 1.3.9.1 → 1.3.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/ncbi.rb +19 -2
- data/lib/miga/cli/action/get.rb +1 -1
- data/lib/miga/json.rb +14 -5
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +32 -17
- data/lib/miga/result/stats.rb +28 -41
- data/lib/miga/version.rb +1 -1
- data/test/net_test.rb +20 -0
- data/test/remote_dataset_test.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 32295b80e344eec3e534bfef0de472a19c14674c93a50ac6c066a3690be7499c
|
|
4
|
+
data.tar.gz: fa44c75572f39ae7dc60dabcdc2fcf11d2c17b5a4120dea0ea6fa66a4dc915ff
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d3fd8c46e2daf0b0e6ee82435bc91b5d62784a36f2f2e0cff1b254d335dc6cecb9b5aacef2c982c743d026d87163049611a2bc73f99245f5253af8331be284b6
|
|
7
|
+
data.tar.gz: c3b3c69514dbb2cc035b78380f49789483ff96d510f7b044602e487512911bd8f6243451c38d473e27581a986302ae6f10e2ba4f464886c22273e6ef4ed066aa
|
|
@@ -26,6 +26,9 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
|
26
26
|
cli[:scaffold] = true
|
|
27
27
|
cli[:contig] = true
|
|
28
28
|
end
|
|
29
|
+
opt.on('--ncbi-list-json STRING', '::HIDE::') do |v|
|
|
30
|
+
cli[:ncbi_list_json] = v
|
|
31
|
+
end
|
|
29
32
|
end
|
|
30
33
|
|
|
31
34
|
def cli_name_modifiers(opt)
|
|
@@ -35,7 +38,9 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
|
35
38
|
'Only affects --complete and --chromosome'
|
|
36
39
|
) { |v| cli[:add_version] = v }
|
|
37
40
|
# For backwards compatibility
|
|
38
|
-
|
|
41
|
+
opt.on('--legacy-name', '::HIDE::') do
|
|
42
|
+
warn 'Deprecated flag --legacy-name ignored'
|
|
43
|
+
end
|
|
39
44
|
end
|
|
40
45
|
|
|
41
46
|
def sanitize_cli
|
|
@@ -49,6 +54,11 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
|
49
54
|
end
|
|
50
55
|
|
|
51
56
|
def remote_list
|
|
57
|
+
if cli[:ncbi_list_json] && File.size?(cli[:ncbi_list_json])
|
|
58
|
+
cli.say "Reusing remote list: #{cli[:ncbi_list_json]}"
|
|
59
|
+
return MiGA::Json.parse(cli[:ncbi_list_json])
|
|
60
|
+
end
|
|
61
|
+
|
|
52
62
|
list = {}
|
|
53
63
|
query = remote_list_query
|
|
54
64
|
loop do
|
|
@@ -66,6 +76,12 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
|
66
76
|
break unless page[:next_page_token]
|
|
67
77
|
query[:page_token] = page[:next_page_token]
|
|
68
78
|
end
|
|
79
|
+
|
|
80
|
+
if cli[:ncbi_list_json]
|
|
81
|
+
cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
|
|
82
|
+
MiGA::Json.generate_plain(list, cli[:ncbi_list_json])
|
|
83
|
+
end
|
|
84
|
+
|
|
69
85
|
list
|
|
70
86
|
end
|
|
71
87
|
|
|
@@ -80,7 +96,8 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
|
80
96
|
ds[n] = {
|
|
81
97
|
ids: [asm], db: :assembly, universe: :ncbi,
|
|
82
98
|
md: {
|
|
83
|
-
type: :genome, ncbi_asm: asm,
|
|
99
|
+
type: :genome, ncbi_asm: asm,
|
|
100
|
+
strain: r.dig(:organism, :infraspecific_names, :strain)
|
|
84
101
|
}
|
|
85
102
|
}
|
|
86
103
|
date = r.dig(:assembly_info, :release_date)
|
data/lib/miga/cli/action/get.rb
CHANGED
|
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
|
14
14
|
opt.on(
|
|
15
15
|
'-I', '--ids ID1,ID2,...', Array,
|
|
16
16
|
'(Mandatory unless -F) IDs in the remote database separated by commas'
|
|
17
|
-
) { |v| cli[:ids] = v }
|
|
17
|
+
) { |v| cli[:ids] = v.map(&:strip) }
|
|
18
18
|
opt.on(
|
|
19
19
|
'-U', '--universe STRING',
|
|
20
20
|
"Universe of the remote database. By default: #{cli[:universe]}",
|
data/lib/miga/json.rb
CHANGED
|
@@ -65,17 +65,26 @@ class MiGA::Json < MiGA::MiGA
|
|
|
65
65
|
# Generates and returns prettyfied JSON to represent +obj+.
|
|
66
66
|
# If +path+ is passed, it saves the JSON in that file.
|
|
67
67
|
def generate(obj, path = nil)
|
|
68
|
-
|
|
69
|
-
File.open(path, 'w') { |fh| fh.print y } unless path.nil?
|
|
70
|
-
y
|
|
68
|
+
generate_generic(:pretty_generate, obj, path)
|
|
71
69
|
end
|
|
72
70
|
|
|
73
71
|
##
|
|
74
72
|
# Generates and returns plain JSON to represent +obj+.
|
|
75
73
|
# If +path+ is passed, it saves the JSON in that file.
|
|
76
74
|
def generate_plain(obj, path = nil)
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
generate_generic(:generate, obj, path)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
def generate_generic(method, obj, path)
|
|
81
|
+
y = JSON.send(method, obj)
|
|
82
|
+
return y unless path
|
|
83
|
+
|
|
84
|
+
io = StringIO.new(y)
|
|
85
|
+
File.open(path, 'w') do |fh|
|
|
86
|
+
fh.print(io.read(1024)) until io.eof?
|
|
87
|
+
end
|
|
79
88
|
y
|
|
80
89
|
end
|
|
81
90
|
end
|
|
@@ -64,8 +64,8 @@ module MiGA::RemoteDataset::Base
|
|
|
64
64
|
},
|
|
65
65
|
gtdb: {
|
|
66
66
|
dbs: {
|
|
67
|
-
# This is a dummy entry plugged directly to +
|
|
68
|
-
assembly: { stage: :assembly, format: :
|
|
67
|
+
# This is a dummy entry plugged directly to +ncbi_asm_get+
|
|
68
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
|
69
69
|
# The 'taxon' namespace actually returns a list of genomes (+format+)
|
|
70
70
|
taxon: {
|
|
71
71
|
stage: :metadata, format: :genomes, map_to: [:assembly],
|
|
@@ -84,8 +84,8 @@ module MiGA::RemoteDataset::Base
|
|
|
84
84
|
},
|
|
85
85
|
seqcode: {
|
|
86
86
|
dbs: {
|
|
87
|
-
# These are dummy entries plugged directly to +ncbi_*
|
|
88
|
-
assembly: { stage: :assembly, format: :
|
|
87
|
+
# These are dummy entries plugged directly to +ncbi_*_get+
|
|
88
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
|
89
89
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
|
90
90
|
# This is the list of type genomes
|
|
91
91
|
:'type-genomes' => { stage: :metadata, format: :json }
|
|
@@ -100,7 +100,7 @@ module MiGA::RemoteDataset::Base
|
|
|
100
100
|
ncbi: {
|
|
101
101
|
dbs: {
|
|
102
102
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
|
103
|
-
assembly: { stage: :assembly, format: :
|
|
103
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
|
104
104
|
taxonomy: { stage: :metadata, format: :xml }
|
|
105
105
|
},
|
|
106
106
|
uri: lambda do |opts|
|
|
@@ -19,6 +19,10 @@ class MiGA::RemoteDataset
|
|
|
19
19
|
getter = database_hash[:getter] || :download
|
|
20
20
|
action = database_hash[:method] || universe_hash[:method]
|
|
21
21
|
|
|
22
|
+
# Clean IDs
|
|
23
|
+
ids =
|
|
24
|
+
|
|
25
|
+
# Return options
|
|
22
26
|
{
|
|
23
27
|
universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
|
|
24
28
|
format: format, file: file, obj: obj,
|
|
@@ -50,22 +54,37 @@ class MiGA::RemoteDataset
|
|
|
50
54
|
# Supported +opts+ (Hash) include:
|
|
51
55
|
# +obj+ (mandatory): MiGA::RemoteDataset
|
|
52
56
|
# +ids+ (mandatory): String or Array of String
|
|
53
|
-
# +file
|
|
57
|
+
# +file+ (mandatory): String, assembly saved here
|
|
54
58
|
# +extra+: Hash, passed to download
|
|
55
|
-
# +format+: String,
|
|
59
|
+
# +format+: String, ignored
|
|
56
60
|
def ncbi_asm_get(opts)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
raise MiGA::RemoteDataMissingError.new(
|
|
60
|
-
"Missing ftppath_genbank in NCBI Assembly JSON"
|
|
61
|
-
)
|
|
62
|
-
end
|
|
61
|
+
require 'tempfile'
|
|
62
|
+
require 'zip'
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
:
|
|
67
|
-
opts[:format], opts[:file], opts[:extra], opts[:obj]
|
|
64
|
+
zipped = download(
|
|
65
|
+
:ncbi_datasets_download, :genome, opts[:ids],
|
|
66
|
+
:zip, nil, opts[:extra], opts[:obj]
|
|
68
67
|
)
|
|
68
|
+
zip_tmp = Tempfile.new('asm.zip')
|
|
69
|
+
zip_tmp.puts zipped
|
|
70
|
+
zip_tmp.close
|
|
71
|
+
|
|
72
|
+
o = ''
|
|
73
|
+
ofh = opts[:file] ? File.open(opts[:file], 'w') : nil
|
|
74
|
+
Zip::File.open(zip_tmp.path) do |zfh|
|
|
75
|
+
zfh.each do |entry|
|
|
76
|
+
if entry.file? && entry.name =~ /_genomic\.fna$/
|
|
77
|
+
DEBUG "Extracting: #{entry.name}"
|
|
78
|
+
entry.get_input_stream do |ifh|
|
|
79
|
+
cont = ifh.read
|
|
80
|
+
ofh&.puts cont
|
|
81
|
+
o += cont
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
ofh&.close
|
|
87
|
+
o
|
|
69
88
|
end
|
|
70
89
|
|
|
71
90
|
##
|
|
@@ -77,11 +96,7 @@ class MiGA::RemoteDataset
|
|
|
77
96
|
return o unless o.strip.empty?
|
|
78
97
|
|
|
79
98
|
MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
|
|
80
|
-
opts[:format] = :
|
|
81
|
-
if opts[:file]
|
|
82
|
-
File.unlink(opts[:file]) if File.exist? opts[:file]
|
|
83
|
-
opts[:file] = "#{opts[:file]}.gz"
|
|
84
|
-
end
|
|
99
|
+
opts[:format] = :fasta
|
|
85
100
|
ncbi_asm_get(opts)
|
|
86
101
|
end
|
|
87
102
|
|
data/lib/miga/result/stats.rb
CHANGED
|
@@ -29,33 +29,15 @@ module MiGA::Result::Stats
|
|
|
29
29
|
seq_opts = { gc: true, x: true, skew: true }
|
|
30
30
|
if self[:files][:pair1].nil?
|
|
31
31
|
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
|
|
32
|
-
stats =
|
|
33
|
-
reads: s[:n],
|
|
34
|
-
length_average: [s[:avg], 'bp'],
|
|
35
|
-
length_standard_deviation: [s[:sd], 'bp'],
|
|
36
|
-
g_c_content: [s[:gc], '%'],
|
|
37
|
-
x_content: [s[:x], '%'],
|
|
38
|
-
g_c_skew: [s[:gc_skew], '%'],
|
|
39
|
-
a_t_skew: [s[:at_skew], '%']
|
|
40
|
-
}
|
|
32
|
+
stats = seqs_length_as_stats_hash(s)
|
|
41
33
|
else
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
forward_x_content: [s1[:x], '%'],
|
|
50
|
-
forward_g_c_skew: [s1[:gc_skew], '%'],
|
|
51
|
-
forward_a_t_skew: [s1[:at_skew], '%'],
|
|
52
|
-
reverse_length_average: [s2[:avg], 'bp'],
|
|
53
|
-
reverse_length_standard_deviation: [s2[:sd], 'bp'],
|
|
54
|
-
reverse_g_c_content: [s2[:gc], '%'],
|
|
55
|
-
reverse_x_content: [s2[:x], '%'],
|
|
56
|
-
reverse_g_c_skew: [s2[:gc_skew], '%'],
|
|
57
|
-
reverse_a_t_skew: [s2[:at_skew], '%']
|
|
58
|
-
}
|
|
34
|
+
stats = { read_pairs: nil }
|
|
35
|
+
{ pair1: :forward, pair2: :reverse }.each do |pair, direction|
|
|
36
|
+
s = MiGA::MiGA.seqs_length(file_path(pair), :fastq, seq_opts)
|
|
37
|
+
seqs_length_as_stats_hash(s).each do |k, v|
|
|
38
|
+
stats[k == :reads ? :read_pairs : :"#{direction}_#{k}"] ||= v
|
|
39
|
+
end
|
|
40
|
+
end
|
|
59
41
|
end
|
|
60
42
|
stats
|
|
61
43
|
end
|
|
@@ -63,15 +45,7 @@ module MiGA::Result::Stats
|
|
|
63
45
|
def compute_stats_trimmed_fasta
|
|
64
46
|
f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
|
|
65
47
|
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
|
|
66
|
-
|
|
67
|
-
reads: s[:n],
|
|
68
|
-
length_average: [s[:avg], 'bp'],
|
|
69
|
-
length_standard_deviation: [s[:sd], 'bp'],
|
|
70
|
-
g_c_content: [s[:gc], '%'],
|
|
71
|
-
x_content: [s[:x], '%'],
|
|
72
|
-
g_c_skew: [s[:gc_skew], '%'],
|
|
73
|
-
a_t_skew: [s[:at_skew], '%']
|
|
74
|
-
}
|
|
48
|
+
seqs_length_as_stats_hash(s)
|
|
75
49
|
end
|
|
76
50
|
|
|
77
51
|
def compute_stats_assembly
|
|
@@ -79,16 +53,17 @@ module MiGA::Result::Stats
|
|
|
79
53
|
file_path(:largecontigs), :fasta,
|
|
80
54
|
n50: true, gc: true, x: true, skew: true
|
|
81
55
|
)
|
|
56
|
+
h = seqs_length_as_stats_hash(s)
|
|
82
57
|
{
|
|
83
58
|
contigs: s[:n],
|
|
84
59
|
n50: [s[:n50], 'bp'],
|
|
85
60
|
total_length: [s[:tot], 'bp'],
|
|
86
|
-
longest_sequence: [s[:max], 'bp']
|
|
87
|
-
|
|
88
|
-
x_content
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
61
|
+
longest_sequence: [s[:max], 'bp']
|
|
62
|
+
}.tap do |stats|
|
|
63
|
+
%i[g_c_content x_content g_c_skew a_t_skew].each do |i|
|
|
64
|
+
stats[i] = h[i]
|
|
65
|
+
end
|
|
66
|
+
end
|
|
92
67
|
end
|
|
93
68
|
|
|
94
69
|
def compute_stats_cds
|
|
@@ -253,4 +228,16 @@ module MiGA::Result::Stats
|
|
|
253
228
|
add_file(:raw_report, "#{source.name}.ess/log")
|
|
254
229
|
add_file(:report, "#{source.name}.ess/log.domain")
|
|
255
230
|
end
|
|
231
|
+
|
|
232
|
+
def seqs_length_as_stats_hash(s)
|
|
233
|
+
{
|
|
234
|
+
reads: s[:n],
|
|
235
|
+
length_average: [s[:avg], 'bp'],
|
|
236
|
+
length_standard_deviation: [s[:sd], 'bp'],
|
|
237
|
+
g_c_content: [s[:gc], '%'],
|
|
238
|
+
x_content: [s[:x], '%'],
|
|
239
|
+
g_c_skew: [s[:gc_skew], '%'],
|
|
240
|
+
a_t_skew: [s[:at_skew], '%']
|
|
241
|
+
}
|
|
242
|
+
end
|
|
256
243
|
end
|
data/lib/miga/version.rb
CHANGED
|
@@ -12,7 +12,7 @@ module MiGA
|
|
|
12
12
|
# - String indicating release status:
|
|
13
13
|
# - rc* release candidate, not released as gem
|
|
14
14
|
# - [0-9]+ stable release, released as gem
|
|
15
|
-
VERSION = [1.3, 9, 1].freeze
|
|
15
|
+
VERSION = [1.3, 9, 3].freeze
|
|
16
16
|
|
|
17
17
|
##
|
|
18
18
|
# Nickname for the current major.minor version.
|
data/test/net_test.rb
CHANGED
|
@@ -47,4 +47,24 @@ class FormatTest < Test::Unit::TestCase
|
|
|
47
47
|
### m.download_file_ftp(:miga_db, '../api_test.txt', f)
|
|
48
48
|
### assert_equal('miga', File.read(f).chomp)
|
|
49
49
|
end
|
|
50
|
+
|
|
51
|
+
def test_encoding
|
|
52
|
+
# Test original encoding
|
|
53
|
+
t1 = '()!@*#àøo'
|
|
54
|
+
t2 = "#{t1}"
|
|
55
|
+
assert_equal(t1, t2)
|
|
56
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
|
57
|
+
|
|
58
|
+
# Test with a different encoding
|
|
59
|
+
t2 = t2.encode('windows-1252')
|
|
60
|
+
assert_equal('Windows-1252', t2.encoding.to_s)
|
|
61
|
+
assert_not_equal(t1, t2)
|
|
62
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
|
63
|
+
|
|
64
|
+
# Test with a different encoding wrongly declared
|
|
65
|
+
t2.force_encoding('utf-8')
|
|
66
|
+
assert_equal('UTF-8', t2.encoding.to_s)
|
|
67
|
+
assert_not_equal(t1, t2)
|
|
68
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
|
69
|
+
end
|
|
50
70
|
end
|
data/test/remote_dataset_test.rb
CHANGED
|
@@ -142,7 +142,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
|
142
142
|
|
|
143
143
|
def test_missing_data
|
|
144
144
|
declare_remote_access
|
|
145
|
-
rd = MiGA::RemoteDataset.new('
|
|
145
|
+
rd = MiGA::RemoteDataset.new('XYZ_GCA_000484975.1', :assembly, :ncbi)
|
|
146
146
|
assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
|
|
147
147
|
end
|
|
148
148
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: miga-base
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.9.1
|
|
4
|
+
version: 1.3.9.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Luis M. Rodriguez-R
|
|
@@ -80,6 +80,20 @@ dependencies:
|
|
|
80
80
|
- - ">="
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
82
|
version: '0'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: rubyzip
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - ">="
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '0'
|
|
90
|
+
type: :runtime
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - ">="
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '0'
|
|
83
97
|
- !ruby/object:Gem::Dependency
|
|
84
98
|
name: rake
|
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|