miga-base 1.3.9.2 → 1.3.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/ncbi.rb +1 -3
- data/lib/miga/cli/action/get.rb +1 -1
- data/lib/miga/json.rb +14 -5
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +33 -17
- data/lib/miga/version.rb +1 -1
- data/test/net_test.rb +20 -0
- data/test/remote_dataset_test.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 436166fe9bbc76b69f014879610440e9646261f1a06dbc134418b79fa3da9b08
|
4
|
+
data.tar.gz: fb835c9bfa4829a3ac1c5ae8bdc1c5998cd8b9c4a55d3988538b9c24b2f4329f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77e76a0dea664321aad58b4eee310d91cd9fa34c6ae0339451c83d2d169cbf9c160ed6c469401d0155280a28612ae8cde47a0c00be739371986f64621e44ef68
|
7
|
+
data.tar.gz: cdece24eb38d804b729c3c7f9018f7f3e835db258a59abe1ce560baf8bce29223a957f8498b96d6a647994361903d2d8179f2c6e6890451923dd44c5ae6cbcd8
|
data/lib/miga/cli/action/download/ncbi.rb
CHANGED
@@ -79,9 +79,7 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
79
79
|
|
80
80
|
if cli[:ncbi_list_json]
|
81
81
|
cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
|
82
|
-
|
83
|
-
fh.puts MiGA::Json.generate_plain(list)
|
84
|
-
end
|
82
|
+
MiGA::Json.generate_plain(list, cli[:ncbi_list_json])
|
85
83
|
end
|
86
84
|
|
87
85
|
list
|
data/lib/miga/cli/action/get.rb
CHANGED
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
14
14
|
opt.on(
|
15
15
|
'-I', '--ids ID1,ID2,...', Array,
|
16
16
|
'(Mandatory unless -F) IDs in the remote database separated by commas'
|
17
|
-
) { |v| cli[:ids] = v }
|
17
|
+
) { |v| cli[:ids] = v.map(&:strip) }
|
18
18
|
opt.on(
|
19
19
|
'-U', '--universe STRING',
|
20
20
|
"Universe of the remote database. By default: #{cli[:universe]}",
|
data/lib/miga/json.rb
CHANGED
@@ -65,17 +65,26 @@ class MiGA::Json < MiGA::MiGA
|
|
65
65
|
# Generates and returns prettyfied JSON to represent +obj+.
|
66
66
|
# If +path+ is passed, it saves the JSON in that file.
|
67
67
|
def generate(obj, path = nil)
|
68
|
-
|
69
|
-
File.open(path, 'w') { |fh| fh.print y } unless path.nil?
|
70
|
-
y
|
68
|
+
generate_generic(:pretty_generate, obj, path)
|
71
69
|
end
|
72
70
|
|
73
71
|
##
|
74
72
|
# Generates and returns plain JSON to represent +obj+.
|
75
73
|
# If +path+ is passed, it saves the JSON in that file.
|
76
74
|
def generate_plain(obj, path = nil)
|
77
|
-
|
78
|
-
|
75
|
+
generate_generic(:generate, obj, path)
|
76
|
+
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
def generate_generic(method, obj, path)
|
81
|
+
y = JSON.send(method, obj)
|
82
|
+
return y unless path
|
83
|
+
|
84
|
+
io = StringIO.new(y)
|
85
|
+
File.open(path, 'w') do |fh|
|
86
|
+
fh.print(io.read(1024)) until io.eof?
|
87
|
+
end
|
79
88
|
y
|
80
89
|
end
|
81
90
|
end
|
data/lib/miga/remote_dataset/base.rb
CHANGED
@@ -64,8 +64,8 @@ module MiGA::RemoteDataset::Base
|
|
64
64
|
},
|
65
65
|
gtdb: {
|
66
66
|
dbs: {
|
67
|
-
# This is a dummy entry plugged directly to +
|
68
|
-
assembly: { stage: :assembly, format: :
|
67
|
+
# This is a dummy entry plugged directly to +ncbi_asm_get+
|
68
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
69
69
|
# The 'taxon' namespace actually returns a list of genomes (+format+)
|
70
70
|
taxon: {
|
71
71
|
stage: :metadata, format: :genomes, map_to: [:assembly],
|
@@ -84,8 +84,8 @@ module MiGA::RemoteDataset::Base
|
|
84
84
|
},
|
85
85
|
seqcode: {
|
86
86
|
dbs: {
|
87
|
-
# These are dummy entries plugged directly to +ncbi_*
|
88
|
-
assembly: { stage: :assembly, format: :
|
87
|
+
# These are dummy entries plugged directly to +ncbi_*_get+
|
88
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
89
89
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
90
90
|
# This is the list of type genomes
|
91
91
|
:'type-genomes' => { stage: :metadata, format: :json }
|
@@ -100,7 +100,7 @@ module MiGA::RemoteDataset::Base
|
|
100
100
|
ncbi: {
|
101
101
|
dbs: {
|
102
102
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
103
|
-
assembly: { stage: :assembly, format: :
|
103
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
104
104
|
taxonomy: { stage: :metadata, format: :xml }
|
105
105
|
},
|
106
106
|
uri: lambda do |opts|
|
data/lib/miga/remote_dataset/download.rb
CHANGED
@@ -19,6 +19,10 @@ class MiGA::RemoteDataset
|
|
19
19
|
getter = database_hash[:getter] || :download
|
20
20
|
action = database_hash[:method] || universe_hash[:method]
|
21
21
|
|
22
|
+
# Clean IDs
|
23
|
+
ids =
|
24
|
+
|
25
|
+
# Return options
|
22
26
|
{
|
23
27
|
universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
|
24
28
|
format: format, file: file, obj: obj,
|
@@ -50,22 +54,38 @@ class MiGA::RemoteDataset
|
|
50
54
|
# Supported +opts+ (Hash) include:
|
51
55
|
# +obj+ (mandatory): MiGA::RemoteDataset
|
52
56
|
# +ids+ (mandatory): String or Array of String
|
53
|
-
# +file
|
57
|
+
# +file+ (mandatory): String, assembly saved here
|
54
58
|
# +extra+: Hash, passed to download
|
55
|
-
# +format+: String,
|
59
|
+
# +format+: String, ignored
|
56
60
|
def ncbi_asm_get(opts)
|
57
|
-
|
58
|
-
|
59
|
-
raise MiGA::RemoteDataMissingError.new(
|
60
|
-
"Missing ftppath_genbank in NCBI Assembly JSON"
|
61
|
-
)
|
62
|
-
end
|
61
|
+
require 'tempfile'
|
62
|
+
require 'zip'
|
63
63
|
|
64
|
-
|
65
|
-
|
66
|
-
:
|
67
|
-
opts[:format], opts[:file], opts[:extra], opts[:obj]
|
64
|
+
zipped = download(
|
65
|
+
:ncbi_datasets_download, :genome, opts[:ids],
|
66
|
+
:zip, nil, opts[:extra], opts[:obj]
|
68
67
|
)
|
68
|
+
zip_tmp = Tempfile.new('asm.zip')
|
69
|
+
zip_tmp.print(zipped)
|
70
|
+
zip_tmp.close
|
71
|
+
|
72
|
+
o = ''
|
73
|
+
ofh = opts[:file] ? File.open(opts[:file], 'w') : nil
|
74
|
+
Zip::File.open(zip_tmp.path) do |zfh|
|
75
|
+
zfh.each do |entry|
|
76
|
+
if entry.file? && entry.name =~ /_genomic\.fna$/
|
77
|
+
DEBUG "Extracting: #{entry.name}"
|
78
|
+
entry.get_input_stream do |ifh|
|
79
|
+
cont = MiGA::MiGA.normalize_encoding(ifh.read) + "\n"
|
80
|
+
ofh&.print(cont)
|
81
|
+
o += cont
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
ofh&.close
|
87
|
+
File.unlink(zip_tmp.path)
|
88
|
+
o
|
69
89
|
end
|
70
90
|
|
71
91
|
##
|
@@ -77,11 +97,7 @@ class MiGA::RemoteDataset
|
|
77
97
|
return o unless o.strip.empty?
|
78
98
|
|
79
99
|
MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
|
80
|
-
opts[:format] = :
|
81
|
-
if opts[:file]
|
82
|
-
File.unlink(opts[:file]) if File.exist? opts[:file]
|
83
|
-
opts[:file] = "#{opts[:file]}.gz"
|
84
|
-
end
|
100
|
+
opts[:format] = :fasta
|
85
101
|
ncbi_asm_get(opts)
|
86
102
|
end
|
87
103
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 9, 2].freeze
|
15
|
+
VERSION = [1.3, 9, 4].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
data/test/net_test.rb
CHANGED
@@ -47,4 +47,24 @@ class FormatTest < Test::Unit::TestCase
|
|
47
47
|
### m.download_file_ftp(:miga_db, '../api_test.txt', f)
|
48
48
|
### assert_equal('miga', File.read(f).chomp)
|
49
49
|
end
|
50
|
+
|
51
|
+
def test_encoding
|
52
|
+
# Test original encoding
|
53
|
+
t1 = '()!@*#àøo'
|
54
|
+
t2 = "#{t1}"
|
55
|
+
assert_equal(t1, t2)
|
56
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
57
|
+
|
58
|
+
# Test with a different encoding
|
59
|
+
t2 = t2.encode('windows-1252')
|
60
|
+
assert_equal('Windows-1252', t2.encoding.to_s)
|
61
|
+
assert_not_equal(t1, t2)
|
62
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
63
|
+
|
64
|
+
# Test with a different encoding wrongly declared
|
65
|
+
t2.force_encoding('utf-8')
|
66
|
+
assert_equal('UTF-8', t2.encoding.to_s)
|
67
|
+
assert_not_equal(t1, t2)
|
68
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
69
|
+
end
|
50
70
|
end
|
data/test/remote_dataset_test.rb
CHANGED
@@ -142,7 +142,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
142
142
|
|
143
143
|
def test_missing_data
|
144
144
|
declare_remote_access
|
145
|
-
rd = MiGA::RemoteDataset.new('
|
145
|
+
rd = MiGA::RemoteDataset.new('XYZ_GCA_000484975.1', :assembly, :ncbi)
|
146
146
|
assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
|
147
147
|
end
|
148
148
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.9.2
|
4
|
+
version: 1.3.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubyzip
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '2.3'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '2.3'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: rake
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|