miga-base 1.3.9.2 → 1.3.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/ncbi.rb +1 -3
- data/lib/miga/cli/action/get.rb +1 -1
- data/lib/miga/json.rb +14 -5
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +32 -17
- data/lib/miga/version.rb +1 -1
- data/test/net_test.rb +20 -0
- data/test/remote_dataset_test.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 32295b80e344eec3e534bfef0de472a19c14674c93a50ac6c066a3690be7499c
|
4
|
+
data.tar.gz: fa44c75572f39ae7dc60dabcdc2fcf11d2c17b5a4120dea0ea6fa66a4dc915ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3fd8c46e2daf0b0e6ee82435bc91b5d62784a36f2f2e0cff1b254d335dc6cecb9b5aacef2c982c743d026d87163049611a2bc73f99245f5253af8331be284b6
|
7
|
+
data.tar.gz: c3b3c69514dbb2cc035b78380f49789483ff96d510f7b044602e487512911bd8f6243451c38d473e27581a986302ae6f10e2ba4f464886c22273e6ef4ed066aa
|
data/lib/miga/cli/action/download/ncbi.rb
CHANGED
@@ -79,9 +79,7 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
79
79
|
|
80
80
|
if cli[:ncbi_list_json]
|
81
81
|
cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
|
82
|
-
|
83
|
-
fh.puts MiGA::Json.generate_plain(list)
|
84
|
-
end
|
82
|
+
MiGA::Json.generate_plain(list, cli[:ncbi_list_json])
|
85
83
|
end
|
86
84
|
|
87
85
|
list
|
data/lib/miga/cli/action/get.rb
CHANGED
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
14
14
|
opt.on(
|
15
15
|
'-I', '--ids ID1,ID2,...', Array,
|
16
16
|
'(Mandatory unless -F) IDs in the remote database separated by commas'
|
17
|
-
) { |v| cli[:ids] = v }
|
17
|
+
) { |v| cli[:ids] = v.map(&:strip) }
|
18
18
|
opt.on(
|
19
19
|
'-U', '--universe STRING',
|
20
20
|
"Universe of the remote database. By default: #{cli[:universe]}",
|
data/lib/miga/json.rb
CHANGED
@@ -65,17 +65,26 @@ class MiGA::Json < MiGA::MiGA
|
|
65
65
|
# Generates and returns prettyfied JSON to represent +obj+.
|
66
66
|
# If +path+ is passed, it saves the JSON in that file.
|
67
67
|
def generate(obj, path = nil)
|
68
|
-
|
69
|
-
File.open(path, 'w') { |fh| fh.print y } unless path.nil?
|
70
|
-
y
|
68
|
+
generate_generic(:pretty_generate, obj, path)
|
71
69
|
end
|
72
70
|
|
73
71
|
##
|
74
72
|
# Generates and returns plain JSON to represent +obj+.
|
75
73
|
# If +path+ is passed, it saves the JSON in that file.
|
76
74
|
def generate_plain(obj, path = nil)
|
77
|
-
|
78
|
-
|
75
|
+
generate_generic(:generate, obj, path)
|
76
|
+
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
def generate_generic(method, obj, path)
|
81
|
+
y = JSON.send(method, obj)
|
82
|
+
return y unless path
|
83
|
+
|
84
|
+
io = StringIO.new(y)
|
85
|
+
File.open(path, 'w') do |fh|
|
86
|
+
fh.print(io.read(1024)) until io.eof?
|
87
|
+
end
|
79
88
|
y
|
80
89
|
end
|
81
90
|
end
|
data/lib/miga/remote_dataset/base.rb
CHANGED
@@ -64,8 +64,8 @@ module MiGA::RemoteDataset::Base
|
|
64
64
|
},
|
65
65
|
gtdb: {
|
66
66
|
dbs: {
|
67
|
-
# This is a dummy entry plugged directly to +
|
68
|
-
assembly: { stage: :assembly, format: :
|
67
|
+
# This is a dummy entry plugged directly to +ncbi_asm_get+
|
68
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
69
69
|
# The 'taxon' namespace actually returns a list of genomes (+format+)
|
70
70
|
taxon: {
|
71
71
|
stage: :metadata, format: :genomes, map_to: [:assembly],
|
@@ -84,8 +84,8 @@ module MiGA::RemoteDataset::Base
|
|
84
84
|
},
|
85
85
|
seqcode: {
|
86
86
|
dbs: {
|
87
|
-
# These are dummy entries plugged directly to +ncbi_*
|
88
|
-
assembly: { stage: :assembly, format: :
|
87
|
+
# These are dummy entries plugged directly to +ncbi_*_get+
|
88
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
89
89
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
90
90
|
# This is the list of type genomes
|
91
91
|
:'type-genomes' => { stage: :metadata, format: :json }
|
@@ -100,7 +100,7 @@ module MiGA::RemoteDataset::Base
|
|
100
100
|
ncbi: {
|
101
101
|
dbs: {
|
102
102
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
103
|
-
assembly: { stage: :assembly, format: :
|
103
|
+
assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
|
104
104
|
taxonomy: { stage: :metadata, format: :xml }
|
105
105
|
},
|
106
106
|
uri: lambda do |opts|
|
data/lib/miga/remote_dataset/download.rb
CHANGED
@@ -19,6 +19,10 @@ class MiGA::RemoteDataset
|
|
19
19
|
getter = database_hash[:getter] || :download
|
20
20
|
action = database_hash[:method] || universe_hash[:method]
|
21
21
|
|
22
|
+
# Clean IDs
|
23
|
+
ids =
|
24
|
+
|
25
|
+
# Return options
|
22
26
|
{
|
23
27
|
universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
|
24
28
|
format: format, file: file, obj: obj,
|
@@ -50,22 +54,37 @@ class MiGA::RemoteDataset
|
|
50
54
|
# Supported +opts+ (Hash) include:
|
51
55
|
# +obj+ (mandatory): MiGA::RemoteDataset
|
52
56
|
# +ids+ (mandatory): String or Array of String
|
53
|
-
# +file
|
57
|
+
# +file+ (mandatory): String, assembly saved here
|
54
58
|
# +extra+: Hash, passed to download
|
55
|
-
# +format+: String,
|
59
|
+
# +format+: String, ignored
|
56
60
|
def ncbi_asm_get(opts)
|
57
|
-
|
58
|
-
|
59
|
-
raise MiGA::RemoteDataMissingError.new(
|
60
|
-
"Missing ftppath_genbank in NCBI Assembly JSON"
|
61
|
-
)
|
62
|
-
end
|
61
|
+
require 'tempfile'
|
62
|
+
require 'zip'
|
63
63
|
|
64
|
-
|
65
|
-
|
66
|
-
:
|
67
|
-
opts[:format], opts[:file], opts[:extra], opts[:obj]
|
64
|
+
zipped = download(
|
65
|
+
:ncbi_datasets_download, :genome, opts[:ids],
|
66
|
+
:zip, nil, opts[:extra], opts[:obj]
|
68
67
|
)
|
68
|
+
zip_tmp = Tempfile.new('asm.zip')
|
69
|
+
zip_tmp.puts zipped
|
70
|
+
zip_tmp.close
|
71
|
+
|
72
|
+
o = ''
|
73
|
+
ofh = opts[:file] ? File.open(opts[:file], 'w') : nil
|
74
|
+
Zip::File.open(zip_tmp.path) do |zfh|
|
75
|
+
zfh.each do |entry|
|
76
|
+
if entry.file? && entry.name =~ /_genomic\.fna$/
|
77
|
+
DEBUG "Extracting: #{entry.name}"
|
78
|
+
entry.get_input_stream do |ifh|
|
79
|
+
cont = ifh.read
|
80
|
+
ofh&.puts cont
|
81
|
+
o += cont
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
ofh&.close
|
87
|
+
o
|
69
88
|
end
|
70
89
|
|
71
90
|
##
|
@@ -77,11 +96,7 @@ class MiGA::RemoteDataset
|
|
77
96
|
return o unless o.strip.empty?
|
78
97
|
|
79
98
|
MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
|
80
|
-
opts[:format] = :
|
81
|
-
if opts[:file]
|
82
|
-
File.unlink(opts[:file]) if File.exist? opts[:file]
|
83
|
-
opts[:file] = "#{opts[:file]}.gz"
|
84
|
-
end
|
99
|
+
opts[:format] = :fasta
|
85
100
|
ncbi_asm_get(opts)
|
86
101
|
end
|
87
102
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 9, 2].freeze
|
15
|
+
VERSION = [1.3, 9, 3].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
data/test/net_test.rb
CHANGED
@@ -47,4 +47,24 @@ class FormatTest < Test::Unit::TestCase
|
|
47
47
|
### m.download_file_ftp(:miga_db, '../api_test.txt', f)
|
48
48
|
### assert_equal('miga', File.read(f).chomp)
|
49
49
|
end
|
50
|
+
|
51
|
+
def test_encoding
|
52
|
+
# Test original encoding
|
53
|
+
t1 = '()!@*#àøo'
|
54
|
+
t2 = "#{t1}"
|
55
|
+
assert_equal(t1, t2)
|
56
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
57
|
+
|
58
|
+
# Test with a different encoding
|
59
|
+
t2 = t2.encode('windows-1252')
|
60
|
+
assert_equal('Windows-1252', t2.encoding.to_s)
|
61
|
+
assert_not_equal(t1, t2)
|
62
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
63
|
+
|
64
|
+
# Test with a different encoding wrongly declared
|
65
|
+
t2.force_encoding('utf-8')
|
66
|
+
assert_equal('UTF-8', t2.encoding.to_s)
|
67
|
+
assert_not_equal(t1, t2)
|
68
|
+
assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
|
69
|
+
end
|
50
70
|
end
|
data/test/remote_dataset_test.rb
CHANGED
@@ -142,7 +142,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
142
142
|
|
143
143
|
def test_missing_data
|
144
144
|
declare_remote_access
|
145
|
-
rd = MiGA::RemoteDataset.new('
|
145
|
+
rd = MiGA::RemoteDataset.new('XYZ_GCA_000484975.1', :assembly, :ncbi)
|
146
146
|
assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
|
147
147
|
end
|
148
148
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.9.2
|
4
|
+
version: 1.3.9.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubyzip
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: rake
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|