miga-base 1.3.9.2 → 1.3.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2603688836b87b4be16c9c88bacbb206a93bd6b71b19a5a7febc219638f4e912
4
- data.tar.gz: 1df2051e7d1d88facce4e0738828274ca9743807cdf90da959697addde25e424
3
+ metadata.gz: 436166fe9bbc76b69f014879610440e9646261f1a06dbc134418b79fa3da9b08
4
+ data.tar.gz: fb835c9bfa4829a3ac1c5ae8bdc1c5998cd8b9c4a55d3988538b9c24b2f4329f
5
5
  SHA512:
6
- metadata.gz: a28d606569bdd37e5d4d33710564573639b103747c9622fae0a3a2d254f6293bf8462decb09bdf67d7656f84bca84fc7094e217c66ff081e75c5f5622bd9711c
7
- data.tar.gz: 7e18ecf2487c31b5d99d3fb53b8716ce0dea2f2a8b2bab5015a871be9b703dd63c4384fb050380dad4dd28f5de3d1de5c9bcb1cb8544726870c7314fab626ca9
6
+ metadata.gz: 77e76a0dea664321aad58b4eee310d91cd9fa34c6ae0339451c83d2d169cbf9c160ed6c469401d0155280a28612ae8cde47a0c00be739371986f64621e44ef68
7
+ data.tar.gz: cdece24eb38d804b729c3c7f9018f7f3e835db258a59abe1ce560baf8bce29223a957f8498b96d6a647994361903d2d8179f2c6e6890451923dd44c5ae6cbcd8
@@ -79,9 +79,7 @@ module MiGA::Cli::Action::Download::Ncbi
79
79
 
80
80
  if cli[:ncbi_list_json]
81
81
  cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
82
- File.open(cli[:ncbi_list_json], 'w') do |fh|
83
- fh.puts MiGA::Json.generate_plain(list)
84
- end
82
+ MiGA::Json.generate_plain(list, cli[:ncbi_list_json])
85
83
  end
86
84
 
87
85
  list
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
14
14
  opt.on(
15
15
  '-I', '--ids ID1,ID2,...', Array,
16
16
  '(Mandatory unless -F) IDs in the remote database separated by commas'
17
- ) { |v| cli[:ids] = v }
17
+ ) { |v| cli[:ids] = v.map(&:strip) }
18
18
  opt.on(
19
19
  '-U', '--universe STRING',
20
20
  "Universe of the remote database. By default: #{cli[:universe]}",
data/lib/miga/json.rb CHANGED
@@ -65,17 +65,26 @@ class MiGA::Json < MiGA::MiGA
65
65
  # Generates and returns prettyfied JSON to represent +obj+.
66
66
  # If +path+ is passed, it saves the JSON in that file.
67
67
  def generate(obj, path = nil)
68
- y = JSON.pretty_generate(obj)
69
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
70
- y
68
+ generate_generic(:pretty_generate, obj, path)
71
69
  end
72
70
 
73
71
  ##
74
72
  # Generates and returns plain JSON to represent +obj+.
75
73
  # If +path+ is passed, it saves the JSON in that file.
76
74
  def generate_plain(obj, path = nil)
77
- y = JSON.generate(obj)
78
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
75
+ generate_generic(:generate, obj, path)
76
+ end
77
+
78
+ private
79
+
80
+ def generate_generic(method, obj, path)
81
+ y = JSON.send(method, obj)
82
+ return y unless path
83
+
84
+ io = StringIO.new(y)
85
+ File.open(path, 'w') do |fh|
86
+ fh.print(io.read(1024)) until io.eof?
87
+ end
79
88
  y
80
89
  end
81
90
  end
@@ -64,8 +64,8 @@ module MiGA::RemoteDataset::Base
64
64
  },
65
65
  gtdb: {
66
66
  dbs: {
67
- # This is a dummy entry plugged directly to +ncbi_asm_rest+
68
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
67
+ # This is a dummy entry plugged directly to +ncbi_asm_get+
68
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
69
69
  # The 'taxon' namespace actually returns a list of genomes (+format+)
70
70
  taxon: {
71
71
  stage: :metadata, format: :genomes, map_to: [:assembly],
@@ -84,8 +84,8 @@ module MiGA::RemoteDataset::Base
84
84
  },
85
85
  seqcode: {
86
86
  dbs: {
87
- # These are dummy entries plugged directly to +ncbi_*_rest+
88
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
87
+ # These are dummy entries plugged directly to +ncbi_*_get+
88
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
89
89
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
90
90
  # This is the list of type genomes
91
91
  :'type-genomes' => { stage: :metadata, format: :json }
@@ -100,7 +100,7 @@ module MiGA::RemoteDataset::Base
100
100
  ncbi: {
101
101
  dbs: {
102
102
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
103
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
103
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
104
104
  taxonomy: { stage: :metadata, format: :xml }
105
105
  },
106
106
  uri: lambda do |opts|
@@ -19,6 +19,10 @@ class MiGA::RemoteDataset
19
19
  getter = database_hash[:getter] || :download
20
20
  action = database_hash[:method] || universe_hash[:method]
21
21
 
22
+ # Clean IDs
23
+ ids =
24
+
25
+ # Return options
22
26
  {
23
27
  universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
24
28
  format: format, file: file, obj: obj,
@@ -50,22 +54,38 @@ class MiGA::RemoteDataset
50
54
  # Supported +opts+ (Hash) include:
51
55
  # +obj+ (mandatory): MiGA::RemoteDataset
52
56
  # +ids+ (mandatory): String or Array of String
53
- # +file+: String, passed to download
57
+ # +file+ (mandatory): String, assembly saved here
54
58
  # +extra+: Hash, passed to download
55
- # +format+: String, passed to download
59
+ # +format+: String, ignored
56
60
  def ncbi_asm_get(opts)
57
- url_dir = opts[:obj].ncbi_asm_json_doc&.dig('ftppath_genbank')
58
- if url_dir.nil? || url_dir.empty?
59
- raise MiGA::RemoteDataMissingError.new(
60
- "Missing ftppath_genbank in NCBI Assembly JSON"
61
- )
62
- end
61
+ require 'tempfile'
62
+ require 'zip'
63
63
 
64
- url = '%s/%s_genomic.fna.gz' % [url_dir, File.basename(url_dir)]
65
- download(
66
- :web, :assembly_gz, url,
67
- opts[:format], opts[:file], opts[:extra], opts[:obj]
64
+ zipped = download(
65
+ :ncbi_datasets_download, :genome, opts[:ids],
66
+ :zip, nil, opts[:extra], opts[:obj]
68
67
  )
68
+ zip_tmp = Tempfile.new('asm.zip')
69
+ zip_tmp.print(zipped)
70
+ zip_tmp.close
71
+
72
+ o = ''
73
+ ofh = opts[:file] ? File.open(opts[:file], 'w') : nil
74
+ Zip::File.open(zip_tmp.path) do |zfh|
75
+ zfh.each do |entry|
76
+ if entry.file? && entry.name =~ /_genomic\.fna$/
77
+ DEBUG "Extracting: #{entry.name}"
78
+ entry.get_input_stream do |ifh|
79
+ cont = MiGA::MiGA.normalize_encoding(ifh.read) + "\n"
80
+ ofh&.print(cont)
81
+ o += cont
82
+ end
83
+ end
84
+ end
85
+ end
86
+ ofh&.close
87
+ File.unlink(zip_tmp.path)
88
+ o
69
89
  end
70
90
 
71
91
  ##
@@ -77,11 +97,7 @@ class MiGA::RemoteDataset
77
97
  return o unless o.strip.empty?
78
98
 
79
99
  MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
80
- opts[:format] = :fasta_gz
81
- if opts[:file]
82
- File.unlink(opts[:file]) if File.exist? opts[:file]
83
- opts[:file] = "#{opts[:file]}.gz"
84
- end
100
+ opts[:format] = :fasta
85
101
  ncbi_asm_get(opts)
86
102
  end
87
103
 
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 9, 2].freeze
15
+ VERSION = [1.3, 9, 4].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
data/test/net_test.rb CHANGED
@@ -47,4 +47,24 @@ class FormatTest < Test::Unit::TestCase
47
47
  ### m.download_file_ftp(:miga_db, '../api_test.txt', f)
48
48
  ### assert_equal('miga', File.read(f).chomp)
49
49
  end
50
+
51
+ def test_encoding
52
+ # Test original encoding
53
+ t1 = '()!@*#àøo'
54
+ t2 = "#{t1}"
55
+ assert_equal(t1, t2)
56
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
57
+
58
+ # Test with a different encoding
59
+ t2 = t2.encode('windows-1252')
60
+ assert_equal('Windows-1252', t2.encoding.to_s)
61
+ assert_not_equal(t1, t2)
62
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
63
+
64
+ # Test with a different encoding wrongly declared
65
+ t2.force_encoding('utf-8')
66
+ assert_equal('UTF-8', t2.encoding.to_s)
67
+ assert_not_equal(t1, t2)
68
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
69
+ end
50
70
  end
@@ -142,7 +142,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
142
142
 
143
143
  def test_missing_data
144
144
  declare_remote_access
145
- rd = MiGA::RemoteDataset.new('GCA_000484975.1', :assembly, :ncbi)
145
+ rd = MiGA::RemoteDataset.new('XYZ_GCA_000484975.1', :assembly, :ncbi)
146
146
  assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
147
147
  end
148
148
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.9.2
4
+ version: 1.3.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubyzip
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.3'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.3'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rake
85
99
  requirement: !ruby/object:Gem::Requirement