miga-base 1.3.9.2 → 1.3.9.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2603688836b87b4be16c9c88bacbb206a93bd6b71b19a5a7febc219638f4e912
4
- data.tar.gz: 1df2051e7d1d88facce4e0738828274ca9743807cdf90da959697addde25e424
3
+ metadata.gz: 32295b80e344eec3e534bfef0de472a19c14674c93a50ac6c066a3690be7499c
4
+ data.tar.gz: fa44c75572f39ae7dc60dabcdc2fcf11d2c17b5a4120dea0ea6fa66a4dc915ff
5
5
  SHA512:
6
- metadata.gz: a28d606569bdd37e5d4d33710564573639b103747c9622fae0a3a2d254f6293bf8462decb09bdf67d7656f84bca84fc7094e217c66ff081e75c5f5622bd9711c
7
- data.tar.gz: 7e18ecf2487c31b5d99d3fb53b8716ce0dea2f2a8b2bab5015a871be9b703dd63c4384fb050380dad4dd28f5de3d1de5c9bcb1cb8544726870c7314fab626ca9
6
+ metadata.gz: d3fd8c46e2daf0b0e6ee82435bc91b5d62784a36f2f2e0cff1b254d335dc6cecb9b5aacef2c982c743d026d87163049611a2bc73f99245f5253af8331be284b6
7
+ data.tar.gz: c3b3c69514dbb2cc035b78380f49789483ff96d510f7b044602e487512911bd8f6243451c38d473e27581a986302ae6f10e2ba4f464886c22273e6ef4ed066aa
@@ -79,9 +79,7 @@ module MiGA::Cli::Action::Download::Ncbi
79
79
 
80
80
  if cli[:ncbi_list_json]
81
81
  cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
82
- File.open(cli[:ncbi_list_json], 'w') do |fh|
83
- fh.puts MiGA::Json.generate_plain(list)
84
- end
82
+ MiGA::Json.generate_plain(list, cli[:ncbi_list_json])
85
83
  end
86
84
 
87
85
  list
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
14
14
  opt.on(
15
15
  '-I', '--ids ID1,ID2,...', Array,
16
16
  '(Mandatory unless -F) IDs in the remote database separated by commas'
17
- ) { |v| cli[:ids] = v }
17
+ ) { |v| cli[:ids] = v.map(&:strip) }
18
18
  opt.on(
19
19
  '-U', '--universe STRING',
20
20
  "Universe of the remote database. By default: #{cli[:universe]}",
data/lib/miga/json.rb CHANGED
@@ -65,17 +65,26 @@ class MiGA::Json < MiGA::MiGA
65
65
  # Generates and returns prettyfied JSON to represent +obj+.
66
66
  # If +path+ is passed, it saves the JSON in that file.
67
67
  def generate(obj, path = nil)
68
- y = JSON.pretty_generate(obj)
69
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
70
- y
68
+ generate_generic(:pretty_generate, obj, path)
71
69
  end
72
70
 
73
71
  ##
74
72
  # Generates and returns plain JSON to represent +obj+.
75
73
  # If +path+ is passed, it saves the JSON in that file.
76
74
  def generate_plain(obj, path = nil)
77
- y = JSON.generate(obj)
78
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
75
+ generate_generic(:generate, obj, path)
76
+ end
77
+
78
+ private
79
+
80
+ def generate_generic(method, obj, path)
81
+ y = JSON.send(method, obj)
82
+ return y unless path
83
+
84
+ io = StringIO.new(y)
85
+ File.open(path, 'w') do |fh|
86
+ fh.print(io.read(1024)) until io.eof?
87
+ end
79
88
  y
80
89
  end
81
90
  end
@@ -64,8 +64,8 @@ module MiGA::RemoteDataset::Base
64
64
  },
65
65
  gtdb: {
66
66
  dbs: {
67
- # This is a dummy entry plugged directly to +ncbi_asm_rest+
68
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
67
+ # This is a dummy entry plugged directly to +ncbi_asm_get+
68
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
69
69
  # The 'taxon' namespace actually returns a list of genomes (+format+)
70
70
  taxon: {
71
71
  stage: :metadata, format: :genomes, map_to: [:assembly],
@@ -84,8 +84,8 @@ module MiGA::RemoteDataset::Base
84
84
  },
85
85
  seqcode: {
86
86
  dbs: {
87
- # These are dummy entries plugged directly to +ncbi_*_rest+
88
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
87
+ # These are dummy entries plugged directly to +ncbi_*_get+
88
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
89
89
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
90
90
  # This is the list of type genomes
91
91
  :'type-genomes' => { stage: :metadata, format: :json }
@@ -100,7 +100,7 @@ module MiGA::RemoteDataset::Base
100
100
  ncbi: {
101
101
  dbs: {
102
102
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
103
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
103
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
104
104
  taxonomy: { stage: :metadata, format: :xml }
105
105
  },
106
106
  uri: lambda do |opts|
@@ -19,6 +19,10 @@ class MiGA::RemoteDataset
19
19
  getter = database_hash[:getter] || :download
20
20
  action = database_hash[:method] || universe_hash[:method]
21
21
 
22
+ # Clean IDs
23
+ ids =
24
+
25
+ # Return options
22
26
  {
23
27
  universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
24
28
  format: format, file: file, obj: obj,
@@ -50,22 +54,37 @@ class MiGA::RemoteDataset
50
54
  # Supported +opts+ (Hash) include:
51
55
  # +obj+ (mandatory): MiGA::RemoteDataset
52
56
  # +ids+ (mandatory): String or Array of String
53
- # +file+: String, passed to download
57
+ # +file+ (mandatory): String, assembly saved here
54
58
  # +extra+: Hash, passed to download
55
- # +format+: String, passed to download
59
+ # +format+: String, ignored
56
60
  def ncbi_asm_get(opts)
57
- url_dir = opts[:obj].ncbi_asm_json_doc&.dig('ftppath_genbank')
58
- if url_dir.nil? || url_dir.empty?
59
- raise MiGA::RemoteDataMissingError.new(
60
- "Missing ftppath_genbank in NCBI Assembly JSON"
61
- )
62
- end
61
+ require 'tempfile'
62
+ require 'zip'
63
63
 
64
- url = '%s/%s_genomic.fna.gz' % [url_dir, File.basename(url_dir)]
65
- download(
66
- :web, :assembly_gz, url,
67
- opts[:format], opts[:file], opts[:extra], opts[:obj]
64
+ zipped = download(
65
+ :ncbi_datasets_download, :genome, opts[:ids],
66
+ :zip, nil, opts[:extra], opts[:obj]
68
67
  )
68
+ zip_tmp = Tempfile.new('asm.zip')
69
+ zip_tmp.puts zipped
70
+ zip_tmp.close
71
+
72
+ o = ''
73
+ ofh = opts[:file] ? File.open(opts[:file], 'w') : nil
74
+ Zip::File.open(zip_tmp.path) do |zfh|
75
+ zfh.each do |entry|
76
+ if entry.file? && entry.name =~ /_genomic\.fna$/
77
+ DEBUG "Extracting: #{entry.name}"
78
+ entry.get_input_stream do |ifh|
79
+ cont = ifh.read
80
+ ofh&.puts cont
81
+ o += cont
82
+ end
83
+ end
84
+ end
85
+ end
86
+ ofh&.close
87
+ o
69
88
  end
70
89
 
71
90
  ##
@@ -77,11 +96,7 @@ class MiGA::RemoteDataset
77
96
  return o unless o.strip.empty?
78
97
 
79
98
  MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
80
- opts[:format] = :fasta_gz
81
- if opts[:file]
82
- File.unlink(opts[:file]) if File.exist? opts[:file]
83
- opts[:file] = "#{opts[:file]}.gz"
84
- end
99
+ opts[:format] = :fasta
85
100
  ncbi_asm_get(opts)
86
101
  end
87
102
 
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 9, 2].freeze
15
+ VERSION = [1.3, 9, 3].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
data/test/net_test.rb CHANGED
@@ -47,4 +47,24 @@ class FormatTest < Test::Unit::TestCase
47
47
  ### m.download_file_ftp(:miga_db, '../api_test.txt', f)
48
48
  ### assert_equal('miga', File.read(f).chomp)
49
49
  end
50
+
51
+ def test_encoding
52
+ # Test original encoding
53
+ t1 = '()!@*#àøo'
54
+ t2 = "#{t1}"
55
+ assert_equal(t1, t2)
56
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
57
+
58
+ # Test with a different encoding
59
+ t2 = t2.encode('windows-1252')
60
+ assert_equal('Windows-1252', t2.encoding.to_s)
61
+ assert_not_equal(t1, t2)
62
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
63
+
64
+ # Test with a different encoding wrongly declared
65
+ t2.force_encoding('utf-8')
66
+ assert_equal('UTF-8', t2.encoding.to_s)
67
+ assert_not_equal(t1, t2)
68
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
69
+ end
50
70
  end
@@ -142,7 +142,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
142
142
 
143
143
  def test_missing_data
144
144
  declare_remote_access
145
- rd = MiGA::RemoteDataset.new('GCA_000484975.1', :assembly, :ncbi)
145
+ rd = MiGA::RemoteDataset.new('XYZ_GCA_000484975.1', :assembly, :ncbi)
146
146
  assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
147
147
  end
148
148
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.9.2
4
+ version: 1.3.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubyzip
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rake
85
99
  requirement: !ruby/object:Gem::Requirement