miga-base 1.3.9.1 → 1.3.9.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09d0eb94fc3898aecede549b45d2a0478c944719c3e14b1efcefd83b8ba0f08d'
4
- data.tar.gz: 667f5cca17495e22a1f80b396eb1d1d64b2f273c27dbab4628c6d758da51a4a6
3
+ metadata.gz: 32295b80e344eec3e534bfef0de472a19c14674c93a50ac6c066a3690be7499c
4
+ data.tar.gz: fa44c75572f39ae7dc60dabcdc2fcf11d2c17b5a4120dea0ea6fa66a4dc915ff
5
5
  SHA512:
6
- metadata.gz: c73e6b163aa3be26e728744e29c8a1a6fcba73e3e27c9d2ff40604d0351a263fd6db6b9582bb4eef59a408f937af8ebac99d5360fc3cdc8c2ba1fd1c406c33c0
7
- data.tar.gz: ffd3639052114019007db0600a0d84b272e9ef52c4d00f6e8f8ca5a172d36b05f51a365a038521415411f6eddb256132e9e98d1e22624713aed20e5149d0995c
6
+ metadata.gz: d3fd8c46e2daf0b0e6ee82435bc91b5d62784a36f2f2e0cff1b254d335dc6cecb9b5aacef2c982c743d026d87163049611a2bc73f99245f5253af8331be284b6
7
+ data.tar.gz: c3b3c69514dbb2cc035b78380f49789483ff96d510f7b044602e487512911bd8f6243451c38d473e27581a986302ae6f10e2ba4f464886c22273e6ef4ed066aa
@@ -26,6 +26,9 @@ module MiGA::Cli::Action::Download::Ncbi
26
26
  cli[:scaffold] = true
27
27
  cli[:contig] = true
28
28
  end
29
+ opt.on('--ncbi-list-json STRING', '::HIDE::') do |v|
30
+ cli[:ncbi_list_json] = v
31
+ end
29
32
  end
30
33
 
31
34
  def cli_name_modifiers(opt)
@@ -35,7 +38,9 @@ module MiGA::Cli::Action::Download::Ncbi
35
38
  'Only affects --complete and --chromosome'
36
39
  ) { |v| cli[:add_version] = v }
37
40
  # For backwards compatibility
38
- cli.opt_flag(opt, 'legacy-name', '::HIDE::', :legacy_name)
41
+ opt.on('--legacy-name', '::HIDE::') do
42
+ warn 'Deprecated flag --legacy-name ignored'
43
+ end
39
44
  end
40
45
 
41
46
  def sanitize_cli
@@ -49,6 +54,11 @@ module MiGA::Cli::Action::Download::Ncbi
49
54
  end
50
55
 
51
56
  def remote_list
57
+ if cli[:ncbi_list_json] && File.size?(cli[:ncbi_list_json])
58
+ cli.say "Reusing remote list: #{cli[:ncbi_list_json]}"
59
+ return MiGA::Json.parse(cli[:ncbi_list_json])
60
+ end
61
+
52
62
  list = {}
53
63
  query = remote_list_query
54
64
  loop do
@@ -66,6 +76,12 @@ module MiGA::Cli::Action::Download::Ncbi
66
76
  break unless page[:next_page_token]
67
77
  query[:page_token] = page[:next_page_token]
68
78
  end
79
+
80
+ if cli[:ncbi_list_json]
81
+ cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
82
+ MiGA::Json.generate_plain(list, cli[:ncbi_list_json])
83
+ end
84
+
69
85
  list
70
86
  end
71
87
 
@@ -80,7 +96,8 @@ module MiGA::Cli::Action::Download::Ncbi
80
96
  ds[n] = {
81
97
  ids: [asm], db: :assembly, universe: :ncbi,
82
98
  md: {
83
- type: :genome, ncbi_asm: asm, strain: r.dig(:organism, :infraspecific_names, :strain)
99
+ type: :genome, ncbi_asm: asm,
100
+ strain: r.dig(:organism, :infraspecific_names, :strain)
84
101
  }
85
102
  }
86
103
  date = r.dig(:assembly_info, :release_date)
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
14
14
  opt.on(
15
15
  '-I', '--ids ID1,ID2,...', Array,
16
16
  '(Mandatory unless -F) IDs in the remote database separated by commas'
17
- ) { |v| cli[:ids] = v }
17
+ ) { |v| cli[:ids] = v.map(&:strip) }
18
18
  opt.on(
19
19
  '-U', '--universe STRING',
20
20
  "Universe of the remote database. By default: #{cli[:universe]}",
data/lib/miga/json.rb CHANGED
@@ -65,17 +65,26 @@ class MiGA::Json < MiGA::MiGA
65
65
  # Generates and returns prettified JSON to represent +obj+.
66
66
  # If +path+ is passed, it saves the JSON in that file.
67
67
  def generate(obj, path = nil)
68
- y = JSON.pretty_generate(obj)
69
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
70
- y
68
+ generate_generic(:pretty_generate, obj, path)
71
69
  end
72
70
 
73
71
  ##
74
72
  # Generates and returns plain JSON to represent +obj+.
75
73
  # If +path+ is passed, it saves the JSON in that file.
76
74
  def generate_plain(obj, path = nil)
77
- y = JSON.generate(obj)
78
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
75
+ generate_generic(:generate, obj, path)
76
+ end
77
+
78
+ private
79
+
80
+ def generate_generic(method, obj, path)
81
+ y = JSON.send(method, obj)
82
+ return y unless path
83
+
84
+ io = StringIO.new(y)
85
+ File.open(path, 'w') do |fh|
86
+ fh.print(io.read(1024)) until io.eof?
87
+ end
79
88
  y
80
89
  end
81
90
  end
@@ -64,8 +64,8 @@ module MiGA::RemoteDataset::Base
64
64
  },
65
65
  gtdb: {
66
66
  dbs: {
67
- # This is a dummy entry plugged directly to +ncbi_asm_rest+
68
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
67
+ # This is a dummy entry plugged directly to +ncbi_asm_get+
68
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
69
69
  # The 'taxon' namespace actually returns a list of genomes (+format+)
70
70
  taxon: {
71
71
  stage: :metadata, format: :genomes, map_to: [:assembly],
@@ -84,8 +84,8 @@ module MiGA::RemoteDataset::Base
84
84
  },
85
85
  seqcode: {
86
86
  dbs: {
87
- # These are dummy entries plugged directly to +ncbi_*_rest+
88
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
87
+ # These are dummy entries plugged directly to +ncbi_*_get+
88
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
89
89
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
90
90
  # This is the list of type genomes
91
91
  :'type-genomes' => { stage: :metadata, format: :json }
@@ -100,7 +100,7 @@ module MiGA::RemoteDataset::Base
100
100
  ncbi: {
101
101
  dbs: {
102
102
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
103
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
103
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
104
104
  taxonomy: { stage: :metadata, format: :xml }
105
105
  },
106
106
  uri: lambda do |opts|
@@ -19,6 +19,10 @@ class MiGA::RemoteDataset
19
19
  getter = database_hash[:getter] || :download
20
20
  action = database_hash[:method] || universe_hash[:method]
21
21
 
22
+ # Clean IDs
23
+ ids =
24
+
25
+ # Return options
22
26
  {
23
27
  universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
24
28
  format: format, file: file, obj: obj,
@@ -50,22 +54,37 @@ class MiGA::RemoteDataset
50
54
  # Supported +opts+ (Hash) include:
51
55
  # +obj+ (mandatory): MiGA::RemoteDataset
52
56
  # +ids+ (mandatory): String or Array of String
53
- # +file+: String, passed to download
57
+ # +file+ (mandatory): String, assembly saved here
54
58
  # +extra+: Hash, passed to download
55
- # +format+: String, passed to download
59
+ # +format+: String, ignored
56
60
  def ncbi_asm_get(opts)
57
- url_dir = opts[:obj].ncbi_asm_json_doc&.dig('ftppath_genbank')
58
- if url_dir.nil? || url_dir.empty?
59
- raise MiGA::RemoteDataMissingError.new(
60
- "Missing ftppath_genbank in NCBI Assembly JSON"
61
- )
62
- end
61
+ require 'tempfile'
62
+ require 'zip'
63
63
 
64
- url = '%s/%s_genomic.fna.gz' % [url_dir, File.basename(url_dir)]
65
- download(
66
- :web, :assembly_gz, url,
67
- opts[:format], opts[:file], opts[:extra], opts[:obj]
64
+ zipped = download(
65
+ :ncbi_datasets_download, :genome, opts[:ids],
66
+ :zip, nil, opts[:extra], opts[:obj]
68
67
  )
68
+ zip_tmp = Tempfile.new('asm.zip')
69
+ zip_tmp.puts zipped
70
+ zip_tmp.close
71
+
72
+ o = ''
73
+ ofh = opts[:file] ? File.open(opts[:file], 'w') : nil
74
+ Zip::File.open(zip_tmp.path) do |zfh|
75
+ zfh.each do |entry|
76
+ if entry.file? && entry.name =~ /_genomic\.fna$/
77
+ DEBUG "Extracting: #{entry.name}"
78
+ entry.get_input_stream do |ifh|
79
+ cont = ifh.read
80
+ ofh&.puts cont
81
+ o += cont
82
+ end
83
+ end
84
+ end
85
+ end
86
+ ofh&.close
87
+ o
69
88
  end
70
89
 
71
90
  ##
@@ -77,11 +96,7 @@ class MiGA::RemoteDataset
77
96
  return o unless o.strip.empty?
78
97
 
79
98
  MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
80
- opts[:format] = :fasta_gz
81
- if opts[:file]
82
- File.unlink(opts[:file]) if File.exist? opts[:file]
83
- opts[:file] = "#{opts[:file]}.gz"
84
- end
99
+ opts[:format] = :fasta
85
100
  ncbi_asm_get(opts)
86
101
  end
87
102
 
@@ -29,33 +29,15 @@ module MiGA::Result::Stats
29
29
  seq_opts = { gc: true, x: true, skew: true }
30
30
  if self[:files][:pair1].nil?
31
31
  s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
32
- stats = {
33
- reads: s[:n],
34
- length_average: [s[:avg], 'bp'],
35
- length_standard_deviation: [s[:sd], 'bp'],
36
- g_c_content: [s[:gc], '%'],
37
- x_content: [s[:x], '%'],
38
- g_c_skew: [s[:gc_skew], '%'],
39
- a_t_skew: [s[:at_skew], '%']
40
- }
32
+ stats = seqs_length_as_stats_hash(s)
41
33
  else
42
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
43
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
44
- stats = {
45
- read_pairs: s1[:n],
46
- forward_length_average: [s1[:avg], 'bp'],
47
- forward_length_standard_deviation: [s1[:sd], 'bp'],
48
- forward_g_c_content: [s1[:gc], '%'],
49
- forward_x_content: [s1[:x], '%'],
50
- forward_g_c_skew: [s1[:gc_skew], '%'],
51
- forward_a_t_skew: [s1[:at_skew], '%'],
52
- reverse_length_average: [s2[:avg], 'bp'],
53
- reverse_length_standard_deviation: [s2[:sd], 'bp'],
54
- reverse_g_c_content: [s2[:gc], '%'],
55
- reverse_x_content: [s2[:x], '%'],
56
- reverse_g_c_skew: [s2[:gc_skew], '%'],
57
- reverse_a_t_skew: [s2[:at_skew], '%']
58
- }
34
+ stats = { read_pairs: nil }
35
+ { pair1: :forward, pair2: :reverse }.each do |pair, direction|
36
+ s = MiGA::MiGA.seqs_length(file_path(pair), :fastq, seq_opts)
37
+ seqs_length_as_stats_hash(s).each do |k, v|
38
+ stats[k == :reads ? :read_pairs : :"#{direction}_#{k}"] ||= v
39
+ end
40
+ end
59
41
  end
60
42
  stats
61
43
  end
@@ -63,15 +45,7 @@ module MiGA::Result::Stats
63
45
  def compute_stats_trimmed_fasta
64
46
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
65
47
  s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
66
- {
67
- reads: s[:n],
68
- length_average: [s[:avg], 'bp'],
69
- length_standard_deviation: [s[:sd], 'bp'],
70
- g_c_content: [s[:gc], '%'],
71
- x_content: [s[:x], '%'],
72
- g_c_skew: [s[:gc_skew], '%'],
73
- a_t_skew: [s[:at_skew], '%']
74
- }
48
+ seqs_length_as_stats_hash(s)
75
49
  end
76
50
 
77
51
  def compute_stats_assembly
@@ -79,16 +53,17 @@ module MiGA::Result::Stats
79
53
  file_path(:largecontigs), :fasta,
80
54
  n50: true, gc: true, x: true, skew: true
81
55
  )
56
+ h = seqs_length_as_stats_hash(s)
82
57
  {
83
58
  contigs: s[:n],
84
59
  n50: [s[:n50], 'bp'],
85
60
  total_length: [s[:tot], 'bp'],
86
- longest_sequence: [s[:max], 'bp'],
87
- g_c_content: [s[:gc], '%'],
88
- x_content: [s[:x], '%'],
89
- g_c_skew: [s[:gc_skew], '%'],
90
- a_t_skew: [s[:at_skew], '%']
91
- }
61
+ longest_sequence: [s[:max], 'bp']
62
+ }.tap do |stats|
63
+ %i[g_c_content x_content g_c_skew a_t_skew].each do |i|
64
+ stats[i] = h[i]
65
+ end
66
+ end
92
67
  end
93
68
 
94
69
  def compute_stats_cds
@@ -253,4 +228,16 @@ module MiGA::Result::Stats
253
228
  add_file(:raw_report, "#{source.name}.ess/log")
254
229
  add_file(:report, "#{source.name}.ess/log.domain")
255
230
  end
231
+
232
+ def seqs_length_as_stats_hash(s)
233
+ {
234
+ reads: s[:n],
235
+ length_average: [s[:avg], 'bp'],
236
+ length_standard_deviation: [s[:sd], 'bp'],
237
+ g_c_content: [s[:gc], '%'],
238
+ x_content: [s[:x], '%'],
239
+ g_c_skew: [s[:gc_skew], '%'],
240
+ a_t_skew: [s[:at_skew], '%']
241
+ }
242
+ end
256
243
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 9, 1].freeze
15
+ VERSION = [1.3, 9, 3].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
data/test/net_test.rb CHANGED
@@ -47,4 +47,24 @@ class FormatTest < Test::Unit::TestCase
47
47
  ### m.download_file_ftp(:miga_db, '../api_test.txt', f)
48
48
  ### assert_equal('miga', File.read(f).chomp)
49
49
  end
50
+
51
+ def test_encoding
52
+ # Test original encoding
53
+ t1 = '()!@*#àøo'
54
+ t2 = "#{t1}"
55
+ assert_equal(t1, t2)
56
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
57
+
58
+ # Test with a different encoding
59
+ t2 = t2.encode('windows-1252')
60
+ assert_equal('Windows-1252', t2.encoding.to_s)
61
+ assert_not_equal(t1, t2)
62
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
63
+
64
+ # Test with a different encoding wrongly declared
65
+ t2.force_encoding('utf-8')
66
+ assert_equal('UTF-8', t2.encoding.to_s)
67
+ assert_not_equal(t1, t2)
68
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
69
+ end
50
70
  end
@@ -142,7 +142,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
142
142
 
143
143
  def test_missing_data
144
144
  declare_remote_access
145
- rd = MiGA::RemoteDataset.new('GCA_000484975.1', :assembly, :ncbi)
145
+ rd = MiGA::RemoteDataset.new('XYZ_GCA_000484975.1', :assembly, :ncbi)
146
146
  assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
147
147
  end
148
148
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.9.1
4
+ version: 1.3.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubyzip
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rake
85
99
  requirement: !ruby/object:Gem::Requirement