miga-base 1.3.9.1 → 1.3.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09d0eb94fc3898aecede549b45d2a0478c944719c3e14b1efcefd83b8ba0f08d'
4
- data.tar.gz: 667f5cca17495e22a1f80b396eb1d1d64b2f273c27dbab4628c6d758da51a4a6
3
+ metadata.gz: 32295b80e344eec3e534bfef0de472a19c14674c93a50ac6c066a3690be7499c
4
+ data.tar.gz: fa44c75572f39ae7dc60dabcdc2fcf11d2c17b5a4120dea0ea6fa66a4dc915ff
5
5
  SHA512:
6
- metadata.gz: c73e6b163aa3be26e728744e29c8a1a6fcba73e3e27c9d2ff40604d0351a263fd6db6b9582bb4eef59a408f937af8ebac99d5360fc3cdc8c2ba1fd1c406c33c0
7
- data.tar.gz: ffd3639052114019007db0600a0d84b272e9ef52c4d00f6e8f8ca5a172d36b05f51a365a038521415411f6eddb256132e9e98d1e22624713aed20e5149d0995c
6
+ metadata.gz: d3fd8c46e2daf0b0e6ee82435bc91b5d62784a36f2f2e0cff1b254d335dc6cecb9b5aacef2c982c743d026d87163049611a2bc73f99245f5253af8331be284b6
7
+ data.tar.gz: c3b3c69514dbb2cc035b78380f49789483ff96d510f7b044602e487512911bd8f6243451c38d473e27581a986302ae6f10e2ba4f464886c22273e6ef4ed066aa
@@ -26,6 +26,9 @@ module MiGA::Cli::Action::Download::Ncbi
26
26
  cli[:scaffold] = true
27
27
  cli[:contig] = true
28
28
  end
29
+ opt.on('--ncbi-list-json STRING', '::HIDE::') do |v|
30
+ cli[:ncbi_list_json] = v
31
+ end
29
32
  end
30
33
 
31
34
  def cli_name_modifiers(opt)
@@ -35,7 +38,9 @@ module MiGA::Cli::Action::Download::Ncbi
35
38
  'Only affects --complete and --chromosome'
36
39
  ) { |v| cli[:add_version] = v }
37
40
  # For backwards compatibility
38
- cli.opt_flag(opt, 'legacy-name', '::HIDE::', :legacy_name)
41
+ opt.on('--legacy-name', '::HIDE::') do
42
+ warn 'Deprecated flag --legacy-name ignored'
43
+ end
39
44
  end
40
45
 
41
46
  def sanitize_cli
@@ -49,6 +54,11 @@ module MiGA::Cli::Action::Download::Ncbi
49
54
  end
50
55
 
51
56
  def remote_list
57
+ if cli[:ncbi_list_json] && File.size?(cli[:ncbi_list_json])
58
+ cli.say "Reusing remote list: #{cli[:ncbi_list_json]}"
59
+ return MiGA::Json.parse(cli[:ncbi_list_json])
60
+ end
61
+
52
62
  list = {}
53
63
  query = remote_list_query
54
64
  loop do
@@ -66,6 +76,12 @@ module MiGA::Cli::Action::Download::Ncbi
66
76
  break unless page[:next_page_token]
67
77
  query[:page_token] = page[:next_page_token]
68
78
  end
79
+
80
+ if cli[:ncbi_list_json]
81
+ cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
82
+ MiGA::Json.generate_plain(list, cli[:ncbi_list_json])
83
+ end
84
+
69
85
  list
70
86
  end
71
87
 
@@ -80,7 +96,8 @@ module MiGA::Cli::Action::Download::Ncbi
80
96
  ds[n] = {
81
97
  ids: [asm], db: :assembly, universe: :ncbi,
82
98
  md: {
83
- type: :genome, ncbi_asm: asm, strain: r.dig(:organism, :infraspecific_names, :strain)
99
+ type: :genome, ncbi_asm: asm,
100
+ strain: r.dig(:organism, :infraspecific_names, :strain)
84
101
  }
85
102
  }
86
103
  date = r.dig(:assembly_info, :release_date)
@@ -14,7 +14,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
14
14
  opt.on(
15
15
  '-I', '--ids ID1,ID2,...', Array,
16
16
  '(Mandatory unless -F) IDs in the remote database separated by commas'
17
- ) { |v| cli[:ids] = v }
17
+ ) { |v| cli[:ids] = v.map(&:strip) }
18
18
  opt.on(
19
19
  '-U', '--universe STRING',
20
20
  "Universe of the remote database. By default: #{cli[:universe]}",
data/lib/miga/json.rb CHANGED
@@ -65,17 +65,26 @@ class MiGA::Json < MiGA::MiGA
65
65
  # Generates and returns prettyfied JSON to represent +obj+.
66
66
  # If +path+ is passed, it saves the JSON in that file.
67
67
  def generate(obj, path = nil)
68
- y = JSON.pretty_generate(obj)
69
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
70
- y
68
+ generate_generic(:pretty_generate, obj, path)
71
69
  end
72
70
 
73
71
  ##
74
72
  # Generates and returns plain JSON to represent +obj+.
75
73
  # If +path+ is passed, it saves the JSON in that file.
76
74
  def generate_plain(obj, path = nil)
77
- y = JSON.generate(obj)
78
- File.open(path, 'w') { |fh| fh.print y } unless path.nil?
75
+ generate_generic(:generate, obj, path)
76
+ end
77
+
78
+ private
79
+
80
+ def generate_generic(method, obj, path)
81
+ y = JSON.send(method, obj)
82
+ return y unless path
83
+
84
+ io = StringIO.new(y)
85
+ File.open(path, 'w') do |fh|
86
+ fh.print(io.read(1024)) until io.eof?
87
+ end
79
88
  y
80
89
  end
81
90
  end
@@ -64,8 +64,8 @@ module MiGA::RemoteDataset::Base
64
64
  },
65
65
  gtdb: {
66
66
  dbs: {
67
- # This is a dummy entry plugged directly to +ncbi_asm_rest+
68
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
67
+ # This is a dummy entry plugged directly to +ncbi_asm_get+
68
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
69
69
  # The 'taxon' namespace actually returns a list of genomes (+format+)
70
70
  taxon: {
71
71
  stage: :metadata, format: :genomes, map_to: [:assembly],
@@ -84,8 +84,8 @@ module MiGA::RemoteDataset::Base
84
84
  },
85
85
  seqcode: {
86
86
  dbs: {
87
- # These are dummy entries plugged directly to +ncbi_*_rest+
88
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
87
+ # These are dummy entries plugged directly to +ncbi_*_get+
88
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
89
89
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
90
90
  # This is the list of type genomes
91
91
  :'type-genomes' => { stage: :metadata, format: :json }
@@ -100,7 +100,7 @@ module MiGA::RemoteDataset::Base
100
100
  ncbi: {
101
101
  dbs: {
102
102
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
103
- assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
103
+ assembly: { stage: :assembly, format: :fasta, getter: :ncbi_asm },
104
104
  taxonomy: { stage: :metadata, format: :xml }
105
105
  },
106
106
  uri: lambda do |opts|
@@ -19,6 +19,10 @@ class MiGA::RemoteDataset
19
19
  getter = database_hash[:getter] || :download
20
20
  action = database_hash[:method] || universe_hash[:method]
21
21
 
22
+ # Clean IDs
23
+ ids =
24
+
25
+ # Return options
22
26
  {
23
27
  universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
24
28
  format: format, file: file, obj: obj,
@@ -50,22 +54,37 @@ class MiGA::RemoteDataset
50
54
  # Supported +opts+ (Hash) include:
51
55
  # +obj+ (mandatory): MiGA::RemoteDataset
52
56
  # +ids+ (mandatory): String or Array of String
53
- # +file+: String, passed to download
57
+ # +file+ (mandatory): String, assembly saved here
54
58
  # +extra+: Hash, passed to download
55
- # +format+: String, passed to download
59
+ # +format+: String, ignored
56
60
  def ncbi_asm_get(opts)
57
- url_dir = opts[:obj].ncbi_asm_json_doc&.dig('ftppath_genbank')
58
- if url_dir.nil? || url_dir.empty?
59
- raise MiGA::RemoteDataMissingError.new(
60
- "Missing ftppath_genbank in NCBI Assembly JSON"
61
- )
62
- end
61
+ require 'tempfile'
62
+ require 'zip'
63
63
 
64
- url = '%s/%s_genomic.fna.gz' % [url_dir, File.basename(url_dir)]
65
- download(
66
- :web, :assembly_gz, url,
67
- opts[:format], opts[:file], opts[:extra], opts[:obj]
64
+ zipped = download(
65
+ :ncbi_datasets_download, :genome, opts[:ids],
66
+ :zip, nil, opts[:extra], opts[:obj]
68
67
  )
68
+ zip_tmp = Tempfile.new('asm.zip')
69
+ zip_tmp.puts zipped
70
+ zip_tmp.close
71
+
72
+ o = ''
73
+ ofh = opts[:file] ? File.open(opts[:file], 'w') : nil
74
+ Zip::File.open(zip_tmp.path) do |zfh|
75
+ zfh.each do |entry|
76
+ if entry.file? && entry.name =~ /_genomic\.fna$/
77
+ DEBUG "Extracting: #{entry.name}"
78
+ entry.get_input_stream do |ifh|
79
+ cont = ifh.read
80
+ ofh&.puts cont
81
+ o += cont
82
+ end
83
+ end
84
+ end
85
+ end
86
+ ofh&.close
87
+ o
69
88
  end
70
89
 
71
90
  ##
@@ -77,11 +96,7 @@ class MiGA::RemoteDataset
77
96
  return o unless o.strip.empty?
78
97
 
79
98
  MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
80
- opts[:format] = :fasta_gz
81
- if opts[:file]
82
- File.unlink(opts[:file]) if File.exist? opts[:file]
83
- opts[:file] = "#{opts[:file]}.gz"
84
- end
99
+ opts[:format] = :fasta
85
100
  ncbi_asm_get(opts)
86
101
  end
87
102
 
@@ -29,33 +29,15 @@ module MiGA::Result::Stats
29
29
  seq_opts = { gc: true, x: true, skew: true }
30
30
  if self[:files][:pair1].nil?
31
31
  s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
32
- stats = {
33
- reads: s[:n],
34
- length_average: [s[:avg], 'bp'],
35
- length_standard_deviation: [s[:sd], 'bp'],
36
- g_c_content: [s[:gc], '%'],
37
- x_content: [s[:x], '%'],
38
- g_c_skew: [s[:gc_skew], '%'],
39
- a_t_skew: [s[:at_skew], '%']
40
- }
32
+ stats = seqs_length_as_stats_hash(s)
41
33
  else
42
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
43
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
44
- stats = {
45
- read_pairs: s1[:n],
46
- forward_length_average: [s1[:avg], 'bp'],
47
- forward_length_standard_deviation: [s1[:sd], 'bp'],
48
- forward_g_c_content: [s1[:gc], '%'],
49
- forward_x_content: [s1[:x], '%'],
50
- forward_g_c_skew: [s1[:gc_skew], '%'],
51
- forward_a_t_skew: [s1[:at_skew], '%'],
52
- reverse_length_average: [s2[:avg], 'bp'],
53
- reverse_length_standard_deviation: [s2[:sd], 'bp'],
54
- reverse_g_c_content: [s2[:gc], '%'],
55
- reverse_x_content: [s2[:x], '%'],
56
- reverse_g_c_skew: [s2[:gc_skew], '%'],
57
- reverse_a_t_skew: [s2[:at_skew], '%']
58
- }
34
+ stats = { read_pairs: nil }
35
+ { pair1: :forward, pair2: :reverse }.each do |pair, direction|
36
+ s = MiGA::MiGA.seqs_length(file_path(pair), :fastq, seq_opts)
37
+ seqs_length_as_stats_hash(s).each do |k, v|
38
+ stats[k == :reads ? :read_pairs : :"#{direction}_#{k}"] ||= v
39
+ end
40
+ end
59
41
  end
60
42
  stats
61
43
  end
@@ -63,15 +45,7 @@ module MiGA::Result::Stats
63
45
  def compute_stats_trimmed_fasta
64
46
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
65
47
  s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
66
- {
67
- reads: s[:n],
68
- length_average: [s[:avg], 'bp'],
69
- length_standard_deviation: [s[:sd], 'bp'],
70
- g_c_content: [s[:gc], '%'],
71
- x_content: [s[:x], '%'],
72
- g_c_skew: [s[:gc_skew], '%'],
73
- a_t_skew: [s[:at_skew], '%']
74
- }
48
+ seqs_length_as_stats_hash(s)
75
49
  end
76
50
 
77
51
  def compute_stats_assembly
@@ -79,16 +53,17 @@ module MiGA::Result::Stats
79
53
  file_path(:largecontigs), :fasta,
80
54
  n50: true, gc: true, x: true, skew: true
81
55
  )
56
+ h = seqs_length_as_stats_hash(s)
82
57
  {
83
58
  contigs: s[:n],
84
59
  n50: [s[:n50], 'bp'],
85
60
  total_length: [s[:tot], 'bp'],
86
- longest_sequence: [s[:max], 'bp'],
87
- g_c_content: [s[:gc], '%'],
88
- x_content: [s[:x], '%'],
89
- g_c_skew: [s[:gc_skew], '%'],
90
- a_t_skew: [s[:at_skew], '%']
91
- }
61
+ longest_sequence: [s[:max], 'bp']
62
+ }.tap do |stats|
63
+ %i[g_c_content x_content g_c_skew a_t_skew].each do |i|
64
+ stats[i] = h[i]
65
+ end
66
+ end
92
67
  end
93
68
 
94
69
  def compute_stats_cds
@@ -253,4 +228,16 @@ module MiGA::Result::Stats
253
228
  add_file(:raw_report, "#{source.name}.ess/log")
254
229
  add_file(:report, "#{source.name}.ess/log.domain")
255
230
  end
231
+
232
+ def seqs_length_as_stats_hash(s)
233
+ {
234
+ reads: s[:n],
235
+ length_average: [s[:avg], 'bp'],
236
+ length_standard_deviation: [s[:sd], 'bp'],
237
+ g_c_content: [s[:gc], '%'],
238
+ x_content: [s[:x], '%'],
239
+ g_c_skew: [s[:gc_skew], '%'],
240
+ a_t_skew: [s[:at_skew], '%']
241
+ }
242
+ end
256
243
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 9, 1].freeze
15
+ VERSION = [1.3, 9, 3].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
data/test/net_test.rb CHANGED
@@ -47,4 +47,24 @@ class FormatTest < Test::Unit::TestCase
47
47
  ### m.download_file_ftp(:miga_db, '../api_test.txt', f)
48
48
  ### assert_equal('miga', File.read(f).chomp)
49
49
  end
50
+
51
+ def test_encoding
52
+ # Test original encoding
53
+ t1 = '()!@*#àøo'
54
+ t2 = "#{t1}"
55
+ assert_equal(t1, t2)
56
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
57
+
58
+ # Test with a different encoding
59
+ t2 = t2.encode('windows-1252')
60
+ assert_equal('Windows-1252', t2.encoding.to_s)
61
+ assert_not_equal(t1, t2)
62
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
63
+
64
+ # Test with a different encoding wrongly declared
65
+ t2.force_encoding('utf-8')
66
+ assert_equal('UTF-8', t2.encoding.to_s)
67
+ assert_not_equal(t1, t2)
68
+ assert_equal(t1, MiGA::MiGA.normalize_encoding(t2))
69
+ end
50
70
  end
@@ -142,7 +142,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
142
142
 
143
143
  def test_missing_data
144
144
  declare_remote_access
145
- rd = MiGA::RemoteDataset.new('GCA_000484975.1', :assembly, :ncbi)
145
+ rd = MiGA::RemoteDataset.new('XYZ_GCA_000484975.1', :assembly, :ncbi)
146
146
  assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
147
147
  end
148
148
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.9.1
4
+ version: 1.3.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubyzip
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rake
85
99
  requirement: !ruby/object:Gem::Requirement