miga-base 1.3.18.0 → 1.3.20.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf9ebc2ca06846d9b76c981c2a89896080690bc44d979ba28b4a9173f7386c7a
4
- data.tar.gz: a156a4f0894f1988862b8daf296b916c80cf197eb36fb7c13c4698975403c35e
3
+ metadata.gz: c63c60b5324c31420fa616747435b2e33e6fc7d0855cab55381f139789ccedc1
4
+ data.tar.gz: 5cbbd4de1f66b452ea72e4dface8617606aec919af08aaddea1a0f05e745873a
5
5
  SHA512:
6
- metadata.gz: b6e1318553d24d93af467670754e86e5281c7418d4242ad11b56125889188fb7cde48a1858f20e1b07bab04a372c4aac718e04c005bdfadccf641c15cd7cf2ed
7
- data.tar.gz: e584f1b4442a099fb16c4b0c07515b9332867c5b72438bae9e33467f9d0911c7bd4811a6861121482cada1b88f44aaec2efd6d2f668fa6a05c722c9fe56995bc
6
+ metadata.gz: 57c3abcf3030051fb6d9839b58f0f9bfecd75aff18d8deb1f0b9d7d5244d6103b9764de844409d5dbec2ffffcffadaeff0b4a804419d21e8a74e09caf5ee7f73
7
+ data.tar.gz: bef9383f052d1331b0154b61d7c9e8dba07194775e5d05aea110771c26597fd6549ed4884d9a4c92c8484952c8f26d33f924281f56957e01d3110cb81c3d2d98
@@ -0,0 +1,45 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
+ class MiGA::Cli::Action::Relatives < MiGA::Cli::Action
7
+ def parse_cli
8
+ cli.defaults = { metric: :aai, external: false, how_many: 5 }
9
+ cli.parse do |opt|
10
+ cli.opt_object(opt, [:project, :dataset_opt])
11
+ cli.opt_filter_datasets(opt)
12
+ opt.on(
13
+ '--metric STRING',
14
+ 'Use this metric of identity, one of ANI or AAI (default)'
15
+ ) { |v| cli[:metric] = v.downcase.to_sym }
16
+ opt.on(
17
+ '--ref-project',
18
+ 'Report distances to the external reference project used for taxonomy',
19
+ 'By default: report distances to other datasets in the project'
20
+ ) { |v| cli[:external] = v }
21
+ opt.on(
22
+ '-n', '--how-many INT', Integer,
23
+ 'Number of top values to report'
24
+ ) { |v| cli[:how_many] = v }
25
+ opt.on(
26
+ '--tab',
27
+ 'Return a tab-delimited table'
28
+ ) { |v| cli[:tabular] = v }
29
+ opt.on(
30
+ '-o', '--output PATH',
31
+ 'Create output file instead of returning to STDOUT'
32
+ ) { |v| cli[:output] = v }
33
+ end
34
+ end
35
+
36
+ def perform
37
+ cr = []
38
+ cli.load_and_filter_datasets.each do |d|
39
+ d_cr = d.closest_relatives(cli[:how_many], cli[:external], cli[:metric])
40
+ cr += d_cr.map { |i| [d.name] + i }
41
+ end
42
+ io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
43
+ cli.table(['dataset_A', 'dataset_B', cli[:metric]], cr, io)
44
+ end
45
+ end
@@ -23,8 +23,9 @@ module MiGA::Common::Format
23
23
  end
24
24
 
25
25
  ##
26
- # Cleans a FastA file in place.
27
- def clean_fasta_file(file)
26
+ # Cleans a FastA file in place, removing all sequences shorter than
27
+ # +min_len+
28
+ def clean_fasta_file(file, min_len = 1)
28
29
  tmp_fh = nil
29
30
  tmp_path = nil
30
31
  begin
@@ -39,19 +40,24 @@ module MiGA::Common::Format
39
40
  tmp_path = tmp_fh.path
40
41
  fh = File.open(file, 'r')
41
42
  end
42
- buffer = ''.dup
43
+ next_seq = ['', '']
43
44
  fh.each_line do |ln|
44
45
  ln.chomp!
45
46
  if ln =~ /^>\s*(\S+)(.*)/
46
47
  id, df = $1, $2
47
- tmp_fh.print buffer.wrap_width(80)
48
- buffer = ''.dup
49
- tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}"
48
+ if next_seq[1].length >= min_len
49
+ tmp_fh.puts next_seq[0]
50
+ tmp_fh.print next_seq[1].wrap_width(80)
51
+ end
52
+ next_seq = [">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}", '']
50
53
  else
51
- buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
54
+ next_seq[1] += ln.gsub(/[^A-Za-z\.\-]/, '')
52
55
  end
53
56
  end
54
- tmp_fh.print buffer.wrap_width(80)
57
+ if next_seq[1].length >= min_len
58
+ tmp_fh.puts next_seq[0]
59
+ tmp_fh.print next_seq[1].wrap_width(80)
60
+ end
55
61
  tmp_fh.close
56
62
  fh.close
57
63
  FileUtils.mv(tmp_path, file)
@@ -84,7 +84,7 @@ module MiGA::Dataset::Result::Add
84
84
  opts[:is_clean] ||= false
85
85
  r.clean! if opts[:is_clean]
86
86
  unless r.clean?
87
- MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs))
87
+ MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs), 1000)
88
88
  r.clean!
89
89
  end
90
90
  r
data/lib/miga/dataset.rb CHANGED
@@ -177,22 +177,23 @@ class MiGA::Dataset < MiGA::MiGA
177
177
  alias is_active? active?
178
178
 
179
179
  ##
180
- # Returns an Array of +how_many+ duples (Arrays) sorted by AAI:
181
- # - +0+: A String with the name(s) of the reference dataset.
182
- # - +1+: A Float with the AAI.
183
- # This function is currently only supported for query datasets when
184
- # +ref_project+ is false (default), and only for reference dataset when
185
- # +ref_project+ is true. It returns +nil+ if this analysis is not supported.
186
- def closest_relatives(how_many = 1, ref_project = false)
187
- return nil if (ref? != ref_project) || multi?
180
+ # Returns an Array of +how_many+ duples (Arrays) sorted by +metric+
181
+ # (one of +:aai+ or +:ani+):
182
+ # - +0+: A String with the name(s) of the reference dataset
183
+ # - +1+: A Float with the AAI/ANI
184
+ def closest_relatives(how_many = 1, ref_project = false, metric = :aai)
185
+ return nil if multi?
188
186
 
189
187
  r = result(ref_project ? :taxonomy : :distances)
190
188
  return nil if r.nil?
191
189
 
192
190
  require 'miga/sqlite'
193
- MiGA::SQLite.new(r.file_path(:aai_db)).run(
194
- 'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
195
- 'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many]
191
+ metric = metric.to_s.downcase.to_sym
192
+ db_key = :"#{metric}_db"
193
+ MiGA::SQLite.new(r.file_path(db_key)).run(
194
+ "SELECT seq2, #{metric} FROM #{metric} WHERE seq2 != ? " \
195
+ "GROUP BY seq2 ORDER BY #{metric} DESC LIMIT ?",
196
+ [name, how_many]
196
197
  )
197
198
  end
198
199
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 18, 0].freeze
15
+ VERSION = [1.3, 20, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 7, 12)
23
+ VERSION_DATE = Date.new(2024, 7, 23)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -58,7 +58,7 @@ if [[ -s "$DATASET/scaffold.fa" ]] ; then
58
58
  else
59
59
  ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
60
60
  fi
61
- FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
61
+ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2 >= 1000 { print $1 }' \
62
62
  | FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
63
63
  > "$DATASET.LargeContigs.fna"
64
64
 
@@ -83,7 +83,7 @@ class ResultStatsTest < Test::Unit::TestCase
83
83
  # Prepare result
84
84
  dir = 'data/05.assembly'
85
85
  fa = file_path(dir, '.LargeContigs.fna')
86
- File.open(fa, 'w') { |fh| fh.puts '>1', 'ACTAC' }
86
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 500) }
87
87
  touch_done(dir)
88
88
  r = dataset.add_result(:assembly)
89
89
 
@@ -91,15 +91,29 @@ class ResultStatsTest < Test::Unit::TestCase
91
91
  assert_equal({}, r[:stats])
92
92
  r.compute_stats
93
93
  assert_equal(1, r[:stats][:contigs])
94
- assert_equal([5, 'bp'], r[:stats][:total_length])
94
+ assert_equal([2500, 'bp'], r[:stats][:total_length])
95
95
  assert_equal([40.0, '%'], r[:stats][:g_c_content])
96
96
  end
97
97
 
98
+ def test_large_contigs
99
+ # Prepare result
100
+ dir = 'data/05.assembly'
101
+ fa = file_path(dir, '.LargeContigs.fna')
102
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 50) }
103
+ touch_done(dir)
104
+ r = dataset.add_result(:assembly)
105
+
106
+ # Test assertions
107
+ assert_equal({}, r[:stats])
108
+ r.compute_stats
109
+ assert_equal(0, r[:stats][:contigs])
110
+ end
111
+
98
112
  def test_cds
99
113
  # Prepare result
100
114
  dir = 'data/06.cds'
101
115
  fa = file_path(dir, '.faa')
102
- File.open(fa, 'w') { |fh| fh.puts '>1', 'M' }
116
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'M' * 500) }
103
117
  gff = file_path(dir, '.gff3.gz')
104
118
  Zlib::GzipWriter.open(gff) do |fh|
105
119
  fh.puts '# Model Data: a=b;transl_table=11;'
@@ -111,7 +125,7 @@ class ResultStatsTest < Test::Unit::TestCase
111
125
  assert_equal({}, r[:stats])
112
126
  r.compute_stats
113
127
  assert_equal(1, r[:stats][:predicted_proteins])
114
- assert_equal([1.0, 'aa'], r[:stats][:average_length])
128
+ assert_equal([500.0, 'aa'], r[:stats][:average_length])
115
129
  assert_nil(r[:stats][:coding_density])
116
130
  test_assembly
117
131
  r.compute_stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.18.0
4
+ version: 1.3.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-12 00:00:00.000000000 Z
11
+ date: 2024-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -212,6 +212,7 @@ files:
212
212
  - lib/miga/cli/action/option.rb
213
213
  - lib/miga/cli/action/preproc_wf.rb
214
214
  - lib/miga/cli/action/quality_wf.rb
215
+ - lib/miga/cli/action/relatives.rb
215
216
  - lib/miga/cli/action/rm.rb
216
217
  - lib/miga/cli/action/run.rb
217
218
  - lib/miga/cli/action/seqcode_get.rb