miga-base 1.3.18.0 → 1.3.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf9ebc2ca06846d9b76c981c2a89896080690bc44d979ba28b4a9173f7386c7a
4
- data.tar.gz: a156a4f0894f1988862b8daf296b916c80cf197eb36fb7c13c4698975403c35e
3
+ metadata.gz: c63c60b5324c31420fa616747435b2e33e6fc7d0855cab55381f139789ccedc1
4
+ data.tar.gz: 5cbbd4de1f66b452ea72e4dface8617606aec919af08aaddea1a0f05e745873a
5
5
  SHA512:
6
- metadata.gz: b6e1318553d24d93af467670754e86e5281c7418d4242ad11b56125889188fb7cde48a1858f20e1b07bab04a372c4aac718e04c005bdfadccf641c15cd7cf2ed
7
- data.tar.gz: e584f1b4442a099fb16c4b0c07515b9332867c5b72438bae9e33467f9d0911c7bd4811a6861121482cada1b88f44aaec2efd6d2f668fa6a05c722c9fe56995bc
6
+ metadata.gz: 57c3abcf3030051fb6d9839b58f0f9bfecd75aff18d8deb1f0b9d7d5244d6103b9764de844409d5dbec2ffffcffadaeff0b4a804419d21e8a74e09caf5ee7f73
7
+ data.tar.gz: bef9383f052d1331b0154b61d7c9e8dba07194775e5d05aea110771c26597fd6549ed4884d9a4c92c8484952c8f26d33f924281f56957e01d3110cb81c3d2d98
@@ -0,0 +1,45 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
+ class MiGA::Cli::Action::Relatives < MiGA::Cli::Action
7
+ def parse_cli
8
+ cli.defaults = { metric: :aai, external: false, how_many: 5 }
9
+ cli.parse do |opt|
10
+ cli.opt_object(opt, [:project, :dataset_opt])
11
+ cli.opt_filter_datasets(opt)
12
+ opt.on(
13
+ '--metric STRING',
14
+ 'Use this metric of identity, one of ANI or AAI (default)'
15
+ ) { |v| cli[:metric] = v.downcase.to_sym }
16
+ opt.on(
17
+ '--ref-project',
18
+ 'Report distances to the external reference project used for taxonomy',
19
+ 'By default: report distances to other datasets in the project'
20
+ ) { |v| cli[:external] = v }
21
+ opt.on(
22
+ '-n', '--how-many INT', Integer,
23
+ 'Number of top values to report'
24
+ ) { |v| cli[:how_many] = v }
25
+ opt.on(
26
+ '--tab',
27
+ 'Return a tab-delimited table'
28
+ ) { |v| cli[:tabular] = v }
29
+ opt.on(
30
+ '-o', '--output PATH',
31
+ 'Create output file instead of returning to STDOUT'
32
+ ) { |v| cli[:output] = v }
33
+ end
34
+ end
35
+
36
+ def perform
37
+ cr = []
38
+ cli.load_and_filter_datasets.each do |d|
39
+ d_cr = d.closest_relatives(cli[:how_many], cli[:external], cli[:metric])
40
+ cr += d_cr.map { |i| [d.name] + i }
41
+ end
42
+ io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
43
+ cli.table(['dataset_A', 'dataset_B', cli[:metric]], cr, io)
44
+ end
45
+ end
@@ -23,8 +23,9 @@ module MiGA::Common::Format
23
23
  end
24
24
 
25
25
  ##
26
- # Cleans a FastA file in place.
27
- def clean_fasta_file(file)
26
+ # Cleans a FastA file in place, removing all sequences shorter than
27
+ # +min_len+
28
+ def clean_fasta_file(file, min_len = 1)
28
29
  tmp_fh = nil
29
30
  tmp_path = nil
30
31
  begin
@@ -39,19 +40,24 @@ module MiGA::Common::Format
39
40
  tmp_path = tmp_fh.path
40
41
  fh = File.open(file, 'r')
41
42
  end
42
- buffer = ''.dup
43
+ next_seq = ['', '']
43
44
  fh.each_line do |ln|
44
45
  ln.chomp!
45
46
  if ln =~ /^>\s*(\S+)(.*)/
46
47
  id, df = $1, $2
47
- tmp_fh.print buffer.wrap_width(80)
48
- buffer = ''.dup
49
- tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}"
48
+ if next_seq[1].length >= min_len
49
+ tmp_fh.puts next_seq[0]
50
+ tmp_fh.print next_seq[1].wrap_width(80)
51
+ end
52
+ next_seq = [">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}", '']
50
53
  else
51
- buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
54
+ next_seq[1] += ln.gsub(/[^A-Za-z\.\-]/, '')
52
55
  end
53
56
  end
54
- tmp_fh.print buffer.wrap_width(80)
57
+ if next_seq[1].length >= min_len
58
+ tmp_fh.puts next_seq[0]
59
+ tmp_fh.print next_seq[1].wrap_width(80)
60
+ end
55
61
  tmp_fh.close
56
62
  fh.close
57
63
  FileUtils.mv(tmp_path, file)
@@ -84,7 +84,7 @@ module MiGA::Dataset::Result::Add
84
84
  opts[:is_clean] ||= false
85
85
  r.clean! if opts[:is_clean]
86
86
  unless r.clean?
87
- MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs))
87
+ MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs), 1000)
88
88
  r.clean!
89
89
  end
90
90
  r
data/lib/miga/dataset.rb CHANGED
@@ -177,22 +177,23 @@ class MiGA::Dataset < MiGA::MiGA
177
177
  alias is_active? active?
178
178
 
179
179
  ##
180
- # Returns an Array of +how_many+ duples (Arrays) sorted by AAI:
181
- # - +0+: A String with the name(s) of the reference dataset.
182
- # - +1+: A Float with the AAI.
183
- # This function is currently only supported for query datasets when
184
- # +ref_project+ is false (default), and only for reference dataset when
185
- # +ref_project+ is true. It returns +nil+ if this analysis is not supported.
186
- def closest_relatives(how_many = 1, ref_project = false)
187
- return nil if (ref? != ref_project) || multi?
180
+ # Returns an Array of +how_many+ duples (Arrays) sorted by +metric+
181
+ # (one of +:aai+ or +:ani+):
182
+ # - +0+: A String with the name(s) of the reference dataset
183
+ # - +1+: A Float with the AAI/ANI
184
+ def closest_relatives(how_many = 1, ref_project = false, metric = :aai)
185
+ return nil if multi?
188
186
 
189
187
  r = result(ref_project ? :taxonomy : :distances)
190
188
  return nil if r.nil?
191
189
 
192
190
  require 'miga/sqlite'
193
- MiGA::SQLite.new(r.file_path(:aai_db)).run(
194
- 'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
195
- 'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many]
191
+ metric = metric.to_s.downcase.to_sym
192
+ db_key = :"#{metric}_db"
193
+ MiGA::SQLite.new(r.file_path(db_key)).run(
194
+ "SELECT seq2, #{metric} FROM #{metric} WHERE seq2 != ? " \
195
+ "GROUP BY seq2 ORDER BY #{metric} DESC LIMIT ?",
196
+ [name, how_many]
196
197
  )
197
198
  end
198
199
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 18, 0].freeze
15
+ VERSION = [1.3, 20, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 7, 12)
23
+ VERSION_DATE = Date.new(2024, 7, 23)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -58,7 +58,7 @@ if [[ -s "$DATASET/scaffold.fa" ]] ; then
58
58
  else
59
59
  ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
60
60
  fi
61
- FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
61
+ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2 >= 1000 { print $1 }' \
62
62
  | FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
63
63
  > "$DATASET.LargeContigs.fna"
64
64
 
@@ -83,7 +83,7 @@ class ResultStatsTest < Test::Unit::TestCase
83
83
  # Prepare result
84
84
  dir = 'data/05.assembly'
85
85
  fa = file_path(dir, '.LargeContigs.fna')
86
- File.open(fa, 'w') { |fh| fh.puts '>1', 'ACTAC' }
86
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 500) }
87
87
  touch_done(dir)
88
88
  r = dataset.add_result(:assembly)
89
89
 
@@ -91,15 +91,29 @@ class ResultStatsTest < Test::Unit::TestCase
91
91
  assert_equal({}, r[:stats])
92
92
  r.compute_stats
93
93
  assert_equal(1, r[:stats][:contigs])
94
- assert_equal([5, 'bp'], r[:stats][:total_length])
94
+ assert_equal([2500, 'bp'], r[:stats][:total_length])
95
95
  assert_equal([40.0, '%'], r[:stats][:g_c_content])
96
96
  end
97
97
 
98
+ def test_large_contigs
99
+ # Prepare result
100
+ dir = 'data/05.assembly'
101
+ fa = file_path(dir, '.LargeContigs.fna')
102
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 50) }
103
+ touch_done(dir)
104
+ r = dataset.add_result(:assembly)
105
+
106
+ # Test assertions
107
+ assert_equal({}, r[:stats])
108
+ r.compute_stats
109
+ assert_equal(0, r[:stats][:contigs])
110
+ end
111
+
98
112
  def test_cds
99
113
  # Prepare result
100
114
  dir = 'data/06.cds'
101
115
  fa = file_path(dir, '.faa')
102
- File.open(fa, 'w') { |fh| fh.puts '>1', 'M' }
116
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'M' * 500) }
103
117
  gff = file_path(dir, '.gff3.gz')
104
118
  Zlib::GzipWriter.open(gff) do |fh|
105
119
  fh.puts '# Model Data: a=b;transl_table=11;'
@@ -111,7 +125,7 @@ class ResultStatsTest < Test::Unit::TestCase
111
125
  assert_equal({}, r[:stats])
112
126
  r.compute_stats
113
127
  assert_equal(1, r[:stats][:predicted_proteins])
114
- assert_equal([1.0, 'aa'], r[:stats][:average_length])
128
+ assert_equal([500.0, 'aa'], r[:stats][:average_length])
115
129
  assert_nil(r[:stats][:coding_density])
116
130
  test_assembly
117
131
  r.compute_stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.18.0
4
+ version: 1.3.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-12 00:00:00.000000000 Z
11
+ date: 2024-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -212,6 +212,7 @@ files:
212
212
  - lib/miga/cli/action/option.rb
213
213
  - lib/miga/cli/action/preproc_wf.rb
214
214
  - lib/miga/cli/action/quality_wf.rb
215
+ - lib/miga/cli/action/relatives.rb
215
216
  - lib/miga/cli/action/rm.rb
216
217
  - lib/miga/cli/action/run.rb
217
218
  - lib/miga/cli/action/seqcode_get.rb