miga-base 1.3.18.0 → 1.3.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/relatives.rb +45 -0
- data/lib/miga/common/format.rb +14 -8
- data/lib/miga/dataset/result/add.rb +1 -1
- data/lib/miga/dataset.rb +12 -11
- data/lib/miga/version.rb +2 -2
- data/scripts/assembly.bash +1 -1
- data/test/result_stats_test.rb +18 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c63c60b5324c31420fa616747435b2e33e6fc7d0855cab55381f139789ccedc1
|
4
|
+
data.tar.gz: 5cbbd4de1f66b452ea72e4dface8617606aec919af08aaddea1a0f05e745873a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 57c3abcf3030051fb6d9839b58f0f9bfecd75aff18d8deb1f0b9d7d5244d6103b9764de844409d5dbec2ffffcffadaeff0b4a804419d21e8a74e09caf5ee7f73
|
7
|
+
data.tar.gz: bef9383f052d1331b0154b61d7c9e8dba07194775e5d05aea110771c26597fd6549ed4884d9a4c92c8484952c8f26d33f924281f56957e01d3110cb81c3d2d98
|
data/lib/miga/cli/action/relatives.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# @package MiGA
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require 'miga/cli/action'
|
5
|
+
|
6
|
+
class MiGA::Cli::Action::Relatives < MiGA::Cli::Action
|
7
|
+
def parse_cli
|
8
|
+
cli.defaults = { metric: :aai, external: false, how_many: 5 }
|
9
|
+
cli.parse do |opt|
|
10
|
+
cli.opt_object(opt, [:project, :dataset_opt])
|
11
|
+
cli.opt_filter_datasets(opt)
|
12
|
+
opt.on(
|
13
|
+
'--metric STRING',
|
14
|
+
'Use this metric of identity, one of ANI or AAI (default)'
|
15
|
+
) { |v| cli[:metric] = v.downcase.to_sym }
|
16
|
+
opt.on(
|
17
|
+
'--ref-project',
|
18
|
+
'Report distances to the external reference project used for taxonomy',
|
19
|
+
'By default: report distances to other datasets in the project'
|
20
|
+
) { |v| cli[:external] = v }
|
21
|
+
opt.on(
|
22
|
+
'-n', '--how-many INT', Integer,
|
23
|
+
'Number of top values to report'
|
24
|
+
) { |v| cli[:how_many] = v }
|
25
|
+
opt.on(
|
26
|
+
'--tab',
|
27
|
+
'Return a tab-delimited table'
|
28
|
+
) { |v| cli[:tabular] = v }
|
29
|
+
opt.on(
|
30
|
+
'-o', '--output PATH',
|
31
|
+
'Create output file instead of returning to STDOUT'
|
32
|
+
) { |v| cli[:output] = v }
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def perform
|
37
|
+
cr = []
|
38
|
+
cli.load_and_filter_datasets.each do |d|
|
39
|
+
d_cr = d.closest_relatives(cli[:how_many], cli[:external], cli[:metric])
|
40
|
+
cr += d_cr.map { |i| [d.name] + i }
|
41
|
+
end
|
42
|
+
io = cli[:output].nil? ? $stdout : File.open(cli[:output], 'w')
|
43
|
+
cli.table(['dataset_A', 'dataset_B', cli[:metric]], cr, io)
|
44
|
+
end
|
45
|
+
end
|
data/lib/miga/common/format.rb
CHANGED
@@ -23,8 +23,9 @@ module MiGA::Common::Format
|
|
23
23
|
end
|
24
24
|
|
25
25
|
##
|
26
|
-
# Cleans a FastA file in place
|
27
|
-
|
26
|
+
# Cleans a FastA file in place, removing all sequences shorter than
|
27
|
+
# +min_len+
|
28
|
+
def clean_fasta_file(file, min_len = 1)
|
28
29
|
tmp_fh = nil
|
29
30
|
tmp_path = nil
|
30
31
|
begin
|
@@ -39,19 +40,24 @@ module MiGA::Common::Format
|
|
39
40
|
tmp_path = tmp_fh.path
|
40
41
|
fh = File.open(file, 'r')
|
41
42
|
end
|
42
|
-
|
43
|
+
next_seq = ['', '']
|
43
44
|
fh.each_line do |ln|
|
44
45
|
ln.chomp!
|
45
46
|
if ln =~ /^>\s*(\S+)(.*)/
|
46
47
|
id, df = $1, $2
|
47
|
-
|
48
|
-
|
49
|
-
|
48
|
+
if next_seq[1].length >= min_len
|
49
|
+
tmp_fh.puts next_seq[0]
|
50
|
+
tmp_fh.print next_seq[1].wrap_width(80)
|
51
|
+
end
|
52
|
+
next_seq = [">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}", '']
|
50
53
|
else
|
51
|
-
|
54
|
+
next_seq[1] += ln.gsub(/[^A-Za-z\.\-]/, '')
|
52
55
|
end
|
53
56
|
end
|
54
|
-
|
57
|
+
if next_seq[1].length >= min_len
|
58
|
+
tmp_fh.puts next_seq[0]
|
59
|
+
tmp_fh.print next_seq[1].wrap_width(80)
|
60
|
+
end
|
55
61
|
tmp_fh.close
|
56
62
|
fh.close
|
57
63
|
FileUtils.mv(tmp_path, file)
|
data/lib/miga/dataset/result/add.rb
CHANGED
@@ -84,7 +84,7 @@ module MiGA::Dataset::Result::Add
|
|
84
84
|
opts[:is_clean] ||= false
|
85
85
|
r.clean! if opts[:is_clean]
|
86
86
|
unless r.clean?
|
87
|
-
MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs))
|
87
|
+
MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs), 1000)
|
88
88
|
r.clean!
|
89
89
|
end
|
90
90
|
r
|
data/lib/miga/dataset.rb
CHANGED
@@ -177,22 +177,23 @@ class MiGA::Dataset < MiGA::MiGA
|
|
177
177
|
alias is_active? active?
|
178
178
|
|
179
179
|
##
|
180
|
-
# Returns an Array of +how_many+ duples (Arrays) sorted by
|
181
|
-
#
|
182
|
-
# - +
|
183
|
-
#
|
184
|
-
|
185
|
-
|
186
|
-
def closest_relatives(how_many = 1, ref_project = false)
|
187
|
-
return nil if (ref? != ref_project) || multi?
|
180
|
+
# Returns an Array of +how_many+ duples (Arrays) sorted by +metric+
|
181
|
+
# (one of +:aai+ or +:ani+):
|
182
|
+
# - +0+: A String with the name(s) of the reference dataset
|
183
|
+
# - +1+: A Float with the AAI/ANI
|
184
|
+
def closest_relatives(how_many = 1, ref_project = false, metric = :aai)
|
185
|
+
return nil if multi?
|
188
186
|
|
189
187
|
r = result(ref_project ? :taxonomy : :distances)
|
190
188
|
return nil if r.nil?
|
191
189
|
|
192
190
|
require 'miga/sqlite'
|
193
|
-
|
194
|
-
|
195
|
-
|
191
|
+
metric = metric.to_s.downcase.to_sym
|
192
|
+
db_key = :"#{metric}_db"
|
193
|
+
MiGA::SQLite.new(r.file_path(db_key)).run(
|
194
|
+
"SELECT seq2, #{metric} FROM #{metric} WHERE seq2 != ? " \
|
195
|
+
"GROUP BY seq2 ORDER BY #{metric} DESC LIMIT ?",
|
196
|
+
[name, how_many]
|
196
197
|
)
|
197
198
|
end
|
198
199
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 18, 0].freeze
|
15
|
+
VERSION = [1.3, 20, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 7,
|
23
|
+
VERSION_DATE = Date.new(2024, 7, 23)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/assembly.bash
CHANGED
@@ -58,7 +58,7 @@ if [[ -s "$DATASET/scaffold.fa" ]] ; then
|
|
58
58
|
else
|
59
59
|
ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
|
60
60
|
fi
|
61
|
-
FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
|
61
|
+
FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2 >= 1000 { print $1 }' \
|
62
62
|
| FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
|
63
63
|
> "$DATASET.LargeContigs.fna"
|
64
64
|
|
data/test/result_stats_test.rb
CHANGED
@@ -83,7 +83,7 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
83
83
|
# Prepare result
|
84
84
|
dir = 'data/05.assembly'
|
85
85
|
fa = file_path(dir, '.LargeContigs.fna')
|
86
|
-
File.open(fa, 'w') { |fh| fh.puts
|
86
|
+
File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 500) }
|
87
87
|
touch_done(dir)
|
88
88
|
r = dataset.add_result(:assembly)
|
89
89
|
|
@@ -91,15 +91,29 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
91
91
|
assert_equal({}, r[:stats])
|
92
92
|
r.compute_stats
|
93
93
|
assert_equal(1, r[:stats][:contigs])
|
94
|
-
assert_equal([
|
94
|
+
assert_equal([2500, 'bp'], r[:stats][:total_length])
|
95
95
|
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
96
96
|
end
|
97
97
|
|
98
|
+
def test_large_contigs
|
99
|
+
# Prepare result
|
100
|
+
dir = 'data/05.assembly'
|
101
|
+
fa = file_path(dir, '.LargeContigs.fna')
|
102
|
+
File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 50) }
|
103
|
+
touch_done(dir)
|
104
|
+
r = dataset.add_result(:assembly)
|
105
|
+
|
106
|
+
# Test assertions
|
107
|
+
assert_equal({}, r[:stats])
|
108
|
+
r.compute_stats
|
109
|
+
assert_equal(0, r[:stats][:contigs])
|
110
|
+
end
|
111
|
+
|
98
112
|
def test_cds
|
99
113
|
# Prepare result
|
100
114
|
dir = 'data/06.cds'
|
101
115
|
fa = file_path(dir, '.faa')
|
102
|
-
File.open(fa, 'w') { |fh| fh.puts
|
116
|
+
File.open(fa, 'w') { |fh| fh.puts('>1', 'M' * 500) }
|
103
117
|
gff = file_path(dir, '.gff3.gz')
|
104
118
|
Zlib::GzipWriter.open(gff) do |fh|
|
105
119
|
fh.puts '# Model Data: a=b;transl_table=11;'
|
@@ -111,7 +125,7 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
111
125
|
assert_equal({}, r[:stats])
|
112
126
|
r.compute_stats
|
113
127
|
assert_equal(1, r[:stats][:predicted_proteins])
|
114
|
-
assert_equal([
|
128
|
+
assert_equal([500.0, 'aa'], r[:stats][:average_length])
|
115
129
|
assert_nil(r[:stats][:coding_density])
|
116
130
|
test_assembly
|
117
131
|
r.compute_stats
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.18.0
|
4
|
+
version: 1.3.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -212,6 +212,7 @@ files:
|
|
212
212
|
- lib/miga/cli/action/option.rb
|
213
213
|
- lib/miga/cli/action/preproc_wf.rb
|
214
214
|
- lib/miga/cli/action/quality_wf.rb
|
215
|
+
- lib/miga/cli/action/relatives.rb
|
215
216
|
- lib/miga/cli/action/rm.rb
|
216
217
|
- lib/miga/cli/action/run.rb
|
217
218
|
- lib/miga/cli/action/seqcode_get.rb
|