miga-base 1.3.18.0 → 1.3.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/common/format.rb +14 -8
- data/lib/miga/dataset/result/add.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/assembly.bash +1 -1
- data/test/result_stats_test.rb +18 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17006d8dcfa4831924a52e750945a9f7dd37aab9c25db9f46e8b679724fefe18
|
4
|
+
data.tar.gz: 5d06af895cd42a710c0a2fbf72a415858fde6efac771b8c9dac124ca7b35b228
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f128a71a9237728e2740e2066513f398c2c34ba067fc7d7cec3ccb06a397c6f41eedcdb4ea653c416f60de5fc43690419921c64d01301f1206ca56f07c62576
|
7
|
+
data.tar.gz: 6cd59ea9390942e428d060ae390fbf36fc7ad959c5bb9281577cbd85cf4ba4cf5df568b72fdce3dd8a6c92e69d23099d39f4375d435f2473fcf5806a2d7f6554
|
data/lib/miga/common/format.rb
CHANGED
@@ -23,8 +23,9 @@ module MiGA::Common::Format
|
|
23
23
|
end
|
24
24
|
|
25
25
|
##
|
26
|
-
# Cleans a FastA file in place
|
27
|
-
|
26
|
+
# Cleans a FastA file in place, removing all sequences shorter than
|
27
|
+
# +min_len+
|
28
|
+
def clean_fasta_file(file, min_len = 1)
|
28
29
|
tmp_fh = nil
|
29
30
|
tmp_path = nil
|
30
31
|
begin
|
@@ -39,19 +40,24 @@ module MiGA::Common::Format
|
|
39
40
|
tmp_path = tmp_fh.path
|
40
41
|
fh = File.open(file, 'r')
|
41
42
|
end
|
42
|
-
|
43
|
+
next_seq = ['', '']
|
43
44
|
fh.each_line do |ln|
|
44
45
|
ln.chomp!
|
45
46
|
if ln =~ /^>\s*(\S+)(.*)/
|
46
47
|
id, df = $1, $2
|
47
|
-
|
48
|
-
|
49
|
-
|
48
|
+
if next_seq[1].length >= min_len
|
49
|
+
tmp_fh.puts next_seq[0]
|
50
|
+
tmp_fh.print next_seq[1].wrap_width(80)
|
51
|
+
end
|
52
|
+
next_seq = [">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}", '']
|
50
53
|
else
|
51
|
-
|
54
|
+
next_seq[1] += ln.gsub(/[^A-Za-z\.\-]/, '')
|
52
55
|
end
|
53
56
|
end
|
54
|
-
|
57
|
+
if next_seq[1].length >= min_len
|
58
|
+
tmp_fh.puts next_seq[0]
|
59
|
+
tmp_fh.print next_seq[1].wrap_width(80)
|
60
|
+
end
|
55
61
|
tmp_fh.close
|
56
62
|
fh.close
|
57
63
|
FileUtils.mv(tmp_path, file)
|
@@ -84,7 +84,7 @@ module MiGA::Dataset::Result::Add
|
|
84
84
|
opts[:is_clean] ||= false
|
85
85
|
r.clean! if opts[:is_clean]
|
86
86
|
unless r.clean?
|
87
|
-
MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs))
|
87
|
+
MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs), 1000)
|
88
88
|
r.clean!
|
89
89
|
end
|
90
90
|
r
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 18, 0].freeze
|
15
|
+
VERSION = [1.3, 19, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 7,
|
23
|
+
VERSION_DATE = Date.new(2024, 7, 17)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/assembly.bash
CHANGED
@@ -58,7 +58,7 @@ if [[ -s "$DATASET/scaffold.fa" ]] ; then
|
|
58
58
|
else
|
59
59
|
ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
|
60
60
|
fi
|
61
|
-
FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
|
61
|
+
FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2 >= 1000 { print $1 }' \
|
62
62
|
| FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
|
63
63
|
> "$DATASET.LargeContigs.fna"
|
64
64
|
|
data/test/result_stats_test.rb
CHANGED
@@ -83,7 +83,7 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
83
83
|
# Prepare result
|
84
84
|
dir = 'data/05.assembly'
|
85
85
|
fa = file_path(dir, '.LargeContigs.fna')
|
86
|
-
File.open(fa, 'w') { |fh| fh.puts
|
86
|
+
File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 500) }
|
87
87
|
touch_done(dir)
|
88
88
|
r = dataset.add_result(:assembly)
|
89
89
|
|
@@ -91,15 +91,29 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
91
91
|
assert_equal({}, r[:stats])
|
92
92
|
r.compute_stats
|
93
93
|
assert_equal(1, r[:stats][:contigs])
|
94
|
-
assert_equal([
|
94
|
+
assert_equal([2500, 'bp'], r[:stats][:total_length])
|
95
95
|
assert_equal([40.0, '%'], r[:stats][:g_c_content])
|
96
96
|
end
|
97
97
|
|
98
|
+
def test_large_contigs
|
99
|
+
# Prepare result
|
100
|
+
dir = 'data/05.assembly'
|
101
|
+
fa = file_path(dir, '.LargeContigs.fna')
|
102
|
+
File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 50) }
|
103
|
+
touch_done(dir)
|
104
|
+
r = dataset.add_result(:assembly)
|
105
|
+
|
106
|
+
# Test assertions
|
107
|
+
assert_equal({}, r[:stats])
|
108
|
+
r.compute_stats
|
109
|
+
assert_equal(0, r[:stats][:contigs])
|
110
|
+
end
|
111
|
+
|
98
112
|
def test_cds
|
99
113
|
# Prepare result
|
100
114
|
dir = 'data/06.cds'
|
101
115
|
fa = file_path(dir, '.faa')
|
102
|
-
File.open(fa, 'w') { |fh| fh.puts
|
116
|
+
File.open(fa, 'w') { |fh| fh.puts('>1', 'M' * 500) }
|
103
117
|
gff = file_path(dir, '.gff3.gz')
|
104
118
|
Zlib::GzipWriter.open(gff) do |fh|
|
105
119
|
fh.puts '# Model Data: a=b;transl_table=11;'
|
@@ -111,7 +125,7 @@ class ResultStatsTest < Test::Unit::TestCase
|
|
111
125
|
assert_equal({}, r[:stats])
|
112
126
|
r.compute_stats
|
113
127
|
assert_equal(1, r[:stats][:predicted_proteins])
|
114
|
-
assert_equal([
|
128
|
+
assert_equal([500.0, 'aa'], r[:stats][:average_length])
|
115
129
|
assert_nil(r[:stats][:coding_density])
|
116
130
|
test_assembly
|
117
131
|
r.compute_stats
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.18.0
|
4
|
+
version: 1.3.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|