miga-base 1.3.18.0 → 1.3.19.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf9ebc2ca06846d9b76c981c2a89896080690bc44d979ba28b4a9173f7386c7a
4
- data.tar.gz: a156a4f0894f1988862b8daf296b916c80cf197eb36fb7c13c4698975403c35e
3
+ metadata.gz: 17006d8dcfa4831924a52e750945a9f7dd37aab9c25db9f46e8b679724fefe18
4
+ data.tar.gz: 5d06af895cd42a710c0a2fbf72a415858fde6efac771b8c9dac124ca7b35b228
5
5
  SHA512:
6
- metadata.gz: b6e1318553d24d93af467670754e86e5281c7418d4242ad11b56125889188fb7cde48a1858f20e1b07bab04a372c4aac718e04c005bdfadccf641c15cd7cf2ed
7
- data.tar.gz: e584f1b4442a099fb16c4b0c07515b9332867c5b72438bae9e33467f9d0911c7bd4811a6861121482cada1b88f44aaec2efd6d2f668fa6a05c722c9fe56995bc
6
+ metadata.gz: 8f128a71a9237728e2740e2066513f398c2c34ba067fc7d7cec3ccb06a397c6f41eedcdb4ea653c416f60de5fc43690419921c64d01301f1206ca56f07c62576
7
+ data.tar.gz: 6cd59ea9390942e428d060ae390fbf36fc7ad959c5bb9281577cbd85cf4ba4cf5df568b72fdce3dd8a6c92e69d23099d39f4375d435f2473fcf5806a2d7f6554
@@ -23,8 +23,9 @@ module MiGA::Common::Format
23
23
  end
24
24
 
25
25
  ##
26
- # Cleans a FastA file in place.
27
- def clean_fasta_file(file)
26
+ # Cleans a FastA file in place, removing all sequences shorter than
27
+ # +min_len+
28
+ def clean_fasta_file(file, min_len = 1)
28
29
  tmp_fh = nil
29
30
  tmp_path = nil
30
31
  begin
@@ -39,19 +40,24 @@ module MiGA::Common::Format
39
40
  tmp_path = tmp_fh.path
40
41
  fh = File.open(file, 'r')
41
42
  end
42
- buffer = ''.dup
43
+ next_seq = ['', '']
43
44
  fh.each_line do |ln|
44
45
  ln.chomp!
45
46
  if ln =~ /^>\s*(\S+)(.*)/
46
47
  id, df = $1, $2
47
- tmp_fh.print buffer.wrap_width(80)
48
- buffer = ''.dup
49
- tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}"
48
+ if next_seq[1].length >= min_len
49
+ tmp_fh.puts next_seq[0]
50
+ tmp_fh.print next_seq[1].wrap_width(80)
51
+ end
52
+ next_seq = [">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}", '']
50
53
  else
51
- buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
54
+ next_seq[1] += ln.gsub(/[^A-Za-z\.\-]/, '')
52
55
  end
53
56
  end
54
- tmp_fh.print buffer.wrap_width(80)
57
+ if next_seq[1].length >= min_len
58
+ tmp_fh.puts next_seq[0]
59
+ tmp_fh.print next_seq[1].wrap_width(80)
60
+ end
55
61
  tmp_fh.close
56
62
  fh.close
57
63
  FileUtils.mv(tmp_path, file)
@@ -84,7 +84,7 @@ module MiGA::Dataset::Result::Add
84
84
  opts[:is_clean] ||= false
85
85
  r.clean! if opts[:is_clean]
86
86
  unless r.clean?
87
- MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs))
87
+ MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs), 1000)
88
88
  r.clean!
89
89
  end
90
90
  r
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 18, 0].freeze
15
+ VERSION = [1.3, 19, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 7, 12)
23
+ VERSION_DATE = Date.new(2024, 7, 17)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -58,7 +58,7 @@ if [[ -s "$DATASET/scaffold.fa" ]] ; then
58
58
  else
59
59
  ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
60
60
  fi
61
- FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
61
+ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2 >= 1000 { print $1 }' \
62
62
  | FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
63
63
  > "$DATASET.LargeContigs.fna"
64
64
 
@@ -83,7 +83,7 @@ class ResultStatsTest < Test::Unit::TestCase
83
83
  # Prepare result
84
84
  dir = 'data/05.assembly'
85
85
  fa = file_path(dir, '.LargeContigs.fna')
86
- File.open(fa, 'w') { |fh| fh.puts '>1', 'ACTAC' }
86
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 500) }
87
87
  touch_done(dir)
88
88
  r = dataset.add_result(:assembly)
89
89
 
@@ -91,15 +91,29 @@ class ResultStatsTest < Test::Unit::TestCase
91
91
  assert_equal({}, r[:stats])
92
92
  r.compute_stats
93
93
  assert_equal(1, r[:stats][:contigs])
94
- assert_equal([5, 'bp'], r[:stats][:total_length])
94
+ assert_equal([2500, 'bp'], r[:stats][:total_length])
95
95
  assert_equal([40.0, '%'], r[:stats][:g_c_content])
96
96
  end
97
97
 
98
+ def test_large_contigs
99
+ # Prepare result
100
+ dir = 'data/05.assembly'
101
+ fa = file_path(dir, '.LargeContigs.fna')
102
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 50) }
103
+ touch_done(dir)
104
+ r = dataset.add_result(:assembly)
105
+
106
+ # Test assertions
107
+ assert_equal({}, r[:stats])
108
+ r.compute_stats
109
+ assert_equal(0, r[:stats][:contigs])
110
+ end
111
+
98
112
  def test_cds
99
113
  # Prepare result
100
114
  dir = 'data/06.cds'
101
115
  fa = file_path(dir, '.faa')
102
- File.open(fa, 'w') { |fh| fh.puts '>1', 'M' }
116
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'M' * 500) }
103
117
  gff = file_path(dir, '.gff3.gz')
104
118
  Zlib::GzipWriter.open(gff) do |fh|
105
119
  fh.puts '# Model Data: a=b;transl_table=11;'
@@ -111,7 +125,7 @@ class ResultStatsTest < Test::Unit::TestCase
111
125
  assert_equal({}, r[:stats])
112
126
  r.compute_stats
113
127
  assert_equal(1, r[:stats][:predicted_proteins])
114
- assert_equal([1.0, 'aa'], r[:stats][:average_length])
128
+ assert_equal([500.0, 'aa'], r[:stats][:average_length])
115
129
  assert_nil(r[:stats][:coding_density])
116
130
  test_assembly
117
131
  r.compute_stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.18.0
4
+ version: 1.3.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-12 00:00:00.000000000 Z
11
+ date: 2024-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons