miga-base 1.3.18.0 → 1.3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf9ebc2ca06846d9b76c981c2a89896080690bc44d979ba28b4a9173f7386c7a
4
- data.tar.gz: a156a4f0894f1988862b8daf296b916c80cf197eb36fb7c13c4698975403c35e
3
+ metadata.gz: 17006d8dcfa4831924a52e750945a9f7dd37aab9c25db9f46e8b679724fefe18
4
+ data.tar.gz: 5d06af895cd42a710c0a2fbf72a415858fde6efac771b8c9dac124ca7b35b228
5
5
  SHA512:
6
- metadata.gz: b6e1318553d24d93af467670754e86e5281c7418d4242ad11b56125889188fb7cde48a1858f20e1b07bab04a372c4aac718e04c005bdfadccf641c15cd7cf2ed
7
- data.tar.gz: e584f1b4442a099fb16c4b0c07515b9332867c5b72438bae9e33467f9d0911c7bd4811a6861121482cada1b88f44aaec2efd6d2f668fa6a05c722c9fe56995bc
6
+ metadata.gz: 8f128a71a9237728e2740e2066513f398c2c34ba067fc7d7cec3ccb06a397c6f41eedcdb4ea653c416f60de5fc43690419921c64d01301f1206ca56f07c62576
7
+ data.tar.gz: 6cd59ea9390942e428d060ae390fbf36fc7ad959c5bb9281577cbd85cf4ba4cf5df568b72fdce3dd8a6c92e69d23099d39f4375d435f2473fcf5806a2d7f6554
@@ -23,8 +23,9 @@ module MiGA::Common::Format
23
23
  end
24
24
 
25
25
  ##
26
- # Cleans a FastA file in place.
27
- def clean_fasta_file(file)
26
+ # Cleans a FastA file in place, removing all sequences shorter than
27
+ # +min_len+
28
+ def clean_fasta_file(file, min_len = 1)
28
29
  tmp_fh = nil
29
30
  tmp_path = nil
30
31
  begin
@@ -39,19 +40,24 @@ module MiGA::Common::Format
39
40
  tmp_path = tmp_fh.path
40
41
  fh = File.open(file, 'r')
41
42
  end
42
- buffer = ''.dup
43
+ next_seq = ['', '']
43
44
  fh.each_line do |ln|
44
45
  ln.chomp!
45
46
  if ln =~ /^>\s*(\S+)(.*)/
46
47
  id, df = $1, $2
47
- tmp_fh.print buffer.wrap_width(80)
48
- buffer = ''.dup
49
- tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}"
48
+ if next_seq[1].length >= min_len
49
+ tmp_fh.puts next_seq[0]
50
+ tmp_fh.print next_seq[1].wrap_width(80)
51
+ end
52
+ next_seq = [">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}", '']
50
53
  else
51
- buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
54
+ next_seq[1] += ln.gsub(/[^A-Za-z\.\-]/, '')
52
55
  end
53
56
  end
54
- tmp_fh.print buffer.wrap_width(80)
57
+ if next_seq[1].length >= min_len
58
+ tmp_fh.puts next_seq[0]
59
+ tmp_fh.print next_seq[1].wrap_width(80)
60
+ end
55
61
  tmp_fh.close
56
62
  fh.close
57
63
  FileUtils.mv(tmp_path, file)
@@ -84,7 +84,7 @@ module MiGA::Dataset::Result::Add
84
84
  opts[:is_clean] ||= false
85
85
  r.clean! if opts[:is_clean]
86
86
  unless r.clean?
87
- MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs))
87
+ MiGA::MiGA.clean_fasta_file(r.file_path(:largecontigs), 1000)
88
88
  r.clean!
89
89
  end
90
90
  r
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 18, 0].freeze
15
+ VERSION = [1.3, 19, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 7, 12)
23
+ VERSION_DATE = Date.new(2024, 7, 17)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -58,7 +58,7 @@ if [[ -s "$DATASET/scaffold.fa" ]] ; then
58
58
  else
59
59
  ln -s "$DATASET/contig.fa" "$DATASET.AllContigs.fna"
60
60
  fi
61
- FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
61
+ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2 >= 1000 { print $1 }' \
62
62
  | FastA.filter.pl /dev/stdin "$DATASET.AllContigs.fna" \
63
63
  > "$DATASET.LargeContigs.fna"
64
64
 
@@ -83,7 +83,7 @@ class ResultStatsTest < Test::Unit::TestCase
83
83
  # Prepare result
84
84
  dir = 'data/05.assembly'
85
85
  fa = file_path(dir, '.LargeContigs.fna')
86
- File.open(fa, 'w') { |fh| fh.puts '>1', 'ACTAC' }
86
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 500) }
87
87
  touch_done(dir)
88
88
  r = dataset.add_result(:assembly)
89
89
 
@@ -91,15 +91,29 @@ class ResultStatsTest < Test::Unit::TestCase
91
91
  assert_equal({}, r[:stats])
92
92
  r.compute_stats
93
93
  assert_equal(1, r[:stats][:contigs])
94
- assert_equal([5, 'bp'], r[:stats][:total_length])
94
+ assert_equal([2500, 'bp'], r[:stats][:total_length])
95
95
  assert_equal([40.0, '%'], r[:stats][:g_c_content])
96
96
  end
97
97
 
98
+ def test_large_contigs
99
+ # Prepare result
100
+ dir = 'data/05.assembly'
101
+ fa = file_path(dir, '.LargeContigs.fna')
102
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'ACTAC' * 50) }
103
+ touch_done(dir)
104
+ r = dataset.add_result(:assembly)
105
+
106
+ # Test assertions
107
+ assert_equal({}, r[:stats])
108
+ r.compute_stats
109
+ assert_equal(0, r[:stats][:contigs])
110
+ end
111
+
98
112
  def test_cds
99
113
  # Prepare result
100
114
  dir = 'data/06.cds'
101
115
  fa = file_path(dir, '.faa')
102
- File.open(fa, 'w') { |fh| fh.puts '>1', 'M' }
116
+ File.open(fa, 'w') { |fh| fh.puts('>1', 'M' * 500) }
103
117
  gff = file_path(dir, '.gff3.gz')
104
118
  Zlib::GzipWriter.open(gff) do |fh|
105
119
  fh.puts '# Model Data: a=b;transl_table=11;'
@@ -111,7 +125,7 @@ class ResultStatsTest < Test::Unit::TestCase
111
125
  assert_equal({}, r[:stats])
112
126
  r.compute_stats
113
127
  assert_equal(1, r[:stats][:predicted_proteins])
114
- assert_equal([1.0, 'aa'], r[:stats][:average_length])
128
+ assert_equal([500.0, 'aa'], r[:stats][:average_length])
115
129
  assert_nil(r[:stats][:coding_density])
116
130
  test_assembly
117
131
  r.compute_stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.18.0
4
+ version: 1.3.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-12 00:00:00.000000000 Z
11
+ date: 2024-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons