miga-base 0.2.4.2 → 0.2.4.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0568af20da0be58fdf630b68d78851117f5be7fd
4
- data.tar.gz: 3941417241eff3bb78965b95a415497ffc09109b
3
+ metadata.gz: fdd460c0009e55ffc82547e7d31f94789a874442
4
+ data.tar.gz: c3e560fb2e8871ac586ef8b1fd64c2b1b2076471
5
5
  SHA512:
6
- metadata.gz: d15f8cb3bec1a804fe09901f52d9d1aedd557136675ea2e9edead1090c0752bde92d36de1414a5d84df799c105afe852eb0d7e4afe3cfb122e3023353333c654
7
- data.tar.gz: 1d0527e5f4942560d7efea9cfa17c5f95979ae2b342aca954d859d021fdf7c31d46055065860384706adb05b37712cd0a60d127d90080ce25ba17d1797287574
6
+ metadata.gz: c77bb9fc35f046a2ebc1c1ddc457f19c4accafe06958b43d42a3654f91a5c19dcf92cc3624f9b30b950542d736787dc69f2e8e9ba88141d20923823da0812f46
7
+ data.tar.gz: ecee7c4611a74fd16115260e706ea19315ccd639070482010f0bf5c4f669fe30fe9dea755b912053136366831f715cf346f2a32ed38dbee1ea69442525f9e080
data/lib/miga/common.rb CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  require "miga/version"
5
5
  require "json"
6
+ require "tempfile"
6
7
 
7
8
  ##
8
9
  # Generic class used to handle system-wide information and methods, and parent
@@ -75,6 +76,35 @@ class MiGA::MiGA
75
76
  end
76
77
  end
77
78
 
79
##
# Cleans a FastA file in place.
#
# Reads +file+ (path String) line by line and rewrites it through a temporary
# file:
# - In definition lines (">" followed by an ID), every character of the ID
#   other than letters, digits, "_", "|" and "." is replaced by "_". Any text
#   after the ID is kept as is.
# - In all other lines, every character other than letters, "." and "-" is
#   removed, and the accumulated sequence is re-wrapped at 80 columns using
#   String#wrap_width.
#
# The cleaned content is copied over +file+ once the whole input was read, and
# the temporary file is always removed, even on failure.
#
# NOTE(review): a ">" line without any ID does not match the definition-line
# pattern, so it is handled as sequence (the ">" is stripped) and the
# neighboring records end up merged. Confirm whether such input can occur.
def self.clean_fasta_file(file)
  tmp = Tempfile.new("MiGA")
  begin
    File.open(file, "r") do |fh|
      # Single reusable buffer holding the sequence of the current record.
      # Appending in place avoids copying the whole accumulated sequence on
      # every input line; +dup+ guarantees a mutable String.
      buffer = "".dup
      fh.each_line do |ln|
        ln.chomp!
        if ln =~ /^>\s*(\S+)(.*)/
          id, df = $1, $2
          # Flush the sequence of the previous record before the new header.
          tmp.print buffer.wrap_width(80)
          buffer.clear
          tmp.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
        else
          buffer << ln.gsub(/[^A-Za-z\.\-]/, "")
        end
      end
      # Flush the sequence of the last record.
      tmp.print buffer.wrap_width(80)
    end
    # Close first so all buffered output is on disk before copying.
    tmp.close
    FileUtils.cp(tmp.path, file)
  ensure
    tmp.close
    tmp.unlink
  end
end
106
+
107
+
78
108
  ##
79
109
  # Check if the result files exist with +base+ name (String) followed by the
80
110
  # +ext+ values (Array of String).
@@ -129,4 +159,8 @@ class String
129
159
  # Replace underscores by spaces.
130
160
  def unmiga_name ; tr("_", " ") ; end
131
161
 
162
##
# Wraps the string with fixed Integer +width+.
#
# Returns a new String in which every run of up to +width+ characters that
# are not line breaks is followed by a newline. An empty string is returned
# unchanged. +width+ is coerced with Integer(), so numeric Strings are still
# accepted; raises ArgumentError if the resulting width is smaller than 1,
# since it is interpolated into the quantifier of the pattern below.
def wrap_width(width)
  width = Integer(width)
  raise ArgumentError, "width must be a positive integer" if width < 1
  gsub(/([^\n\r]{1,#{width}})/, "\\1\n")
end
165
+
132
166
  end
@@ -10,7 +10,7 @@ module MiGA::DatasetResult
10
10
  def add_result_raw_reads(base)
11
11
  return nil unless result_files_exist?(base, ".1.fastq")
12
12
  r = MiGA::Result.new(base + ".json")
13
- add_files_to_ds_result(r, name,
13
+ r = add_files_to_ds_result(r, name,
14
14
  ( result_files_exist?(base, ".2.fastq") ?
15
15
  {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
16
16
  {:single=>".1.fastq"} ))
@@ -62,6 +62,10 @@ module MiGA::DatasetResult
62
62
  r = MiGA::Result.new(base + ".json")
63
63
  r = add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
64
64
  :allcontigs=>".AllContigs.fna", :assembly_data=>""})
65
+ unless r.clean?
66
+ MiGA::MiGA.clean_fasta_file(r.file_path :largecontigs)
67
+ r.clean!
68
+ end
65
69
  add_result(:trimmed_fasta) #-> Post interposing
66
70
  r
67
71
  end
@@ -71,8 +75,14 @@ module MiGA::DatasetResult
71
75
  def add_result_cds(base)
72
76
  return nil unless result_files_exist?(base, %w[.faa .fna])
73
77
  r = MiGA::Result.new(base + ".json")
74
- add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
78
+ r = add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
75
79
  :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
80
+ unless r.clean?
81
+ MiGA::MiGA.clean_fasta_file(r.file_path :proteins)
82
+ MiGA::MiGA.clean_fasta_file(r.file_path :genes)
83
+ r.clean!
84
+ end
85
+ r
76
86
  end
77
87
 
78
88
  ##
@@ -80,7 +90,7 @@ module MiGA::DatasetResult
80
90
  def add_result_essential_genes(base)
81
91
  return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
82
92
  r = MiGA::Result.new(base + ".json")
83
- add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
93
+ r = add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
84
94
  :collection=>".ess", :report=>".ess/log"})
85
95
  end
86
96
 
@@ -90,8 +100,13 @@ module MiGA::DatasetResult
90
100
  return MiGA::Result.new(base + ".json") if result(:assembly).nil?
91
101
  return nil unless result_files_exist?(base, ".ssu.fa")
92
102
  r = MiGA::Result.new(base + ".json")
93
- add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
103
+ r = add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
94
104
  :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
105
+ unless r.clean?
106
+ MiGA::MiGA.clean_fasta_file(r.file_path :longest_ssu_gene)
107
+ r.clean!
108
+ end
109
+ r
95
110
  end
96
111
 
97
112
  ##
@@ -103,7 +118,7 @@ module MiGA::DatasetResult
103
118
  add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
104
119
  :mytaxain=>".mytaxain"})
105
120
  else
106
- MiGA::Result.new base + ".json"
121
+ MiGA::Result.new(base + ".json")
107
122
  end
108
123
  end
109
124
 
@@ -140,7 +155,7 @@ module MiGA::DatasetResult
140
155
  ##
141
156
  # Add result type +:stats+ at +base+.
142
157
  def add_result_stats(base)
143
- MiGA::Result.new(base + ".json")
158
+ MiGA::Result.new "#{base}.json"
144
159
  end
145
160
 
146
161
  ##
@@ -168,7 +183,7 @@ module MiGA::DatasetResult
168
183
  result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
169
184
  result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
170
185
  r = MiGA::Result.new(base + ".json")
171
- add_files_to_ds_result(r, name, {
186
+ r = add_files_to_ds_result(r, name, {
172
187
  :aai_medoids=>".aai-medoids.tsv",
173
188
  :haai_db=>".haai.db", :aai_db=>".aai.db",
174
189
  :ani_medoids=>".ani-medoids.tsv", :ani_db=>".ani.db"})
data/lib/miga/project.rb CHANGED
@@ -272,7 +272,7 @@ class MiGA::Project < MiGA::MiGA
272
272
  return MiGA::Result.load(base + ".json") unless save
273
273
  return nil unless result_files_exist?(base, ".done")
274
274
  r = send("add_result_#{name}", base)
275
- r.save
275
+ r.save unless r.nil?
276
276
  r
277
277
  end
278
278
 
@@ -20,8 +20,7 @@ module MiGA::ProjectResult
20
20
  end
21
21
 
22
22
  def add_result_clade_finding(base)
23
- return nil unless result_files_exist?(base,
24
- %w[.proposed-clades])
23
+ return nil unless result_files_exist?(base, %w[.proposed-clades])
25
24
  return nil unless is_clade? or result_files_exist?(base,
26
25
  %w[.pdf .classif .medoids .class.tsv .class.nwk])
27
26
  r = add_result_iter_clades(base)
data/lib/miga/result.rb CHANGED
@@ -41,6 +41,14 @@ class MiGA::Result < MiGA::MiGA
41
41
  @path = path
42
42
  MiGA::Result.exist?(path) ? self.load : create
43
43
  end
44
+
45
+ ##
46
+ # Is the result clean? Returns Boolean.
47
+ def clean? ; !! self[:clean] ; end
48
+
49
+ ##
50
+ # Register the result as cleaned.
51
+ def clean! ; self[:clean] = true ; end
44
52
 
45
53
  ##
46
54
  # Directory containing the result.
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 4, 2]
13
+ VERSION = [0.2, 4, 3]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2017, 3, 24)
21
+ VERSION_DATE = Date.new(2017, 3, 28)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/test/project_test.rb CHANGED
@@ -75,4 +75,60 @@ class ProjectTest < Test::Unit::TestCase
75
75
  File.expand_path("metadata/#{d1.name}.json", p2.path)))
76
76
  end
77
77
 
78
+ def test_add_result
79
+ p1 = $p1
80
+ assert_nil(p1.add_result(:doom))
81
+ %w[.Rdata .log .txt .done].each do |x|
82
+ assert_nil(p1.add_result(:haai_distances))
83
+ FileUtils.touch(
84
+ File.expand_path("data/09.distances/01.haai/miga-project#{x}",p1.path))
85
+ end
86
+ assert_equal(MiGA::Result, p1.add_result(:haai_distances).class)
87
+ end
88
+
89
##
# Walks the shared test project through its project-wide results: checks the
# preprocessing flag, then registers the distances results and the clade
# results one by one. For each result, files are touched one extension at a
# time and the result must not be registered until all of them exist.
def test_preprocessing
  p1 = $p1
  assert(p1.done_preprocessing?)
  d1 = p1.add_dataset("BAH")
  assert(! p1.done_preprocessing?)
  FileUtils.touch(File.expand_path("data/90.stats/#{d1.name}.done", p1.path))
  assert(p1.done_preprocessing?)
  # Distances
  [:haai_distances, :aai_distances, :ani_distances].each do |r|
    assert_equal(Symbol, p1.next_distances.class)
    d = MiGA::Project.RESULT_DIRS[r]
    %w[.done .Rdata .log .txt].each do |x|
      # Asserted before touching +x+, so the result is still incomplete here.
      assert_nil(p1.add_result(r),
        "Premature registration of result #{r} at extension #{x}.")
      FileUtils.touch(File.expand_path("data/#{d}/miga-project#{x}", p1.path))
    end
    assert_equal(MiGA::Result, p1.add_result(r).class,
      "Impossible to add #{r} result.")
  end
  assert_equal(:clade_finding, p1.next_distances)

  # Clades
  assert_nil(p1.next_inclade)
  p1.metadata[:type] = :clade
  res = [
    [:clade_finding,
      %w[.pdf .classif .medoids .class.tsv .class.nwk .proposed-clades]],
    [:subclades, %w[.pdf .classif .medoids .class.tsv .class.nwk]],
    [:ogs, %w[.ogs .stats]]
  ]
  res.each do |rr|
    (r, xs) = rr
    d = MiGA::Project.RESULT_DIRS[r]
    assert_equal(Symbol, p1.next_inclade.class)
    ([".done"] + xs).each do |x|
      # Asserted before touching +x+, so the result is still incomplete here.
      assert_nil(p1.add_result(r),
        "Premature registration of result #{r} at extension #{x}.")
      FileUtils.touch(File.expand_path("data/#{d}/miga-project#{x}", p1.path))
    end
    assert_equal(MiGA::Result, p1.add_result(r).class,
      "Impossible to add #{r} result.")
  end
  assert_nil(p1.next_inclade)
end
133
+
78
134
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4.2
4
+ version: 0.2.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-24 00:00:00.000000000 Z
11
+ date: 2017-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client