miga-base 0.2.4.2 → 0.2.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0568af20da0be58fdf630b68d78851117f5be7fd
4
- data.tar.gz: 3941417241eff3bb78965b95a415497ffc09109b
3
+ metadata.gz: fdd460c0009e55ffc82547e7d31f94789a874442
4
+ data.tar.gz: c3e560fb2e8871ac586ef8b1fd64c2b1b2076471
5
5
  SHA512:
6
- metadata.gz: d15f8cb3bec1a804fe09901f52d9d1aedd557136675ea2e9edead1090c0752bde92d36de1414a5d84df799c105afe852eb0d7e4afe3cfb122e3023353333c654
7
- data.tar.gz: 1d0527e5f4942560d7efea9cfa17c5f95979ae2b342aca954d859d021fdf7c31d46055065860384706adb05b37712cd0a60d127d90080ce25ba17d1797287574
6
+ metadata.gz: c77bb9fc35f046a2ebc1c1ddc457f19c4accafe06958b43d42a3654f91a5c19dcf92cc3624f9b30b950542d736787dc69f2e8e9ba88141d20923823da0812f46
7
+ data.tar.gz: ecee7c4611a74fd16115260e706ea19315ccd639070482010f0bf5c4f669fe30fe9dea755b912053136366831f715cf346f2a32ed38dbee1ea69442525f9e080
data/lib/miga/common.rb CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  require "miga/version"
5
5
  require "json"
6
+ require "tempfile"
6
7
 
7
8
  ##
8
9
  # Generic class used to handle system-wide information and methods, and parent
@@ -75,6 +76,35 @@ class MiGA::MiGA
75
76
  end
76
77
  end
77
78
 
79
##
# Cleans a FastA file in place.
#
# Reads +file+ (path to a FastA file) line by line and writes a cleaned copy
# to a temporary file, which is then copied back over +file+:
#
# - In definition lines (starting with ">" followed by an identifier), every
#   character of the identifier other than letters, digits, "_", "|" and "."
#   is replaced by "_". The remainder of the line is kept verbatim.
# - In every other line, all characters other than letters, "." and "-" are
#   dropped, and the accumulated sequence of each record is re-wrapped with
#   String#wrap_width at 80 columns.
#
# The temporary file is removed even if an error interrupts the process.
def self.clean_fasta_file(file)
  tmp = Tempfile.new("MiGA")
  begin
    buffer = "".dup
    File.open(file, "r") do |fh|
      fh.each_line do |ln|
        ln.chomp!
        if (m = /^>\s*(\S+)(.*)/.match(ln))
          # A new record starts: flush the sequence of the previous one.
          tmp.print buffer.wrap_width(80)
          buffer.clear
          tmp.puts ">#{m[1].gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{m[2]}"
        else
          # Append in place; += would copy the whole sequence on every line.
          buffer << ln.gsub(/[^A-Za-z\.\-]/, "")
        end
      end
    end
    # Flush the sequence of the last record.
    tmp.print buffer.wrap_width(80)
    # Close first so that everything is on disk before copying it back.
    tmp.close
    FileUtils.cp(tmp.path, file)
  ensure
    tmp.close
    tmp.unlink
  end
end
106
+
107
+
78
108
  ##
79
109
  # Check if the result files exist with +base+ name (String) followed by the
80
110
  # +ext+ values (Array of String).
@@ -129,4 +159,8 @@ class String
129
159
  # Replace underscores by spaces.
130
160
  def unmiga_name ; tr("_", " ") ; end
131
161
 
162
##
# Wraps the string with fixed Integer +width+.
#
# Returns a new String in which every run of up to +width+ characters other
# than "\n" and "\r" is followed by a "\n". An empty string is returned empty.
#
# +width+ is coerced with Kernel#Integer and must be positive, because it is
# interpolated into the pattern quantifier: any other value would break the
# pattern or silently change its meaning. Raises ArgumentError otherwise.
def wrap_width(width)
  cols = Integer(width)
  unless cols > 0
    raise ArgumentError, "width must be a positive Integer, got #{width.inspect}"
  end
  gsub(/[^\n\r]{1,#{cols}}/) { |chunk| "#{chunk}\n" }
end
165
+
132
166
  end
@@ -10,7 +10,7 @@ module MiGA::DatasetResult
10
10
  def add_result_raw_reads(base)
11
11
  return nil unless result_files_exist?(base, ".1.fastq")
12
12
  r = MiGA::Result.new(base + ".json")
13
- add_files_to_ds_result(r, name,
13
+ r = add_files_to_ds_result(r, name,
14
14
  ( result_files_exist?(base, ".2.fastq") ?
15
15
  {:pair1=>".1.fastq", :pair2=>".2.fastq"} :
16
16
  {:single=>".1.fastq"} ))
@@ -62,6 +62,10 @@ module MiGA::DatasetResult
62
62
  r = MiGA::Result.new(base + ".json")
63
63
  r = add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
64
64
  :allcontigs=>".AllContigs.fna", :assembly_data=>""})
65
+ unless r.clean?
66
+ MiGA::MiGA.clean_fasta_file(r.file_path :largecontigs)
67
+ r.clean!
68
+ end
65
69
  add_result(:trimmed_fasta) #-> Post interposing
66
70
  r
67
71
  end
@@ -71,8 +75,14 @@ module MiGA::DatasetResult
71
75
  def add_result_cds(base)
72
76
  return nil unless result_files_exist?(base, %w[.faa .fna])
73
77
  r = MiGA::Result.new(base + ".json")
74
- add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
78
+ r = add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
75
79
  :gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
80
+ unless r.clean?
81
+ MiGA::MiGA.clean_fasta_file(r.file_path :proteins)
82
+ MiGA::MiGA.clean_fasta_file(r.file_path :genes)
83
+ r.clean!
84
+ end
85
+ r
76
86
  end
77
87
 
78
88
  ##
@@ -80,7 +90,7 @@ module MiGA::DatasetResult
80
90
  def add_result_essential_genes(base)
81
91
  return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
82
92
  r = MiGA::Result.new(base + ".json")
83
- add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
93
+ r = add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
84
94
  :collection=>".ess", :report=>".ess/log"})
85
95
  end
86
96
 
@@ -90,8 +100,13 @@ module MiGA::DatasetResult
90
100
  return MiGA::Result.new(base + ".json") if result(:assembly).nil?
91
101
  return nil unless result_files_exist?(base, ".ssu.fa")
92
102
  r = MiGA::Result.new(base + ".json")
93
- add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
103
+ r = add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
94
104
  :gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
105
+ unless r.clean?
106
+ MiGA::MiGA.clean_fasta_file(r.file_path :longest_ssu_gene)
107
+ r.clean!
108
+ end
109
+ r
95
110
  end
96
111
 
97
112
  ##
@@ -103,7 +118,7 @@ module MiGA::DatasetResult
103
118
  add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
104
119
  :mytaxain=>".mytaxain"})
105
120
  else
106
- MiGA::Result.new base + ".json"
121
+ MiGA::Result.new(base + ".json")
107
122
  end
108
123
  end
109
124
 
@@ -140,7 +155,7 @@ module MiGA::DatasetResult
140
155
  ##
141
156
  # Add result type +:stats+ at +base+.
142
157
  def add_result_stats(base)
143
- MiGA::Result.new(base + ".json")
158
+ MiGA::Result.new "#{base}.json"
144
159
  end
145
160
 
146
161
  ##
@@ -168,7 +183,7 @@ module MiGA::DatasetResult
168
183
  result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
169
184
  result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
170
185
  r = MiGA::Result.new(base + ".json")
171
- add_files_to_ds_result(r, name, {
186
+ r = add_files_to_ds_result(r, name, {
172
187
  :aai_medoids=>".aai-medoids.tsv",
173
188
  :haai_db=>".haai.db", :aai_db=>".aai.db",
174
189
  :ani_medoids=>".ani-medoids.tsv", :ani_db=>".ani.db"})
data/lib/miga/project.rb CHANGED
@@ -272,7 +272,7 @@ class MiGA::Project < MiGA::MiGA
272
272
  return MiGA::Result.load(base + ".json") unless save
273
273
  return nil unless result_files_exist?(base, ".done")
274
274
  r = send("add_result_#{name}", base)
275
- r.save
275
+ r.save unless r.nil?
276
276
  r
277
277
  end
278
278
 
@@ -20,8 +20,7 @@ module MiGA::ProjectResult
20
20
  end
21
21
 
22
22
  def add_result_clade_finding(base)
23
- return nil unless result_files_exist?(base,
24
- %w[.proposed-clades])
23
+ return nil unless result_files_exist?(base, %w[.proposed-clades])
25
24
  return nil unless is_clade? or result_files_exist?(base,
26
25
  %w[.pdf .classif .medoids .class.tsv .class.nwk])
27
26
  r = add_result_iter_clades(base)
data/lib/miga/result.rb CHANGED
@@ -41,6 +41,14 @@ class MiGA::Result < MiGA::MiGA
41
41
  @path = path
42
42
  MiGA::Result.exist?(path) ? self.load : create
43
43
  end
44
+
45
##
# Tells whether the +:clean+ entry of the result is set. Always returns
# +true+ or +false+, never the raw stored value.
def clean?
  self[:clean] ? true : false
end
48
+
49
##
# Flags the result as cleaned by storing +true+ under the +:clean+ key.
# Returns +true+.
def clean!
  self[:clean] = true
end
44
52
 
45
53
  ##
46
54
  # Directory containing the result.
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 4, 2]
13
+ VERSION = [0.2, 4, 3]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2017, 3, 24)
21
+ VERSION_DATE = Date.new(2017, 3, 28)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/test/project_test.rb CHANGED
@@ -75,4 +75,60 @@ class ProjectTest < Test::Unit::TestCase
75
75
  File.expand_path("metadata/#{d1.name}.json", p2.path)))
76
76
  end
77
77
 
78
# Checks that Project#add_result returns nil for the +:doom+ type, keeps
# returning nil for +:haai_distances+ while any of its expected files is
# missing, and returns a MiGA::Result once all of them exist.
def test_add_result
  project = $p1
  assert_nil(project.add_result(:doom))
  %w[.Rdata .log .txt .done].each do |ext|
    # Checked before each file is created, so one file is always missing here.
    assert_nil(project.add_result(:haai_distances))
    target = File.expand_path(
      "data/09.distances/01.haai/miga-project#{ext}", project.path)
    FileUtils.touch(target)
  end
  registered = project.add_result(:haai_distances)
  assert_equal(MiGA::Result, registered.class)
end
88
+
89
# Walks the shared project fixture through preprocessing, the distance
# results and the clade results. For each result type it checks that
# Project#add_result returns nil while any expected file is missing and a
# MiGA::Result once all of them have been created.
def test_preprocessing
  p1 = $p1
  assert(p1.done_preprocessing?)
  d1 = p1.add_dataset("BAH")
  assert(! p1.done_preprocessing?)
  FileUtils.touch(File.expand_path("data/90.stats/#{d1.name}.done", p1.path))
  assert(p1.done_preprocessing?)

  # Distances
  [:haai_distances, :aai_distances, :ani_distances].each do |r|
    assert_equal(Symbol, p1.next_distances.class)
    d = MiGA::Project.RESULT_DIRS[r]
    %w[.done .Rdata .log .txt].each do |x|
      # Checked before touching the file, so at least one is still missing.
      assert_nil(p1.add_result(r),
        "Premature registration of result #{r} at extension #{x}.")
      FileUtils.touch(File.expand_path("data/#{d}/miga-project#{x}", p1.path))
    end
    assert_equal(MiGA::Result, p1.add_result(r).class,
      "Impossible to add #{r} result.")
  end
  assert_equal(:clade_finding, p1.next_distances)

  # Clades
  assert_nil(p1.next_inclade)
  p1.metadata[:type] = :clade
  res = [
    [:clade_finding,
      %w[.pdf .classif .medoids .class.tsv .class.nwk .proposed-clades]],
    [:subclades, %w[.pdf .classif .medoids .class.tsv .class.nwk]],
    [:ogs, %w[.ogs .stats]]
  ]
  res.each do |r, xs|
    d = MiGA::Project.RESULT_DIRS[r]
    assert_equal(Symbol, p1.next_inclade.class)
    ([".done"] + xs).each do |x|
      assert_nil(p1.add_result(r),
        "Premature registration of result #{r} at extension #{x}.")
      FileUtils.touch(File.expand_path("data/#{d}/miga-project#{x}", p1.path))
    end
    assert_equal(MiGA::Result, p1.add_result(r).class,
      "Impossible to add #{r} result.")
  end
  assert_nil(p1.next_inclade)
end
133
+
78
134
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4.2
4
+ version: 0.2.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-24 00:00:00.000000000 Z
11
+ date: 2017-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client