miga-base 0.2.6.5 → 0.3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a69a12511e98d2cc6751efa31592661b664e9d33
4
- data.tar.gz: 807b0a13efce0367c88369a2f389fffff34ee58d
3
+ metadata.gz: 141b39aed7636b3f2389837e5fd13348a8e11252
4
+ data.tar.gz: b4bcec9bc8a8fd8c15c670b6f0fae0925d36138e
5
5
  SHA512:
6
- metadata.gz: bdc1db4adec179da57c45ab29a41ec36ef2895f34d8159792420560bd0bc15910052fe6e2949ee0a992500e0001a7db4d2bbbf4f4d5e1daefb4c1c0fa3bd4ebb
7
- data.tar.gz: 40d5a05ec2c154b50eb8952b87212106461080740e8986f982f44fa79cea3100d31beaff1c647afa83892b3e2cde37464ddd11568ce658addcb60992b6395de3
6
+ metadata.gz: c92ae5fb577d1e945d2a8be0f3d3dbe05aefd61094ee386f0bfb198237e83e487660419fa3cafb1cdc13aa25929ddd2022fab03530cac3ab4e927cb35481e82e
7
+ data.tar.gz: 6566cad346ff51d1e6b55c2e7bbaa3b5d766ae8292db0e829430237e2254dbb09a67d1ea63cdb9ea42365e47236b01b2af41e6e722aa153523bf39438d5e7ccb
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ require "miga/tax_dist"
7
+
8
+ o = {q:true, test:"both"}
9
+ OptionParser.new do |opt|
10
+ opt_banner(opt)
11
+ opt_object(opt, o, [:project, :dataset])
12
+ opt.on("-t", "--test STRING",
13
+ "Test to perform. Supported values: intax, novel, both."
14
+ ){ |v| o[:test]=v.downcase }
15
+ opt_common(opt, o)
16
+ end.parse!
17
+
18
+ ##=> Main <=
19
+ opt_require(o, project:"-P", dataset:"-D")
20
+
21
+ $stderr.puts "Loading project." unless o[:q]
22
+ p = MiGA::Project.load(o[:project])
23
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
24
+
25
+ $stderr.puts "Loading dataset." unless o[:q]
26
+ ds = p.dataset(o[:dataset])
27
+
28
+ $stderr.puts "Finding closest relative." unless o[:q]
29
+ cr = ds.closest_relatives(1)
30
+
31
+ unless cr.empty?
32
+ $stderr.puts "Querying probability distributions." unless o[:q]
33
+ cr = cr[0]
34
+ puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
35
+ tax = p.dataset(cr[0]).metadata[:tax]
36
+ tax ||= {}
37
+
38
+ if %w[intax both].include? o[:test]
39
+ # Intax
40
+ r = MiGA::TaxDist.aai_pvalues(cr[1], :intax).map do |k,v|
41
+ sig = ""
42
+ [0.5,0.1,0.05,0.01].each{ |i| sig << "*" if v<i }
43
+ [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || "?"), v, sig]
44
+ end
45
+ puts ""
46
+ puts "Taxonomic classification"
47
+ puts MiGA::MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
48
+ end
49
+
50
+ if %w[novel both].include? o[:test]
51
+ # Novel
52
+ r = MiGA::TaxDist.aai_pvalues(cr[1], :novel).map do |k,v|
53
+ sig = ""
54
+ [0.5,0.1,0.05,0.01].each{ |i| sig << "*" if v<i }
55
+ [MiGA::Taxonomy.LONG_RANKS[k], v, sig]
56
+ end
57
+ puts ""
58
+ puts "Taxonomic novelty"
59
+ puts MiGA::MiGA.tabulate(%w[Rank P-value Signif.], r)
60
+ end
61
+
62
+ puts ""
63
+ puts "Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01."
64
+ end
65
+
66
+ $stderr.puts "Done." unless o[:q]
data/bin/miga CHANGED
@@ -34,6 +34,7 @@ $task_desc = {
34
34
  date: "Returns the current date in standard MiGA format.",
35
35
  # Taxonomy
36
36
  add_taxonomy: "Registers taxonomic information for datasets.",
37
+ test_taxonomy: "Returns test of taxonomic distributions for query datasets.",
37
38
  index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
38
39
  tax_distributions: "Estimates distributions of distance by taxonomy.",
39
40
  }
data/lib/miga/dataset.rb CHANGED
@@ -4,6 +4,7 @@
4
4
  require "miga/metadata"
5
5
  require "miga/result"
6
6
  require "miga/dataset_result"
7
+ require "sqlite3"
7
8
 
8
9
  ##
9
10
  # Dataset representation in MiGA.
@@ -135,28 +136,32 @@ class MiGA::Dataset < MiGA::MiGA
135
136
  # Get standard metadata values for the dataset as Array.
136
137
  def info
137
138
  MiGA::Dataset.INFO_FIELDS.map do |k|
138
- (k=="name") ? self.name : self.metadata[k.to_sym]
139
+ (k=="name") ? self.name : metadata[k.to_sym]
139
140
  end
140
141
  end
141
142
 
142
143
  ##
143
144
  # Is this dataset a reference?
144
- def is_ref? ; !!self.metadata[:ref] ; end
145
+ def is_ref? ; !!metadata[:ref] ; end
146
+
147
+ ##
148
+ # Is this dataset a query (non-reference)?
149
+ def is_query? ; !metadata[:ref] ; end
145
150
 
146
151
  ##
147
152
  # Is this dataset known to be multi-organism?
148
153
  def is_multi?
149
- return false if self.metadata[:type].nil? or
150
- @@KNOWN_TYPES[self.metadata[:type]].nil?
151
- @@KNOWN_TYPES[self.metadata[:type]][:multi]
154
+ return false if metadata[:type].nil? or
155
+ @@KNOWN_TYPES[type].nil?
156
+ @@KNOWN_TYPES[type][:multi]
152
157
  end
153
158
 
154
159
  ##
155
160
  # Is this dataset known to be single-organism?
156
161
  def is_nonmulti?
157
- return false if self.metadata[:type].nil? or
158
- @@KNOWN_TYPES[self.metadata[:type]].nil?
159
- !@@KNOWN_TYPES[self.metadata[:type]][:multi]
162
+ return false if metadata[:type].nil? or
163
+ @@KNOWN_TYPES[type].nil?
164
+ !@@KNOWN_TYPES[type][:multi]
160
165
  end
161
166
 
162
167
  ##
@@ -264,5 +269,20 @@ class MiGA::Dataset < MiGA::MiGA
264
269
  end
265
270
  adv
266
271
  end
272
+
273
+ ##
274
+ # Returns an Array of duples (Arrays) sorted by AAI:
275
+ # - +0+: A String with the name(s) of the reference dataset.
276
+ # - +1+: A Float with the AAI.
277
+ # This function is currently only supported for query datasets. It returns
278
+ # +nil+ if this analysis is not supported.
279
+ def closest_relatives(how_many=1)
280
+ return nil if is_ref? or project.is_multi?
281
+ r = result :distances
282
+ return nil if r.nil?
283
+ db = SQLite3::Database.new(r.file_path :aai_db)
284
+ db.execute("SELECT seq2, aai FROM aai WHERE seq2 != ? " +
285
+ "GROUP BY seq2 ORDER BY aai DESC LIMIT ?", [name, how_many])
286
+ end
267
287
 
268
288
  end # class MiGA::Dataset
data/lib/miga/project.rb CHANGED
@@ -162,6 +162,10 @@ class MiGA::Project < MiGA::MiGA
162
162
  ##
163
163
  # Is this a clade project?
164
164
  def is_clade? ; type==:clade ; end
165
+
166
+ ##
167
+ # Is this a project for multi-organism datasets?
168
+ def is_multi? ; @@KNOWN_TYPES[type][:multi] ; end
165
169
 
166
170
  ##
167
171
  # Returns Array of MiGA::Dataset.
@@ -55,6 +55,8 @@ module MiGA::ProjectResult
55
55
  r.add_file(:ogs, "miga-project.ogs")
56
56
  r.add_file(:stats, "miga-project.stats")
57
57
  r.add_file(:rbm, "miga-project.rbm")
58
+ r.add_file(:core_pan, "miga-project.core-pan.tsv")
59
+ r.add_file(:core_pan_plot, "miga-project.core-pan.pdf")
58
60
  r
59
61
  end
60
62
 
data/lib/miga/version.rb CHANGED
@@ -10,15 +10,15 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 6, 5]
13
+ VERSION = [0.3, 0, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
17
- VERSION_NAME = "pochoir"
17
+ VERSION_NAME = "tinge"
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2017, 5, 22)
21
+ VERSION_DATE = Date.new(2017, 5, 29)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/scripts/ogs.bash CHANGED
@@ -11,30 +11,34 @@ cd "$PROJECT/data/10.clades/03.ogs"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
- echo -n "" > miga-project.log
15
14
  DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
16
-
17
- # Extract RBMs
18
- [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
19
- for i in $DS ; do
20
- for j in $DS ; do
21
- file="miga-project.rbm/$i-$j.rbm"
22
- [[ -s $file ]] && continue
23
- echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
- "where seq1='$i' and seq2='$j' ;" \
25
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
- > "$file"
27
- [[ -s "$file" ]] || rm "$file"
15
+ if [[ ! -s miga-project.ogs ]] ; then
16
+ # Extract RBMs
17
+ [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
18
+ echo -n "" > miga-project.log
19
+ for i in $DS ; do
20
+ for j in $DS ; do
21
+ file="miga-project.rbm/$i-$j.rbm"
22
+ [[ -s $file ]] && continue
23
+ echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
+ "where seq1='$i' and seq2='$j' ;" \
25
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
+ > "$file"
27
+ [[ -s "$file" ]] || rm "$file"
28
+ done
29
+ echo "$i" >> miga-project.log
28
30
  done
29
- echo "$i" >> miga-project.log
30
- done
31
31
 
32
- # Estimate OGs
33
- ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
34
- ogs.stats.rb -o miga-project.ogs -j miga-project.stats
32
+ # Estimate OGs and Clean RBMs
33
+ ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
34
+ rm -rf miga-project.rbm
35
+ fi
35
36
 
36
- # Clean RBMs
37
- rm -rf miga-project.rbm
37
+ # Calculate Statistics
38
+ ogs.stats.rb -o miga-project.ogs -j miga-project.stats
39
+ ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
40
+ Rscript $MIGA/utils/core-pan-plot.R \
41
+ miga-project.core-pan.tsv miga-project.core-pan.pdf
38
42
 
39
43
  # Finalize
40
44
  miga date > "miga-project.done"
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env Rscript
2
+
3
+ argv <- commandArgs(trailingOnly=T)
4
+
5
+ plot_core_pan <- function(core_pan, pdf){
6
+ a <- read.table(core_pan, sep="\t", header=TRUE)
7
+ pdf(pdf, 7, 5)
8
+ plot(1, type="n", xlim=c(0, max(a$genomes)*1.05), xaxs="i", yaxs="i",
9
+ ylim=c(0, max(a$pan_q3)*1.05), xlab="Genomes", ylab="Orthologous Groups")
10
+ # Core
11
+ polygon(c(a$genomes, rev(a$genomes)), c(a$core_q1, rev(a$core_q3)),
12
+ border=NA, col=rgb(0, 121, 166, 128/2, max=255))
13
+ lines(a$genomes, a$core_avg, col=rgb(0,121,166,max=255), lty=2)
14
+ lines(a$genomes, a$core_q2, col=rgb(0,121,166,max=255), lty=1)
15
+ # Pan
16
+ polygon(c(a$genomes, rev(a$genomes)), c(a$pan_q1, rev(a$pan_q3)),
17
+ border=NA, col=rgb(96, 11, 64, 128/2, max=255))
18
+ lines(a$genomes, a$pan_avg, col=rgb(96,11,64,max=255), lty=2)
19
+ lines(a$genomes, a$pan_q2, col=rgb(96,11,64,max=255), lty=1)
20
+ # Legend
21
+ legend("topleft",
22
+ legend=c("pangenome","core genome","Inter-Quartile","Median","Average"),
23
+ pch=c(16,16,15,NA,NA),lty=c(NA,NA,NA,1,2), pt.cex=c(1,1,2,NA,NA),
24
+ col=c(rgb(96,11,64,max=255), rgb(0,121,166,max=255),
25
+ rgb(0.5,0.5,0.5,166/255), rep(rgb(0.5,0.5,0.5),2)), bty="n")
26
+ dev.off()
27
+ }
28
+
29
+ plot_core_pan(argv[1], argv[2])
metadata CHANGED
@@ -1,103 +1,103 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6.5
4
+ version: 0.3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-22 00:00:00.000000000 Z
11
+ date: 2017-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: daemons
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '1.2'
34
- - - ">="
34
+ - - '>='
35
35
  - !ruby/object:Gem::Version
36
36
  version: 1.2.4
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
40
40
  requirements:
41
- - - "~>"
41
+ - - ~>
42
42
  - !ruby/object:Gem::Version
43
43
  version: '1.2'
44
- - - ">="
44
+ - - '>='
45
45
  - !ruby/object:Gem::Version
46
46
  version: 1.2.4
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: json
49
49
  requirement: !ruby/object:Gem::Requirement
50
50
  requirements:
51
- - - "~>"
51
+ - - ~>
52
52
  - !ruby/object:Gem::Version
53
53
  version: '1.8'
54
54
  type: :runtime
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - "~>"
58
+ - - ~>
59
59
  - !ruby/object:Gem::Version
60
60
  version: '1.8'
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: sqlite3
63
63
  requirement: !ruby/object:Gem::Requirement
64
64
  requirements:
65
- - - "~>"
65
+ - - ~>
66
66
  - !ruby/object:Gem::Version
67
67
  version: '1.3'
68
68
  type: :runtime
69
69
  prerelease: false
70
70
  version_requirements: !ruby/object:Gem::Requirement
71
71
  requirements:
72
- - - "~>"
72
+ - - ~>
73
73
  - !ruby/object:Gem::Version
74
74
  version: '1.3'
75
75
  - !ruby/object:Gem::Dependency
76
76
  name: rake
77
77
  requirement: !ruby/object:Gem::Requirement
78
78
  requirements:
79
- - - "~>"
79
+ - - ~>
80
80
  - !ruby/object:Gem::Version
81
81
  version: '11'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
- - - "~>"
86
+ - - ~>
87
87
  - !ruby/object:Gem::Version
88
88
  version: '11'
89
89
  - !ruby/object:Gem::Dependency
90
90
  name: test-unit
91
91
  requirement: !ruby/object:Gem::Requirement
92
92
  requirements:
93
- - - "~>"
93
+ - - ~>
94
94
  - !ruby/object:Gem::Version
95
95
  version: '3'
96
96
  type: :development
97
97
  prerelease: false
98
98
  version_requirements: !ruby/object:Gem::Requirement
99
99
  requirements:
100
- - - "~>"
100
+ - - ~>
101
101
  - !ruby/object:Gem::Version
102
102
  version: '3'
103
103
  description: Microbial Genomes Atlas
@@ -108,32 +108,6 @@ extensions: []
108
108
  extra_rdoc_files:
109
109
  - README.md
110
110
  files:
111
- - Gemfile
112
- - LICENSE
113
- - README.md
114
- - Rakefile
115
- - actions/add_result.rb
116
- - actions/add_taxonomy.rb
117
- - actions/create_dataset.rb
118
- - actions/create_project.rb
119
- - actions/daemon.rb
120
- - actions/date.rb
121
- - actions/download_dataset.rb
122
- - actions/find_datasets.rb
123
- - actions/import_datasets.rb
124
- - actions/index_taxonomy.rb
125
- - actions/list_datasets.rb
126
- - actions/list_files.rb
127
- - actions/plugins.rb
128
- - actions/project_info.rb
129
- - actions/result_stats.rb
130
- - actions/run_local.rb
131
- - actions/tax_distributions.rb
132
- - actions/unlink_dataset.rb
133
- - bin/miga
134
- - lib/miga.rb
135
- - lib/miga/_data/aai-intax.tsv.gz
136
- - lib/miga/_data/aai-novel.tsv.gz
137
111
  - lib/miga/common.rb
138
112
  - lib/miga/daemon.rb
139
113
  - lib/miga/dataset.rb
@@ -147,6 +121,18 @@ files:
147
121
  - lib/miga/tax_index.rb
148
122
  - lib/miga/taxonomy.rb
149
123
  - lib/miga/version.rb
124
+ - lib/miga.rb
125
+ - test/common_test.rb
126
+ - test/daemon_test.rb
127
+ - test/dataset_test.rb
128
+ - test/metadata_test.rb
129
+ - test/project_test.rb
130
+ - test/remote_dataset_test.rb
131
+ - test/tax_index_test.rb
132
+ - test/taxonomy_test.rb
133
+ - test/test_helper.rb
134
+ - lib/miga/_data/aai-intax.tsv.gz
135
+ - lib/miga/_data/aai-novel.tsv.gz
150
136
  - scripts/_distances_functions.bash
151
137
  - scripts/_distances_noref_nomulti.bash
152
138
  - scripts/_distances_ref_nomulti.bash
@@ -170,16 +156,8 @@ files:
170
156
  - scripts/subclades.bash
171
157
  - scripts/trimmed_fasta.bash
172
158
  - scripts/trimmed_reads.bash
173
- - test/common_test.rb
174
- - test/daemon_test.rb
175
- - test/dataset_test.rb
176
- - test/metadata_test.rb
177
- - test/project_test.rb
178
- - test/remote_dataset_test.rb
179
- - test/tax_index_test.rb
180
- - test/taxonomy_test.rb
181
- - test/test_helper.rb
182
159
  - utils/adapters.fa
160
+ - utils/core-pan-plot.R
183
161
  - utils/index_metadata.rb
184
162
  - utils/mytaxa_scan.R
185
163
  - utils/mytaxa_scan.rb
@@ -189,6 +167,30 @@ files:
189
167
  - utils/subclades-compile.rb
190
168
  - utils/subclades-nj.R
191
169
  - utils/subclades.R
170
+ - bin/miga
171
+ - actions/add_result.rb
172
+ - actions/add_taxonomy.rb
173
+ - actions/create_dataset.rb
174
+ - actions/create_project.rb
175
+ - actions/daemon.rb
176
+ - actions/date.rb
177
+ - actions/download_dataset.rb
178
+ - actions/find_datasets.rb
179
+ - actions/import_datasets.rb
180
+ - actions/index_taxonomy.rb
181
+ - actions/list_datasets.rb
182
+ - actions/list_files.rb
183
+ - actions/plugins.rb
184
+ - actions/project_info.rb
185
+ - actions/result_stats.rb
186
+ - actions/run_local.rb
187
+ - actions/tax_distributions.rb
188
+ - actions/test_taxonomy.rb
189
+ - actions/unlink_dataset.rb
190
+ - Gemfile
191
+ - Rakefile
192
+ - README.md
193
+ - LICENSE
192
194
  homepage: http://enve-omics.ce.gatech.edu/miga
193
195
  licenses:
194
196
  - Artistic-2.0
@@ -197,25 +199,25 @@ post_install_message:
197
199
  rdoc_options:
198
200
  - lib
199
201
  - README.md
200
- - "--main"
202
+ - --main
201
203
  - README.md
202
- - "--title"
204
+ - --title
203
205
  - MiGA
204
206
  require_paths:
205
207
  - lib
206
208
  required_ruby_version: !ruby/object:Gem::Requirement
207
209
  requirements:
208
- - - ">="
210
+ - - '>='
209
211
  - !ruby/object:Gem::Version
210
212
  version: '1.9'
211
213
  required_rubygems_version: !ruby/object:Gem::Requirement
212
214
  requirements:
213
- - - ">="
215
+ - - '>='
214
216
  - !ruby/object:Gem::Version
215
217
  version: '0'
216
218
  requirements: []
217
219
  rubyforge_project:
218
- rubygems_version: 2.6.8
220
+ rubygems_version: 2.0.14
219
221
  signing_key:
220
222
  specification_version: 4
221
223
  summary: MiGA