miga-base 0.2.6.5 → 0.3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a69a12511e98d2cc6751efa31592661b664e9d33
4
- data.tar.gz: 807b0a13efce0367c88369a2f389fffff34ee58d
3
+ metadata.gz: 141b39aed7636b3f2389837e5fd13348a8e11252
4
+ data.tar.gz: b4bcec9bc8a8fd8c15c670b6f0fae0925d36138e
5
5
  SHA512:
6
- metadata.gz: bdc1db4adec179da57c45ab29a41ec36ef2895f34d8159792420560bd0bc15910052fe6e2949ee0a992500e0001a7db4d2bbbf4f4d5e1daefb4c1c0fa3bd4ebb
7
- data.tar.gz: 40d5a05ec2c154b50eb8952b87212106461080740e8986f982f44fa79cea3100d31beaff1c647afa83892b3e2cde37464ddd11568ce658addcb60992b6395de3
6
+ metadata.gz: c92ae5fb577d1e945d2a8be0f3d3dbe05aefd61094ee386f0bfb198237e83e487660419fa3cafb1cdc13aa25929ddd2022fab03530cac3ab4e927cb35481e82e
7
+ data.tar.gz: 6566cad346ff51d1e6b55c2e7bbaa3b5d766ae8292db0e829430237e2254dbb09a67d1ea63cdb9ea42365e47236b01b2af41e6e722aa153523bf39438d5e7ccb
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ require "miga/tax_dist"
7
+
8
+ o = {q:true, test:"both"}
9
+ OptionParser.new do |opt|
10
+ opt_banner(opt)
11
+ opt_object(opt, o, [:project, :dataset])
12
+ opt.on("-t", "--test STRING",
13
+ "Test to perform. Supported values: intax, novel, both."
14
+ ){ |v| o[:test]=v.downcase }
15
+ opt_common(opt, o)
16
+ end.parse!
17
+
18
+ ##=> Main <=
19
+ opt_require(o, project:"-P", dataset:"-D")
20
+
21
+ $stderr.puts "Loading project." unless o[:q]
22
+ p = MiGA::Project.load(o[:project])
23
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
24
+
25
+ $stderr.puts "Loading dataset." unless o[:q]
26
+ ds = p.dataset(o[:dataset])
27
+
28
+ $stderr.puts "Finding closest relative." unless o[:q]
29
+ cr = ds.closest_relatives(1)
30
+
31
+ unless cr.empty?
32
+ $stderr.puts "Querying probability distributions." unless o[:q]
33
+ cr = cr[0]
34
+ puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
35
+ tax = p.dataset(cr[0]).metadata[:tax]
36
+ tax ||= {}
37
+
38
+ if %w[intax both].include? o[:test]
39
+ # Intax
40
+ r = MiGA::TaxDist.aai_pvalues(cr[1], :intax).map do |k,v|
41
+ sig = ""
42
+ [0.5,0.1,0.05,0.01].each{ |i| sig << "*" if v<i }
43
+ [MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || "?"), v, sig]
44
+ end
45
+ puts ""
46
+ puts "Taxonomic classification"
47
+ puts MiGA::MiGA.tabulate(%w[Rank Taxonomy P-value Signif.], r)
48
+ end
49
+
50
+ if %w[novel both].include? o[:test]
51
+ # Novel
52
+ r = MiGA::TaxDist.aai_pvalues(cr[1], :novel).map do |k,v|
53
+ sig = ""
54
+ [0.5,0.1,0.05,0.01].each{ |i| sig << "*" if v<i }
55
+ [MiGA::Taxonomy.LONG_RANKS[k], v, sig]
56
+ end
57
+ puts ""
58
+ puts "Taxonomic novelty"
59
+ puts MiGA::MiGA.tabulate(%w[Rank P-value Signif.], r)
60
+ end
61
+
62
+ puts ""
63
+ puts "Significance at p-value below: *0.5, **0.1, ***0.05, ****0.01."
64
+ end
65
+
66
+ $stderr.puts "Done." unless o[:q]
data/bin/miga CHANGED
@@ -34,6 +34,7 @@ $task_desc = {
34
34
  date: "Returns the current date in standard MiGA format.",
35
35
  # Taxonomy
36
36
  add_taxonomy: "Registers taxonomic information for datasets.",
37
+ test_taxonomy: "Returns test of taxonomic distributions for query datasets.",
37
38
  index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
38
39
  tax_distributions: "Estimates distributions of distance by taxonomy.",
39
40
  }
data/lib/miga/dataset.rb CHANGED
@@ -4,6 +4,7 @@
4
4
  require "miga/metadata"
5
5
  require "miga/result"
6
6
  require "miga/dataset_result"
7
+ require "sqlite3"
7
8
 
8
9
  ##
9
10
  # Dataset representation in MiGA.
@@ -135,28 +136,32 @@ class MiGA::Dataset < MiGA::MiGA
135
136
  # Get standard metadata values for the dataset as Array.
136
137
  def info
137
138
  MiGA::Dataset.INFO_FIELDS.map do |k|
138
- (k=="name") ? self.name : self.metadata[k.to_sym]
139
+ (k=="name") ? self.name : metadata[k.to_sym]
139
140
  end
140
141
  end
141
142
 
142
143
  ##
143
144
  # Is this dataset a reference?
144
- def is_ref? ; !!self.metadata[:ref] ; end
145
+ def is_ref? ; !!metadata[:ref] ; end
146
+
147
+ ##
148
+ # Is this dataset a query (non-reference)?
149
+ def is_query? ; !metadata[:ref] ; end
145
150
 
146
151
  ##
147
152
  # Is this dataset known to be multi-organism?
148
153
  def is_multi?
149
- return false if self.metadata[:type].nil? or
150
- @@KNOWN_TYPES[self.metadata[:type]].nil?
151
- @@KNOWN_TYPES[self.metadata[:type]][:multi]
154
+ return false if metadata[:type].nil? or
155
+ @@KNOWN_TYPES[type].nil?
156
+ @@KNOWN_TYPES[type][:multi]
152
157
  end
153
158
 
154
159
  ##
155
160
  # Is this dataset known to be single-organism?
156
161
  def is_nonmulti?
157
- return false if self.metadata[:type].nil? or
158
- @@KNOWN_TYPES[self.metadata[:type]].nil?
159
- !@@KNOWN_TYPES[self.metadata[:type]][:multi]
162
+ return false if metadata[:type].nil? or
163
+ @@KNOWN_TYPES[type].nil?
164
+ !@@KNOWN_TYPES[type][:multi]
160
165
  end
161
166
 
162
167
  ##
@@ -264,5 +269,20 @@ class MiGA::Dataset < MiGA::MiGA
264
269
  end
265
270
  adv
266
271
  end
272
+
273
+ ##
274
+ # Returns an Array of duples (Arrays) sorted by AAI:
275
+ # - +0+: A String with the name(s) of the reference dataset.
276
+ # - +1+: A Float with the AAI.
277
+ # This function is currently only supported for query datasets. It returns
278
+ # +nil+ if this analysis is not supported.
279
+ def closest_relatives(how_many=1)
280
+ return nil if is_ref? or project.is_multi?
281
+ r = result :distances
282
+ return nil if r.nil?
283
+ db = SQLite3::Database.new(r.file_path :aai_db)
284
+ db.execute("SELECT seq2, aai FROM aai WHERE seq2 != ? " +
285
+ "GROUP BY seq2 ORDER BY aai DESC LIMIT ?", [name, how_many])
286
+ end
267
287
 
268
288
  end # class MiGA::Dataset
data/lib/miga/project.rb CHANGED
@@ -162,6 +162,10 @@ class MiGA::Project < MiGA::MiGA
162
162
  ##
163
163
  # Is this a clade project?
164
164
  def is_clade? ; type==:clade ; end
165
+
166
+ ##
167
+ # Is this a project for multi-organism datasets?
168
+ def is_multi? ; @@KNOWN_TYPES[type][:multi] ; end
165
169
 
166
170
  ##
167
171
  # Returns Array of MiGA::Dataset.
@@ -55,6 +55,8 @@ module MiGA::ProjectResult
55
55
  r.add_file(:ogs, "miga-project.ogs")
56
56
  r.add_file(:stats, "miga-project.stats")
57
57
  r.add_file(:rbm, "miga-project.rbm")
58
+ r.add_file(:core_pan, "miga-project.core-pan.tsv")
59
+ r.add_file(:core_pan_plot, "miga-project.core-pan.pdf")
58
60
  r
59
61
  end
60
62
 
data/lib/miga/version.rb CHANGED
@@ -10,15 +10,15 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 6, 5]
13
+ VERSION = [0.3, 0, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
17
- VERSION_NAME = "pochoir"
17
+ VERSION_NAME = "tinge"
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2017, 5, 22)
21
+ VERSION_DATE = Date.new(2017, 5, 29)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/scripts/ogs.bash CHANGED
@@ -11,30 +11,34 @@ cd "$PROJECT/data/10.clades/03.ogs"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
- echo -n "" > miga-project.log
15
14
  DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
16
-
17
- # Extract RBMs
18
- [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
19
- for i in $DS ; do
20
- for j in $DS ; do
21
- file="miga-project.rbm/$i-$j.rbm"
22
- [[ -s $file ]] && continue
23
- echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
- "where seq1='$i' and seq2='$j' ;" \
25
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
- > "$file"
27
- [[ -s "$file" ]] || rm "$file"
15
+ if [[ ! -s miga-project.ogs ]] ; then
16
+ # Extract RBMs
17
+ [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
18
+ echo -n "" > miga-project.log
19
+ for i in $DS ; do
20
+ for j in $DS ; do
21
+ file="miga-project.rbm/$i-$j.rbm"
22
+ [[ -s $file ]] && continue
23
+ echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
+ "where seq1='$i' and seq2='$j' ;" \
25
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
+ > "$file"
27
+ [[ -s "$file" ]] || rm "$file"
28
+ done
29
+ echo "$i" >> miga-project.log
28
30
  done
29
- echo "$i" >> miga-project.log
30
- done
31
31
 
32
- # Estimate OGs
33
- ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
34
- ogs.stats.rb -o miga-project.ogs -j miga-project.stats
32
+ # Estimate OGs and Clean RBMs
33
+ ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
34
+ rm -rf miga-project.rbm
35
+ fi
35
36
 
36
- # Clean RBMs
37
- rm -rf miga-project.rbm
37
+ # Calculate Statistics
38
+ ogs.stats.rb -o miga-project.ogs -j miga-project.stats
39
+ ogs.core-pan.rb -o miga-project.ogs -s miga-project.core-pan.tsv -t "$CORES"
40
+ Rscript $MIGA/utils/core-pan-plot.R \
41
+ miga-project.core-pan.tsv miga-project.core-pan.pdf
38
42
 
39
43
  # Finalize
40
44
  miga date > "miga-project.done"
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env Rscript
2
+
3
+ argv <- commandArgs(trailingOnly=T)
4
+
5
+ plot_core_pan <- function(core_pan, pdf){
6
+ a <- read.table(core_pan, sep="\t", header=TRUE)
7
+ pdf(pdf, 7, 5)
8
+ plot(1, type="n", xlim=c(0, max(a$genomes)*1.05), xaxs="i", yaxs="i",
9
+ ylim=c(0, max(a$pan_q3)*1.05), xlab="Genomes", ylab="Orthologous Groups")
10
+ # Core
11
+ polygon(c(a$genomes, rev(a$genomes)), c(a$core_q1, rev(a$core_q3)),
12
+ border=NA, col=rgb(0, 121, 166, 128/2, max=255))
13
+ lines(a$genomes, a$core_avg, col=rgb(0,121,166,max=255), lty=2)
14
+ lines(a$genomes, a$core_q2, col=rgb(0,121,166,max=255), lty=1)
15
+ # Pan
16
+ polygon(c(a$genomes, rev(a$genomes)), c(a$pan_q1, rev(a$pan_q3)),
17
+ border=NA, col=rgb(96, 11, 64, 128/2, max=255))
18
+ lines(a$genomes, a$pan_avg, col=rgb(96,11,64,max=255), lty=2)
19
+ lines(a$genomes, a$pan_q2, col=rgb(96,11,64,max=255), lty=1)
20
+ # Legend
21
+ legend("topleft",
22
+ legend=c("pangenome","core genome","Inter-Quartile","Median","Average"),
23
+ pch=c(16,16,15,NA,NA),lty=c(NA,NA,NA,1,2), pt.cex=c(1,1,2,NA,NA),
24
+ col=c(rgb(96,11,64,max=255), rgb(0,121,166,max=255),
25
+ rgb(0.5,0.5,0.5,166/255), rep(rgb(0.5,0.5,0.5),2)), bty="n")
26
+ dev.off()
27
+ }
28
+
29
+ plot_core_pan(argv[1], argv[2])
metadata CHANGED
@@ -1,103 +1,103 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6.5
4
+ version: 0.3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-22 00:00:00.000000000 Z
11
+ date: 2017-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: daemons
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '1.2'
34
- - - ">="
34
+ - - '>='
35
35
  - !ruby/object:Gem::Version
36
36
  version: 1.2.4
37
37
  type: :runtime
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
40
40
  requirements:
41
- - - "~>"
41
+ - - ~>
42
42
  - !ruby/object:Gem::Version
43
43
  version: '1.2'
44
- - - ">="
44
+ - - '>='
45
45
  - !ruby/object:Gem::Version
46
46
  version: 1.2.4
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: json
49
49
  requirement: !ruby/object:Gem::Requirement
50
50
  requirements:
51
- - - "~>"
51
+ - - ~>
52
52
  - !ruby/object:Gem::Version
53
53
  version: '1.8'
54
54
  type: :runtime
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - "~>"
58
+ - - ~>
59
59
  - !ruby/object:Gem::Version
60
60
  version: '1.8'
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: sqlite3
63
63
  requirement: !ruby/object:Gem::Requirement
64
64
  requirements:
65
- - - "~>"
65
+ - - ~>
66
66
  - !ruby/object:Gem::Version
67
67
  version: '1.3'
68
68
  type: :runtime
69
69
  prerelease: false
70
70
  version_requirements: !ruby/object:Gem::Requirement
71
71
  requirements:
72
- - - "~>"
72
+ - - ~>
73
73
  - !ruby/object:Gem::Version
74
74
  version: '1.3'
75
75
  - !ruby/object:Gem::Dependency
76
76
  name: rake
77
77
  requirement: !ruby/object:Gem::Requirement
78
78
  requirements:
79
- - - "~>"
79
+ - - ~>
80
80
  - !ruby/object:Gem::Version
81
81
  version: '11'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
- - - "~>"
86
+ - - ~>
87
87
  - !ruby/object:Gem::Version
88
88
  version: '11'
89
89
  - !ruby/object:Gem::Dependency
90
90
  name: test-unit
91
91
  requirement: !ruby/object:Gem::Requirement
92
92
  requirements:
93
- - - "~>"
93
+ - - ~>
94
94
  - !ruby/object:Gem::Version
95
95
  version: '3'
96
96
  type: :development
97
97
  prerelease: false
98
98
  version_requirements: !ruby/object:Gem::Requirement
99
99
  requirements:
100
- - - "~>"
100
+ - - ~>
101
101
  - !ruby/object:Gem::Version
102
102
  version: '3'
103
103
  description: Microbial Genomes Atlas
@@ -108,32 +108,6 @@ extensions: []
108
108
  extra_rdoc_files:
109
109
  - README.md
110
110
  files:
111
- - Gemfile
112
- - LICENSE
113
- - README.md
114
- - Rakefile
115
- - actions/add_result.rb
116
- - actions/add_taxonomy.rb
117
- - actions/create_dataset.rb
118
- - actions/create_project.rb
119
- - actions/daemon.rb
120
- - actions/date.rb
121
- - actions/download_dataset.rb
122
- - actions/find_datasets.rb
123
- - actions/import_datasets.rb
124
- - actions/index_taxonomy.rb
125
- - actions/list_datasets.rb
126
- - actions/list_files.rb
127
- - actions/plugins.rb
128
- - actions/project_info.rb
129
- - actions/result_stats.rb
130
- - actions/run_local.rb
131
- - actions/tax_distributions.rb
132
- - actions/unlink_dataset.rb
133
- - bin/miga
134
- - lib/miga.rb
135
- - lib/miga/_data/aai-intax.tsv.gz
136
- - lib/miga/_data/aai-novel.tsv.gz
137
111
  - lib/miga/common.rb
138
112
  - lib/miga/daemon.rb
139
113
  - lib/miga/dataset.rb
@@ -147,6 +121,18 @@ files:
147
121
  - lib/miga/tax_index.rb
148
122
  - lib/miga/taxonomy.rb
149
123
  - lib/miga/version.rb
124
+ - lib/miga.rb
125
+ - test/common_test.rb
126
+ - test/daemon_test.rb
127
+ - test/dataset_test.rb
128
+ - test/metadata_test.rb
129
+ - test/project_test.rb
130
+ - test/remote_dataset_test.rb
131
+ - test/tax_index_test.rb
132
+ - test/taxonomy_test.rb
133
+ - test/test_helper.rb
134
+ - lib/miga/_data/aai-intax.tsv.gz
135
+ - lib/miga/_data/aai-novel.tsv.gz
150
136
  - scripts/_distances_functions.bash
151
137
  - scripts/_distances_noref_nomulti.bash
152
138
  - scripts/_distances_ref_nomulti.bash
@@ -170,16 +156,8 @@ files:
170
156
  - scripts/subclades.bash
171
157
  - scripts/trimmed_fasta.bash
172
158
  - scripts/trimmed_reads.bash
173
- - test/common_test.rb
174
- - test/daemon_test.rb
175
- - test/dataset_test.rb
176
- - test/metadata_test.rb
177
- - test/project_test.rb
178
- - test/remote_dataset_test.rb
179
- - test/tax_index_test.rb
180
- - test/taxonomy_test.rb
181
- - test/test_helper.rb
182
159
  - utils/adapters.fa
160
+ - utils/core-pan-plot.R
183
161
  - utils/index_metadata.rb
184
162
  - utils/mytaxa_scan.R
185
163
  - utils/mytaxa_scan.rb
@@ -189,6 +167,30 @@ files:
189
167
  - utils/subclades-compile.rb
190
168
  - utils/subclades-nj.R
191
169
  - utils/subclades.R
170
+ - bin/miga
171
+ - actions/add_result.rb
172
+ - actions/add_taxonomy.rb
173
+ - actions/create_dataset.rb
174
+ - actions/create_project.rb
175
+ - actions/daemon.rb
176
+ - actions/date.rb
177
+ - actions/download_dataset.rb
178
+ - actions/find_datasets.rb
179
+ - actions/import_datasets.rb
180
+ - actions/index_taxonomy.rb
181
+ - actions/list_datasets.rb
182
+ - actions/list_files.rb
183
+ - actions/plugins.rb
184
+ - actions/project_info.rb
185
+ - actions/result_stats.rb
186
+ - actions/run_local.rb
187
+ - actions/tax_distributions.rb
188
+ - actions/test_taxonomy.rb
189
+ - actions/unlink_dataset.rb
190
+ - Gemfile
191
+ - Rakefile
192
+ - README.md
193
+ - LICENSE
192
194
  homepage: http://enve-omics.ce.gatech.edu/miga
193
195
  licenses:
194
196
  - Artistic-2.0
@@ -197,25 +199,25 @@ post_install_message:
197
199
  rdoc_options:
198
200
  - lib
199
201
  - README.md
200
- - "--main"
202
+ - --main
201
203
  - README.md
202
- - "--title"
204
+ - --title
203
205
  - MiGA
204
206
  require_paths:
205
207
  - lib
206
208
  required_ruby_version: !ruby/object:Gem::Requirement
207
209
  requirements:
208
- - - ">="
210
+ - - '>='
209
211
  - !ruby/object:Gem::Version
210
212
  version: '1.9'
211
213
  required_rubygems_version: !ruby/object:Gem::Requirement
212
214
  requirements:
213
- - - ">="
215
+ - - '>='
214
216
  - !ruby/object:Gem::Version
215
217
  version: '0'
216
218
  requirements: []
217
219
  rubyforge_project:
218
- rubygems_version: 2.6.8
220
+ rubygems_version: 2.0.14
219
221
  signing_key:
220
222
  specification_version: 4
221
223
  summary: MiGA