miga-base 0.2.4.3 → 0.2.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fdd460c0009e55ffc82547e7d31f94789a874442
4
- data.tar.gz: c3e560fb2e8871ac586ef8b1fd64c2b1b2076471
3
+ metadata.gz: 367ffd0c85c1ad5bdbc44f4c88677fac5268c2e2
4
+ data.tar.gz: eb107b14279ea252e4287d40efa73a562d890451
5
5
  SHA512:
6
- metadata.gz: c77bb9fc35f046a2ebc1c1ddc457f19c4accafe06958b43d42a3654f91a5c19dcf92cc3624f9b30b950542d736787dc69f2e8e9ba88141d20923823da0812f46
7
- data.tar.gz: ecee7c4611a74fd16115260e706ea19315ccd639070482010f0bf5c4f669fe30fe9dea755b912053136366831f715cf346f2a32ed38dbee1ea69442525f9e080
6
+ metadata.gz: 7a2bf0f148315cd6cc0fd57e5ff48a6f88cb5be7ebcaeea4e3b6a0b820cc71fca1165f98287fcd94ffd5b330e3e352281c9d517d66b2df3219a4af067854c0e4
7
+ data.tar.gz: c8d884b0c56d075179c38470e4d0a34316ad14706c9a88ac171fc0230811e863090ce3a7ab0330c5c871d6754f15c01cae6b177cb6cf68b13153c3979934ff2a
@@ -26,6 +26,7 @@ p = MiGA::Project.load(o[:project])
26
26
  raise "Impossible to load project: #{o[:project]}" if p.nil?
27
27
 
28
28
  $stderr.puts "Loading result." unless o[:q]
29
+ d = nil
29
30
  if o[:dataset].nil?
30
31
  r = p.add_result(o[:name], false)
31
32
  else
@@ -77,6 +78,8 @@ if o[:compute]
77
78
  end
78
79
  end
79
80
  stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
81
+ when :distances
82
+ d.cleanup_distances! unless d.nil?
80
83
  else
81
84
  stats = nil
82
85
  end
data/lib/miga/dataset.rb CHANGED
@@ -198,6 +198,11 @@ class MiGA::Dataset < MiGA::MiGA
198
198
  r
199
199
  end
200
200
 
201
+ ##
202
+ # Gets a result as MiGA::Result for the datasets with +result_type+. This is
203
+ # equivalent to +add_result(result_type, false)+.
204
+ def get_result(result_type) ; add_result(result_type, false) ; end
205
+
201
206
  ##
202
207
  # Returns the key symbol of the first registered result (sorted by the
203
208
  # execution order). This typically corresponds to the result used as the
@@ -1,8 +1,29 @@
1
1
 
2
+ require "sqlite3"
3
+
2
4
  ##
3
5
  # Helper module including specific functions to add dataset results.
4
6
  module MiGA::DatasetResult
5
7
 
8
+ ##
9
+ # Clean-up all the stored distances, removing values for datasets no longer in
10
+ # the project as reference datasets.
11
+ def cleanup_distances!
12
+ r = get_result(:distances)
13
+ return if r.nil?
14
+ [:haai_db, :aai_db, :ani_db].each do |db_type|
15
+ db = r.file_path(db_type)
16
+ next if db.nil? or not File.size? db
17
+ sqlite_db = SQLite3::Database.new db
18
+ table = db_type[-6..-4]
19
+ val = sqlite_db.execute "select seq2 from #{table}"
20
+ next if val.empty?
21
+ (val.map{ |i| i.first } - project.dataset_names).each do |extra|
22
+ sqlite_db.execute "delete from #{table} where seq2=?", extra
23
+ end
24
+ end
25
+ end
26
+
6
27
  private
7
28
 
8
29
  ##
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 4, 3]
13
+ VERSION = [0.2, 5, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2017, 3, 28)
21
+ VERSION_DATE = Date.new(2017, 3, 29)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/scripts/stats.bash CHANGED
@@ -13,7 +13,7 @@ cd "$DIR"
13
13
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
14
 
15
15
  # Calculate statistics
16
- for i in raw_reads trimmed_fasta assembly cds essential_genes ; do
16
+ for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
17
17
  echo "# $i"
18
18
  miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
19
19
  done
metadata CHANGED
@@ -1,83 +1,103 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4.3
4
+ version: 0.2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-28 00:00:00.000000000 Z
11
+ date: 2017-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: daemons
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.2'
34
+ - - ">="
32
35
  - !ruby/object:Gem::Version
33
36
  version: 1.2.4
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
- - - ~>
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '1.2'
44
+ - - ">="
39
45
  - !ruby/object:Gem::Version
40
46
  version: 1.2.4
41
47
  - !ruby/object:Gem::Dependency
42
48
  name: json
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
- - - ~>
51
+ - - "~>"
46
52
  - !ruby/object:Gem::Version
47
53
  version: '1.8'
48
54
  type: :runtime
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
51
57
  requirements:
52
- - - ~>
58
+ - - "~>"
53
59
  - !ruby/object:Gem::Version
54
60
  version: '1.8'
61
+ - !ruby/object:Gem::Dependency
62
+ name: sqlite3
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.3'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.3'
55
75
  - !ruby/object:Gem::Dependency
56
76
  name: rake
57
77
  requirement: !ruby/object:Gem::Requirement
58
78
  requirements:
59
- - - ~>
79
+ - - "~>"
60
80
  - !ruby/object:Gem::Version
61
81
  version: '11'
62
82
  type: :development
63
83
  prerelease: false
64
84
  version_requirements: !ruby/object:Gem::Requirement
65
85
  requirements:
66
- - - ~>
86
+ - - "~>"
67
87
  - !ruby/object:Gem::Version
68
88
  version: '11'
69
89
  - !ruby/object:Gem::Dependency
70
90
  name: test-unit
71
91
  requirement: !ruby/object:Gem::Requirement
72
92
  requirements:
73
- - - ~>
93
+ - - "~>"
74
94
  - !ruby/object:Gem::Version
75
95
  version: '3'
76
96
  type: :development
77
97
  prerelease: false
78
98
  version_requirements: !ruby/object:Gem::Requirement
79
99
  requirements:
80
- - - ~>
100
+ - - "~>"
81
101
  - !ruby/object:Gem::Version
82
102
  version: '3'
83
103
  description: Microbial Genomes Atlas
@@ -88,6 +108,31 @@ extensions: []
88
108
  extra_rdoc_files:
89
109
  - README.md
90
110
  files:
111
+ - Gemfile
112
+ - LICENSE
113
+ - README.md
114
+ - Rakefile
115
+ - actions/add_result.rb
116
+ - actions/add_taxonomy.rb
117
+ - actions/create_dataset.rb
118
+ - actions/create_project.rb
119
+ - actions/daemon.rb
120
+ - actions/date.rb
121
+ - actions/download_dataset.rb
122
+ - actions/find_datasets.rb
123
+ - actions/import_datasets.rb
124
+ - actions/index_taxonomy.rb
125
+ - actions/list_datasets.rb
126
+ - actions/list_files.rb
127
+ - actions/plugins.rb
128
+ - actions/project_info.rb
129
+ - actions/result_stats.rb
130
+ - actions/tax_distributions.rb
131
+ - actions/unlink_dataset.rb
132
+ - bin/miga
133
+ - lib/miga.rb
134
+ - lib/miga/_data/aai-intax.tsv.gz
135
+ - lib/miga/_data/aai-novel.tsv.gz
91
136
  - lib/miga/common.rb
92
137
  - lib/miga/daemon.rb
93
138
  - lib/miga/dataset.rb
@@ -101,18 +146,6 @@ files:
101
146
  - lib/miga/tax_index.rb
102
147
  - lib/miga/taxonomy.rb
103
148
  - lib/miga/version.rb
104
- - lib/miga.rb
105
- - test/common_test.rb
106
- - test/daemon_test.rb
107
- - test/dataset_test.rb
108
- - test/metadata_test.rb
109
- - test/project_test.rb
110
- - test/remote_dataset_test.rb
111
- - test/tax_index_test.rb
112
- - test/taxonomy_test.rb
113
- - test/test_helper.rb
114
- - lib/miga/_data/aai-intax.tsv.gz
115
- - lib/miga/_data/aai-novel.tsv.gz
116
149
  - scripts/_distances_functions.bash
117
150
  - scripts/_distances_noref_nomulti.bash
118
151
  - scripts/_distances_ref_nomulti.bash
@@ -135,6 +168,15 @@ files:
135
168
  - scripts/subclades.bash
136
169
  - scripts/trimmed_fasta.bash
137
170
  - scripts/trimmed_reads.bash
171
+ - test/common_test.rb
172
+ - test/daemon_test.rb
173
+ - test/dataset_test.rb
174
+ - test/metadata_test.rb
175
+ - test/project_test.rb
176
+ - test/remote_dataset_test.rb
177
+ - test/tax_index_test.rb
178
+ - test/taxonomy_test.rb
179
+ - test/test_helper.rb
138
180
  - utils/adapters.fa
139
181
  - utils/mytaxa_scan.R
140
182
  - utils/mytaxa_scan.rb
@@ -142,30 +184,7 @@ files:
142
184
  - utils/requirements.txt
143
185
  - utils/subclades-compile.rb
144
186
  - utils/subclades-nj.R
145
- - utils/subclades-pam.R
146
187
  - utils/subclades.R
147
- - bin/miga
148
- - actions/add_result.rb
149
- - actions/add_taxonomy.rb
150
- - actions/create_dataset.rb
151
- - actions/create_project.rb
152
- - actions/daemon.rb
153
- - actions/date.rb
154
- - actions/download_dataset.rb
155
- - actions/find_datasets.rb
156
- - actions/import_datasets.rb
157
- - actions/index_taxonomy.rb
158
- - actions/list_datasets.rb
159
- - actions/list_files.rb
160
- - actions/plugins.rb
161
- - actions/project_info.rb
162
- - actions/result_stats.rb
163
- - actions/tax_distributions.rb
164
- - actions/unlink_dataset.rb
165
- - Gemfile
166
- - Rakefile
167
- - README.md
168
- - LICENSE
169
188
  homepage: http://enve-omics.ce.gatech.edu/miga
170
189
  licenses:
171
190
  - Artistic-2.0
@@ -174,25 +193,25 @@ post_install_message:
174
193
  rdoc_options:
175
194
  - lib
176
195
  - README.md
177
- - --main
196
+ - "--main"
178
197
  - README.md
179
- - --title
198
+ - "--title"
180
199
  - MiGA
181
200
  require_paths:
182
201
  - lib
183
202
  required_ruby_version: !ruby/object:Gem::Requirement
184
203
  requirements:
185
- - - '>='
204
+ - - ">="
186
205
  - !ruby/object:Gem::Version
187
206
  version: '1.9'
188
207
  required_rubygems_version: !ruby/object:Gem::Requirement
189
208
  requirements:
190
- - - '>='
209
+ - - ">="
191
210
  - !ruby/object:Gem::Version
192
211
  version: '0'
193
212
  requirements: []
194
213
  rubyforge_project:
195
- rubygems_version: 2.0.14
214
+ rubygems_version: 2.6.8
196
215
  signing_key:
197
216
  specification_version: 4
198
217
  summary: MiGA
@@ -1,186 +0,0 @@
1
- #!/usr/bin/env Rscript
2
- #
3
- # @package MiGA
4
- # @license Artistic-2.0
5
- #
6
-
7
- #= Load stuff
8
- argv <- commandArgs(trailingOnly=T)
9
- suppressPackageStartupMessages(library(ape))
10
- suppressPackageStartupMessages(library(vegan))
11
- suppressPackageStartupMessages(library(cluster))
12
- suppressPackageStartupMessages(library(parallel))
13
- suppressPackageStartupMessages(library(enveomics.R))
14
-
15
- #= Main function
16
- subclades <- function(ani_file, out_base, thr=1, ani=c()) {
17
- say("==> Out base:", out_base, "<==")
18
-
19
- # Input arguments
20
- if(missing(ani_file)){
21
- a <- as.data.frame(ani)
22
- }else{
23
- a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
24
- }
25
- if(nrow(a)==0){
26
- generate_empty_files(out_base)
27
- return(NULL)
28
- }
29
-
30
- # Get ANI distances
31
- say("Distances")
32
- a$d <- 1-a$value/100
33
- ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.2)
34
- ani.ph <- bionj(ani.d)
35
- express.ori <- options('expressions')$expressions
36
- if(express.ori < ani.ph$Nnode*4){
37
- options(expressions=min(c(5e7,ani.ph$Nnode*4)))
38
- }
39
- write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
40
- options(expressions=express.ori)
41
-
42
- # Silhouette
43
- say("Silhouette")
44
- k <- 2:min(length(labels(ani.d))-1, 100)
45
- cl <- makeCluster(thr)
46
- s <- parSapply(cl, k, function(x) {
47
- library(cluster)
48
- s <- pam(ani.d, x, do.swap=FALSE, pamonce=1)$silinfo
49
- c(s$avg.width, -sum(ifelse(s$widths[,3]>0,0,s$widths[,3])))
50
- })
51
- stopCluster(cl)
52
- s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
53
- s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
54
- ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
55
- top.n <- k[which.max(ds)]
56
-
57
- # Classify genomes
58
- say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
59
- ani.cl <- pam(ani.d, top.n, pamonce=1)
60
- ani.types <- ani.cl$clustering
61
- ani.medoids <- ani.cl$medoids
62
-
63
- # Generate graphic report
64
- say("Graphic report")
65
- pdf(paste(out_base, ".pdf", sep=""), 7, 12)
66
- layout(matrix(c(1,1,2,2,3,3,4,5),byrow=TRUE, ncol=2))
67
- plot_distances(ani.d)
68
- plot_silhouette(k, s[1,], s[2,], ds, top.n)
69
- plot_clustering(ani.cl, ani.d, ani.types)
70
- plot_tree(ani.ph, ani.types, ani.medoids)
71
- dev.off()
72
-
73
- # Save results
74
- say("Text report")
75
- write.table(ani.medoids, paste(out_base, "medoids", sep="."),
76
- quote=FALSE, col.names=FALSE, row.names=FALSE)
77
- save(ani.d, file=paste(out_base, "dist.rdata", sep="."))
78
- classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
79
- ani.d.m <- 100 - as.matrix(ani.d)*100
80
- for(j in 1:nrow(classif)){
81
- classif[j,4] <- ani.d.m[classif[j,1], classif[j,3]]
82
- }
83
- write.table(classif, paste(out_base,"classif",sep="."),
84
- quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
85
-
86
- # Recursive search
87
- say("Recursive search")
88
- for(i in 1:top.n){
89
- medoid <- ani.medoids[i]
90
- ds_f <- names(ani.types)[ ani.types==i ]
91
- say("Analyzing subclade", i, "with medoid:", medoid)
92
- dir.create(paste(out_base, ".sc-", i, sep=""))
93
- write.table(ds_f,
94
- paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
95
- quote=FALSE, col.names=FALSE, row.names=FALSE)
96
- if(length(ds_f) > 5){
97
- a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
98
- subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
99
- thr=thr, ani=a_f)
100
- }
101
- }
102
- }
103
-
104
- #= Helper functions
105
- say <- function(...) { cat("[", date(), "]", ..., "\n") }
106
-
107
- generate_empty_files <- function(out_base) {
108
- pdf(paste(out_base, ".pdf", sep=""), 7, 12)
109
- plot(1, t="n", axes=F)
110
- legend("center", "No data", bty="n")
111
- dev.off()
112
- file.create(paste(out_base,".1.classif",sep=""))
113
- file.create(paste(out_base,".1.medoids",sep=""))
114
- }
115
-
116
- plot_silhouette <- function(k, s, ns, ds, top.n) {
117
- # s
118
- par(mar=c(4,5,1,5)+0.1)
119
- plot(1, t="n", xlab="k (clusters)", ylab="", xlim=range(c(0,k)),
120
- ylim=range(s), bty="n", xaxs="i", yaxt="n")
121
- polygon(c(k[1], k, k[length(k)]), c(0,s,0), border=NA, col="grey80")
122
- axis(2, fg="grey60", col.axis="grey60")
123
- mtext("Mean silhouette", side=2, line=3, col="grey60")
124
- # ns
125
- par(new=TRUE)
126
- plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
127
- ylim=range(ns), bty="n", xaxs="i")
128
- points(k, ns, type="o", pch=16, col=rgb(1/2,0,0,3/4))
129
- axis(4, fg="darkred", col.axis="darkred")
130
- mtext("Negative silhouette area", side=4, line=3, col="darkred")
131
- # ds
132
- par(new=TRUE)
133
- plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
134
- ylim=range(ds), bty="n", xaxs="i")
135
- lines(k, ds)
136
- abline(v=top.n, lty=2)
137
- }
138
-
139
- plot_distances <- function(dist) {
140
- par(mar=c(5,4,1,2)+0.1)
141
- hist(dist, border=NA, col="grey60", breaks=50, xlab="Distances", main="")
142
- }
143
-
144
- plot_clustering <- function(cl, dist, types) {
145
- par(mar=c(5,4,4,2)+0.1)
146
- top.n <- length(cl$medoids)
147
- col <- ggplotColours(top.n)
148
- plot(silhouette(cl), col=col)
149
- if(length(labels(dist))<=15){
150
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
151
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
152
- }else{
153
- ani.mds <- cmdscale(dist, k=4)
154
- if(ncol(ani.mds)==4){
155
- plot(ani.mds[,1], ani.mds[,2], col=col[types], cex=1/2,
156
- xlab='Component 1', ylab='Component 2')
157
- plot(ani.mds[,3], ani.mds[,4], col=col[types], cex=1/2,
158
- xlab='Component 3', ylab='Component 4')
159
- }else{
160
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
161
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
162
- }
163
- }
164
- }
165
-
166
- plot_tree <- function(phy, types, medoids){
167
- layout(1)
168
- top.n <- length(unique(types))
169
- col <- ggplotColours(top.n)
170
- is.medoid <- phy$tip.label %in% medoids
171
- phy$tip.label[is.medoid] <- paste(phy$tip.label[is.medoid],
172
- " [", types[phy$tip.label[is.medoid]], "]", sep='')
173
- plot(phy, cex=ifelse(is.medoid, 1/3, 1/6),
174
- font=ifelse(is.medoid, 2, 1),
175
- tip.color=col[types[phy$tip.label]])
176
- }
177
-
178
- ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
179
- if ((diff(h)%%360) < 1) h[2] <- h[2] - 360/n
180
- hcl(h=seq(h[1], h[2], length=n), c=100, l=65, alpha=alpha)
181
- }
182
-
183
- #= Main
184
- subclades(ani_file=argv[1], out_base=argv[2],
185
- thr=ifelse(is.na(argv[3]), 1, as.numeric(argv[3])))
186
-