miga-base 0.2.4.3 → 0.2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fdd460c0009e55ffc82547e7d31f94789a874442
4
- data.tar.gz: c3e560fb2e8871ac586ef8b1fd64c2b1b2076471
3
+ metadata.gz: 367ffd0c85c1ad5bdbc44f4c88677fac5268c2e2
4
+ data.tar.gz: eb107b14279ea252e4287d40efa73a562d890451
5
5
  SHA512:
6
- metadata.gz: c77bb9fc35f046a2ebc1c1ddc457f19c4accafe06958b43d42a3654f91a5c19dcf92cc3624f9b30b950542d736787dc69f2e8e9ba88141d20923823da0812f46
7
- data.tar.gz: ecee7c4611a74fd16115260e706ea19315ccd639070482010f0bf5c4f669fe30fe9dea755b912053136366831f715cf346f2a32ed38dbee1ea69442525f9e080
6
+ metadata.gz: 7a2bf0f148315cd6cc0fd57e5ff48a6f88cb5be7ebcaeea4e3b6a0b820cc71fca1165f98287fcd94ffd5b330e3e352281c9d517d66b2df3219a4af067854c0e4
7
+ data.tar.gz: c8d884b0c56d075179c38470e4d0a34316ad14706c9a88ac171fc0230811e863090ce3a7ab0330c5c871d6754f15c01cae6b177cb6cf68b13153c3979934ff2a
@@ -26,6 +26,7 @@ p = MiGA::Project.load(o[:project])
26
26
  raise "Impossible to load project: #{o[:project]}" if p.nil?
27
27
 
28
28
  $stderr.puts "Loading result." unless o[:q]
29
+ d = nil
29
30
  if o[:dataset].nil?
30
31
  r = p.add_result(o[:name], false)
31
32
  else
@@ -77,6 +78,8 @@ if o[:compute]
77
78
  end
78
79
  end
79
80
  stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
81
+ when :distances
82
+ d.cleanup_distances! unless d.nil?
80
83
  else
81
84
  stats = nil
82
85
  end
data/lib/miga/dataset.rb CHANGED
@@ -198,6 +198,11 @@ class MiGA::Dataset < MiGA::MiGA
198
198
  r
199
199
  end
200
200
 
201
+ ##
202
+ # Gets a result as MiGA::Result for the datasets with +result_type+. This is
203
+ # equivalent to +add_result(result_type, false)+.
204
+ def get_result(result_type) ; add_result(result_type, false) ; end
205
+
201
206
  ##
202
207
  # Returns the key symbol of the first registered result (sorted by the
203
208
  # execution order). This typically corresponds to the result used as the
@@ -1,8 +1,29 @@
1
1
 
2
+ require "sqlite3"
3
+
2
4
  ##
3
5
  # Helper module including specific functions to add dataset results.
4
6
  module MiGA::DatasetResult
5
7
 
8
+ ##
9
+ # Clean-up all the stored distances, removing values for datasets no longer in
10
+ # the project as reference datasets.
11
+ def cleanup_distances!
12
+ r = get_result(:distances)
13
+ return if r.nil?
14
+ [:haai_db, :aai_db, :ani_db].each do |db_type|
15
+ db = r.file_path(db_type)
16
+ next if db.nil? or not File.size? db
17
+ sqlite_db = SQLite3::Database.new db
18
+ table = db_type[-6..-4]
19
+ val = sqlite_db.execute "select seq2 from #{table}"
20
+ next if val.empty?
21
+ (val.map{ |i| i.first } - project.dataset_names).each do |extra|
22
+ sqlite_db.execute "delete from #{table} where seq2=?", extra
23
+ end
24
+ end
25
+ end
26
+
6
27
  private
7
28
 
8
29
  ##
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 4, 3]
13
+ VERSION = [0.2, 5, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2017, 3, 28)
21
+ VERSION_DATE = Date.new(2017, 3, 29)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/scripts/stats.bash CHANGED
@@ -13,7 +13,7 @@ cd "$DIR"
13
13
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
14
14
 
15
15
  # Calculate statistics
16
- for i in raw_reads trimmed_fasta assembly cds essential_genes ; do
16
+ for i in raw_reads trimmed_fasta assembly cds essential_genes distances ; do
17
17
  echo "# $i"
18
18
  miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
19
19
  done
metadata CHANGED
@@ -1,83 +1,103 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4.3
4
+ version: 0.2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-28 00:00:00.000000000 Z
11
+ date: 2017-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: daemons
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.2'
34
+ - - ">="
32
35
  - !ruby/object:Gem::Version
33
36
  version: 1.2.4
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
- - - ~>
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '1.2'
44
+ - - ">="
39
45
  - !ruby/object:Gem::Version
40
46
  version: 1.2.4
41
47
  - !ruby/object:Gem::Dependency
42
48
  name: json
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
- - - ~>
51
+ - - "~>"
46
52
  - !ruby/object:Gem::Version
47
53
  version: '1.8'
48
54
  type: :runtime
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
51
57
  requirements:
52
- - - ~>
58
+ - - "~>"
53
59
  - !ruby/object:Gem::Version
54
60
  version: '1.8'
61
+ - !ruby/object:Gem::Dependency
62
+ name: sqlite3
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.3'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.3'
55
75
  - !ruby/object:Gem::Dependency
56
76
  name: rake
57
77
  requirement: !ruby/object:Gem::Requirement
58
78
  requirements:
59
- - - ~>
79
+ - - "~>"
60
80
  - !ruby/object:Gem::Version
61
81
  version: '11'
62
82
  type: :development
63
83
  prerelease: false
64
84
  version_requirements: !ruby/object:Gem::Requirement
65
85
  requirements:
66
- - - ~>
86
+ - - "~>"
67
87
  - !ruby/object:Gem::Version
68
88
  version: '11'
69
89
  - !ruby/object:Gem::Dependency
70
90
  name: test-unit
71
91
  requirement: !ruby/object:Gem::Requirement
72
92
  requirements:
73
- - - ~>
93
+ - - "~>"
74
94
  - !ruby/object:Gem::Version
75
95
  version: '3'
76
96
  type: :development
77
97
  prerelease: false
78
98
  version_requirements: !ruby/object:Gem::Requirement
79
99
  requirements:
80
- - - ~>
100
+ - - "~>"
81
101
  - !ruby/object:Gem::Version
82
102
  version: '3'
83
103
  description: Microbial Genomes Atlas
@@ -88,6 +108,31 @@ extensions: []
88
108
  extra_rdoc_files:
89
109
  - README.md
90
110
  files:
111
+ - Gemfile
112
+ - LICENSE
113
+ - README.md
114
+ - Rakefile
115
+ - actions/add_result.rb
116
+ - actions/add_taxonomy.rb
117
+ - actions/create_dataset.rb
118
+ - actions/create_project.rb
119
+ - actions/daemon.rb
120
+ - actions/date.rb
121
+ - actions/download_dataset.rb
122
+ - actions/find_datasets.rb
123
+ - actions/import_datasets.rb
124
+ - actions/index_taxonomy.rb
125
+ - actions/list_datasets.rb
126
+ - actions/list_files.rb
127
+ - actions/plugins.rb
128
+ - actions/project_info.rb
129
+ - actions/result_stats.rb
130
+ - actions/tax_distributions.rb
131
+ - actions/unlink_dataset.rb
132
+ - bin/miga
133
+ - lib/miga.rb
134
+ - lib/miga/_data/aai-intax.tsv.gz
135
+ - lib/miga/_data/aai-novel.tsv.gz
91
136
  - lib/miga/common.rb
92
137
  - lib/miga/daemon.rb
93
138
  - lib/miga/dataset.rb
@@ -101,18 +146,6 @@ files:
101
146
  - lib/miga/tax_index.rb
102
147
  - lib/miga/taxonomy.rb
103
148
  - lib/miga/version.rb
104
- - lib/miga.rb
105
- - test/common_test.rb
106
- - test/daemon_test.rb
107
- - test/dataset_test.rb
108
- - test/metadata_test.rb
109
- - test/project_test.rb
110
- - test/remote_dataset_test.rb
111
- - test/tax_index_test.rb
112
- - test/taxonomy_test.rb
113
- - test/test_helper.rb
114
- - lib/miga/_data/aai-intax.tsv.gz
115
- - lib/miga/_data/aai-novel.tsv.gz
116
149
  - scripts/_distances_functions.bash
117
150
  - scripts/_distances_noref_nomulti.bash
118
151
  - scripts/_distances_ref_nomulti.bash
@@ -135,6 +168,15 @@ files:
135
168
  - scripts/subclades.bash
136
169
  - scripts/trimmed_fasta.bash
137
170
  - scripts/trimmed_reads.bash
171
+ - test/common_test.rb
172
+ - test/daemon_test.rb
173
+ - test/dataset_test.rb
174
+ - test/metadata_test.rb
175
+ - test/project_test.rb
176
+ - test/remote_dataset_test.rb
177
+ - test/tax_index_test.rb
178
+ - test/taxonomy_test.rb
179
+ - test/test_helper.rb
138
180
  - utils/adapters.fa
139
181
  - utils/mytaxa_scan.R
140
182
  - utils/mytaxa_scan.rb
@@ -142,30 +184,7 @@ files:
142
184
  - utils/requirements.txt
143
185
  - utils/subclades-compile.rb
144
186
  - utils/subclades-nj.R
145
- - utils/subclades-pam.R
146
187
  - utils/subclades.R
147
- - bin/miga
148
- - actions/add_result.rb
149
- - actions/add_taxonomy.rb
150
- - actions/create_dataset.rb
151
- - actions/create_project.rb
152
- - actions/daemon.rb
153
- - actions/date.rb
154
- - actions/download_dataset.rb
155
- - actions/find_datasets.rb
156
- - actions/import_datasets.rb
157
- - actions/index_taxonomy.rb
158
- - actions/list_datasets.rb
159
- - actions/list_files.rb
160
- - actions/plugins.rb
161
- - actions/project_info.rb
162
- - actions/result_stats.rb
163
- - actions/tax_distributions.rb
164
- - actions/unlink_dataset.rb
165
- - Gemfile
166
- - Rakefile
167
- - README.md
168
- - LICENSE
169
188
  homepage: http://enve-omics.ce.gatech.edu/miga
170
189
  licenses:
171
190
  - Artistic-2.0
@@ -174,25 +193,25 @@ post_install_message:
174
193
  rdoc_options:
175
194
  - lib
176
195
  - README.md
177
- - --main
196
+ - "--main"
178
197
  - README.md
179
- - --title
198
+ - "--title"
180
199
  - MiGA
181
200
  require_paths:
182
201
  - lib
183
202
  required_ruby_version: !ruby/object:Gem::Requirement
184
203
  requirements:
185
- - - '>='
204
+ - - ">="
186
205
  - !ruby/object:Gem::Version
187
206
  version: '1.9'
188
207
  required_rubygems_version: !ruby/object:Gem::Requirement
189
208
  requirements:
190
- - - '>='
209
+ - - ">="
191
210
  - !ruby/object:Gem::Version
192
211
  version: '0'
193
212
  requirements: []
194
213
  rubyforge_project:
195
- rubygems_version: 2.0.14
214
+ rubygems_version: 2.6.8
196
215
  signing_key:
197
216
  specification_version: 4
198
217
  summary: MiGA
@@ -1,186 +0,0 @@
1
- #!/usr/bin/env Rscript
2
- #
3
- # @package MiGA
4
- # @license Artistic-2.0
5
- #
6
-
7
- #= Load stuff
8
- argv <- commandArgs(trailingOnly=T)
9
- suppressPackageStartupMessages(library(ape))
10
- suppressPackageStartupMessages(library(vegan))
11
- suppressPackageStartupMessages(library(cluster))
12
- suppressPackageStartupMessages(library(parallel))
13
- suppressPackageStartupMessages(library(enveomics.R))
14
-
15
- #= Main function
16
- subclades <- function(ani_file, out_base, thr=1, ani=c()) {
17
- say("==> Out base:", out_base, "<==")
18
-
19
- # Input arguments
20
- if(missing(ani_file)){
21
- a <- as.data.frame(ani)
22
- }else{
23
- a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
24
- }
25
- if(nrow(a)==0){
26
- generate_empty_files(out_base)
27
- return(NULL)
28
- }
29
-
30
- # Get ANI distances
31
- say("Distances")
32
- a$d <- 1-a$value/100
33
- ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.2)
34
- ani.ph <- bionj(ani.d)
35
- express.ori <- options('expressions')$expressions
36
- if(express.ori < ani.ph$Nnode*4){
37
- options(expressions=min(c(5e7,ani.ph$Nnode*4)))
38
- }
39
- write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
40
- options(expressions=express.ori)
41
-
42
- # Silhouette
43
- say("Silhouette")
44
- k <- 2:min(length(labels(ani.d))-1, 100)
45
- cl <- makeCluster(thr)
46
- s <- parSapply(cl, k, function(x) {
47
- library(cluster)
48
- s <- pam(ani.d, x, do.swap=FALSE, pamonce=1)$silinfo
49
- c(s$avg.width, -sum(ifelse(s$widths[,3]>0,0,s$widths[,3])))
50
- })
51
- stopCluster(cl)
52
- s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
53
- s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
54
- ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
55
- top.n <- k[which.max(ds)]
56
-
57
- # Classify genomes
58
- say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
59
- ani.cl <- pam(ani.d, top.n, pamonce=1)
60
- ani.types <- ani.cl$clustering
61
- ani.medoids <- ani.cl$medoids
62
-
63
- # Generate graphic report
64
- say("Graphic report")
65
- pdf(paste(out_base, ".pdf", sep=""), 7, 12)
66
- layout(matrix(c(1,1,2,2,3,3,4,5),byrow=TRUE, ncol=2))
67
- plot_distances(ani.d)
68
- plot_silhouette(k, s[1,], s[2,], ds, top.n)
69
- plot_clustering(ani.cl, ani.d, ani.types)
70
- plot_tree(ani.ph, ani.types, ani.medoids)
71
- dev.off()
72
-
73
- # Save results
74
- say("Text report")
75
- write.table(ani.medoids, paste(out_base, "medoids", sep="."),
76
- quote=FALSE, col.names=FALSE, row.names=FALSE)
77
- save(ani.d, file=paste(out_base, "dist.rdata", sep="."))
78
- classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
79
- ani.d.m <- 100 - as.matrix(ani.d)*100
80
- for(j in 1:nrow(classif)){
81
- classif[j,4] <- ani.d.m[classif[j,1], classif[j,3]]
82
- }
83
- write.table(classif, paste(out_base,"classif",sep="."),
84
- quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
85
-
86
- # Recursive search
87
- say("Recursive search")
88
- for(i in 1:top.n){
89
- medoid <- ani.medoids[i]
90
- ds_f <- names(ani.types)[ ani.types==i ]
91
- say("Analyzing subclade", i, "with medoid:", medoid)
92
- dir.create(paste(out_base, ".sc-", i, sep=""))
93
- write.table(ds_f,
94
- paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
95
- quote=FALSE, col.names=FALSE, row.names=FALSE)
96
- if(length(ds_f) > 5){
97
- a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
98
- subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
99
- thr=thr, ani=a_f)
100
- }
101
- }
102
- }
103
-
104
- #= Helper functions
105
- say <- function(...) { cat("[", date(), "]", ..., "\n") }
106
-
107
- generate_empty_files <- function(out_base) {
108
- pdf(paste(out_base, ".pdf", sep=""), 7, 12)
109
- plot(1, t="n", axes=F)
110
- legend("center", "No data", bty="n")
111
- dev.off()
112
- file.create(paste(out_base,".1.classif",sep=""))
113
- file.create(paste(out_base,".1.medoids",sep=""))
114
- }
115
-
116
- plot_silhouette <- function(k, s, ns, ds, top.n) {
117
- # s
118
- par(mar=c(4,5,1,5)+0.1)
119
- plot(1, t="n", xlab="k (clusters)", ylab="", xlim=range(c(0,k)),
120
- ylim=range(s), bty="n", xaxs="i", yaxt="n")
121
- polygon(c(k[1], k, k[length(k)]), c(0,s,0), border=NA, col="grey80")
122
- axis(2, fg="grey60", col.axis="grey60")
123
- mtext("Mean silhouette", side=2, line=3, col="grey60")
124
- # ns
125
- par(new=TRUE)
126
- plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
127
- ylim=range(ns), bty="n", xaxs="i")
128
- points(k, ns, type="o", pch=16, col=rgb(1/2,0,0,3/4))
129
- axis(4, fg="darkred", col.axis="darkred")
130
- mtext("Negative silhouette area", side=4, line=3, col="darkred")
131
- # ds
132
- par(new=TRUE)
133
- plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
134
- ylim=range(ds), bty="n", xaxs="i")
135
- lines(k, ds)
136
- abline(v=top.n, lty=2)
137
- }
138
-
139
- plot_distances <- function(dist) {
140
- par(mar=c(5,4,1,2)+0.1)
141
- hist(dist, border=NA, col="grey60", breaks=50, xlab="Distances", main="")
142
- }
143
-
144
- plot_clustering <- function(cl, dist, types) {
145
- par(mar=c(5,4,4,2)+0.1)
146
- top.n <- length(cl$medoids)
147
- col <- ggplotColours(top.n)
148
- plot(silhouette(cl), col=col)
149
- if(length(labels(dist))<=15){
150
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
151
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
152
- }else{
153
- ani.mds <- cmdscale(dist, k=4)
154
- if(ncol(ani.mds)==4){
155
- plot(ani.mds[,1], ani.mds[,2], col=col[types], cex=1/2,
156
- xlab='Component 1', ylab='Component 2')
157
- plot(ani.mds[,3], ani.mds[,4], col=col[types], cex=1/2,
158
- xlab='Component 3', ylab='Component 4')
159
- }else{
160
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
161
- plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
162
- }
163
- }
164
- }
165
-
166
- plot_tree <- function(phy, types, medoids){
167
- layout(1)
168
- top.n <- length(unique(types))
169
- col <- ggplotColours(top.n)
170
- is.medoid <- phy$tip.label %in% medoids
171
- phy$tip.label[is.medoid] <- paste(phy$tip.label[is.medoid],
172
- " [", types[phy$tip.label[is.medoid]], "]", sep='')
173
- plot(phy, cex=ifelse(is.medoid, 1/3, 1/6),
174
- font=ifelse(is.medoid, 2, 1),
175
- tip.color=col[types[phy$tip.label]])
176
- }
177
-
178
- ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
179
- if ((diff(h)%%360) < 1) h[2] <- h[2] - 360/n
180
- hcl(h=seq(h[1], h[2], length=n), c=100, l=65, alpha=alpha)
181
- }
182
-
183
- #= Main
184
- subclades(ani_file=argv[1], out_base=argv[2],
185
- thr=ifelse(is.na(argv[3]), 1, as.numeric(argv[3])))
186
-