miga-base 0.2.0.6 → 0.2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE +201 -0
- data/README.md +17 -335
- data/Rakefile +31 -0
- data/actions/add_result +2 -5
- data/actions/add_taxonomy +4 -7
- data/actions/create_dataset +5 -6
- data/actions/create_project +2 -5
- data/actions/daemon +2 -5
- data/actions/download_dataset +88 -58
- data/actions/find_datasets +36 -38
- data/actions/import_datasets +2 -5
- data/actions/index_taxonomy +2 -5
- data/actions/list_datasets +47 -49
- data/actions/list_files +7 -11
- data/actions/unlink_dataset +2 -5
- data/bin/miga +1 -1
- data/lib/miga/common.rb +132 -0
- data/lib/miga/daemon.rb +229 -168
- data/lib/miga/dataset.rb +354 -277
- data/lib/miga/gui.rb +346 -269
- data/lib/miga/metadata.rb +115 -71
- data/lib/miga/project.rb +361 -259
- data/lib/miga/remote_dataset.rb +200 -148
- data/lib/miga/result.rb +150 -99
- data/lib/miga/tax_index.rb +124 -67
- data/lib/miga/taxonomy.rb +129 -100
- data/lib/miga/version.rb +57 -0
- data/lib/miga.rb +2 -77
- data/scripts/_distances_noref_nomulti.bash +2 -0
- data/scripts/_distances_ref_nomulti.bash +2 -0
- data/scripts/aai_distances.bash +1 -0
- data/scripts/ani_distances.bash +1 -0
- data/scripts/assembly.bash +1 -0
- data/scripts/cds.bash +1 -0
- data/scripts/clade_finding.bash +17 -1
- data/scripts/distances.bash +1 -0
- data/scripts/essential_genes.bash +1 -0
- data/scripts/haai_distances.bash +1 -0
- data/scripts/init.bash +2 -0
- data/scripts/mytaxa.bash +1 -0
- data/scripts/mytaxa_scan.bash +1 -0
- data/scripts/ogs.bash +1 -0
- data/scripts/read_quality.bash +1 -0
- data/scripts/ssu.bash +1 -0
- data/scripts/subclades.bash +1 -0
- data/scripts/trimmed_fasta.bash +1 -0
- data/scripts/trimmed_reads.bash +1 -0
- data/test/common_test.rb +82 -0
- data/test/daemon_test.rb +53 -0
- data/test/dataset_test.rb +156 -0
- data/test/jruby_gui_test.rb +20 -0
- data/test/metadata_test.rb +48 -0
- data/test/project_test.rb +54 -0
- data/test/remote_dataset_test.rb +41 -0
- data/test/tax_index_test.rb +44 -0
- data/test/taxonomy_test.rb +36 -0
- data/test/test_helper.rb +32 -0
- metadata +53 -38
data/lib/miga/tax_index.rb
CHANGED
@@ -1,70 +1,127 @@
|
|
1
|
-
|
2
1
|
# @package MiGA
|
3
|
-
# @
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
child
|
49
|
-
end
|
50
|
-
def add_dataset(dataset) @datasets << dataset ; end
|
51
|
-
def datasets_count
|
52
|
-
datasets.size + children.map{ |it| it.datasets_count }.reduce(0, :+)
|
53
|
-
end
|
54
|
-
def to_json(*a)
|
55
|
-
{ str:tax_str, datasets:datasets.map{|d| d.name}, children:children }.to_json(a)
|
56
|
-
end
|
57
|
-
def to_hash
|
58
|
-
{ str:tax_str, datasets:datasets.map{|d| d.name}, children:children.map{ |it| it.to_hash } }
|
59
|
-
end
|
60
|
-
def to_tab(unknown, indent=0)
|
61
|
-
o = ""
|
62
|
-
o = (" " * indent) + tax_str + ": " + datasets_count.to_s + "\n" if unknown or not datasets.empty? or not name.nil?
|
63
|
-
indent += 2
|
64
|
-
datasets.each{ |ds| o += (" " * indent) + "# " + ds.name + "\n" }
|
65
|
-
children.each{ |it| o += it.to_tab(unknown, indent) }
|
66
|
-
o
|
67
|
-
end
|
68
|
-
end
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
require "miga/taxonomy"
|
5
|
+
|
6
|
+
##
|
7
|
+
# Indexing methods based on taxonomy.
|
8
|
+
class MiGA::TaxIndex < MiGA::MiGA
|
9
|
+
|
10
|
+
# Instance-level
|
11
|
+
|
12
|
+
##
|
13
|
+
# Datasets in the index.
|
14
|
+
attr_reader :datasets
|
15
|
+
# Taxonomy root.
|
16
|
+
attr_reader :root
|
17
|
+
|
18
|
+
##
|
19
|
+
# Initialize an empty MiGA::TaxIndex
|
20
|
+
def initialize
|
21
|
+
@root = MiGA::TaxIndexTaxon.new :root, "biota"
|
22
|
+
@datasets = []
|
23
|
+
end
|
24
|
+
|
25
|
+
##
|
26
|
+
# Index +dataset+, a MiGA::Dataset object.
|
27
|
+
def <<(dataset)
|
28
|
+
return nil if dataset.metadata[:tax].nil?
|
29
|
+
taxon = @root
|
30
|
+
MiGA::Taxonomy.KNOWN_RANKS.each do |rank|
|
31
|
+
taxon = taxon.add_child(rank, dataset.metadata[:tax][rank])
|
32
|
+
end
|
33
|
+
taxon.add_dataset dataset
|
34
|
+
@datasets << dataset
|
35
|
+
end
|
36
|
+
|
37
|
+
##
|
38
|
+
# Generate JSON String for the index.
|
39
|
+
def to_json
|
40
|
+
JSON.pretty_generate({ root:root.to_hash,
|
41
|
+
datasets:datasets.map{ |d| d.name } })
|
42
|
+
end
|
43
|
+
|
44
|
+
##
|
45
|
+
# Generate tabular String for the index.
|
46
|
+
def to_tab(unknown=false) ; root.to_tab(unknown) ; end
|
69
47
|
end
|
70
48
|
|
49
|
+
##
|
50
|
+
# Helper class for MiGA::TaxIndex.
|
51
|
+
class MiGA::TaxIndexTaxon < MiGA::MiGA
|
52
|
+
|
53
|
+
# Instance-level
|
54
|
+
|
55
|
+
##
|
56
|
+
# Rank of the taxon.
|
57
|
+
attr_reader :rank
|
58
|
+
# Name of the taxon.
|
59
|
+
attr_reader :name
|
60
|
+
# Children of the taxon.
|
61
|
+
attr_reader :children
|
62
|
+
# Datasets directly classified at the taxon (not at children).
|
63
|
+
attr_reader :datasets
|
64
|
+
|
65
|
+
##
|
66
|
+
# Initialize taxon at +rank+ with +name+.
|
67
|
+
def initialize(rank, name)
|
68
|
+
@rank = rank.to_sym
|
69
|
+
@name = (name.nil? ? nil : name.miga_name)
|
70
|
+
@children = []
|
71
|
+
@datasets = []
|
72
|
+
end
|
73
|
+
|
74
|
+
##
|
75
|
+
# String representation of the taxon.
|
76
|
+
def tax_str ; "#{rank}:#{name.nil? ? "?" : name}" ; end
|
77
|
+
|
78
|
+
##
|
79
|
+
# Add child at +rank+ with +name+.
|
80
|
+
def add_child(rank, name)
|
81
|
+
rank = rank.to_sym
|
82
|
+
name = name.miga_name unless name.nil?
|
83
|
+
child = children.find{ |it| it.rank==rank and it.name==name }
|
84
|
+
if child.nil?
|
85
|
+
child = MiGA::TaxIndexTaxon.new(rank, name)
|
86
|
+
@children << child
|
87
|
+
end
|
88
|
+
child
|
89
|
+
end
|
90
|
+
|
91
|
+
##
|
92
|
+
# Add dataset at the current taxon (not children).
|
93
|
+
def add_dataset(dataset) @datasets << dataset ; end
|
94
|
+
|
95
|
+
##
|
96
|
+
# Get the number of datasets in the taxon (including children).
|
97
|
+
def datasets_count
|
98
|
+
datasets.size + children.map{ |it| it.datasets_count }.reduce(0, :+)
|
99
|
+
end
|
100
|
+
|
101
|
+
##
|
102
|
+
# JSON String of the taxon.
|
103
|
+
def to_json(*a)
|
104
|
+
{ str:tax_str, datasets:datasets.map{|d| d.name},
|
105
|
+
children:children }.to_json(a)
|
106
|
+
end
|
107
|
+
|
108
|
+
##
|
109
|
+
# Hash representation of the taxon.
|
110
|
+
def to_hash
|
111
|
+
{ str:tax_str, datasets:datasets.map{|d| d.name},
|
112
|
+
children:children.map{ |it| it.to_hash } }
|
113
|
+
end
|
114
|
+
|
115
|
+
##
|
116
|
+
# Tabular String of the taxon.
|
117
|
+
def to_tab(unknown, indent=0)
|
118
|
+
o = ""
|
119
|
+
o = (" " * indent) + tax_str + ": " + datasets_count.to_s + "\n" if
|
120
|
+
unknown or not datasets.empty? or not name.nil?
|
121
|
+
indent += 2
|
122
|
+
datasets.each{ |ds| o += (" " * indent) + "# " + ds.name + "\n" }
|
123
|
+
children.each{ |it| o += it.to_tab(unknown, indent) }
|
124
|
+
o
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -1,107 +1,136 @@
|
|
1
|
-
#
|
2
1
|
# @package MiGA
|
3
|
-
# @
|
4
|
-
# @license artistic license 2.0
|
5
|
-
# @update Oct-05-2015
|
6
|
-
#
|
2
|
+
# @license Artistic-2.0
|
7
3
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
rank = rank.to_sym
|
35
|
-
return nil unless @@KNOWN_RANKS.include? rank
|
36
|
-
rank
|
37
|
-
end
|
38
|
-
# Instance
|
39
|
-
attr_reader :ranks
|
40
|
-
def initialize(str, ranks=nil)
|
41
|
-
@ranks = {}
|
42
|
-
if ranks.nil?
|
43
|
-
if str.is_a? Array or str.is_a? Hash
|
44
|
-
self << str
|
45
|
-
else
|
46
|
-
(str + " ").scan(/([A-Za-z]+):([^:]*)( )/) do |r,n,s|
|
47
|
-
self << {r=>n}
|
48
|
-
end
|
49
|
-
end
|
50
|
-
else
|
51
|
-
ranks = ranks.split(/\s+/) unless ranks.is_a? Array
|
52
|
-
str = str.split(/\s/) unless str.is_a? Array
|
53
|
-
raise "Unequal number of ranks (#{ranks.size}) " +
|
54
|
-
"and names (#{str.size}):#{ranks} => #{str}" unless
|
55
|
-
ranks.size==str.size
|
56
|
-
(0 .. str.size).each{ |i| self << "#{ranks[i]}:#{str[i]}" }
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def <<(value)
|
61
|
-
if value.is_a? Array
|
62
|
-
value.each{ |v| self << v }
|
63
|
-
elsif value.is_a? String
|
64
|
-
(rank,name) = value.split /:/
|
65
|
-
self << { rank => name }
|
66
|
-
elsif value.is_a? Hash
|
67
|
-
value.each_pair do |rank, name|
|
68
|
-
next if name.nil? or name == ""
|
69
|
-
@ranks[ Taxonomy.normalize_rank rank ] = name.gsub(/_/," ")
|
70
|
-
end
|
71
|
-
else
|
72
|
-
raise "Unsupported class '#{value.class.name}'."
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def [](rank) @ranks[ rank.to_sym ] ; end
|
77
|
-
|
78
|
-
### Evaluates if the loaded taxonomy includes `taxon`. It assumes that
|
79
|
-
### `taxon` only has one informative rank. The evaluation is
|
80
|
-
### case-insensitive.
|
81
|
-
def is_in? taxon
|
82
|
-
r = taxon.ranks.keys.first
|
83
|
-
return false if self[ r ].nil?
|
84
|
-
self[ r ].downcase == taxon[ r ].downcase
|
85
|
-
end
|
86
|
-
|
87
|
-
### Sorted list of ranks, as two-entry arrays
|
88
|
-
def sorted_ranks
|
89
|
-
@@KNOWN_RANKS.map do |r|
|
90
|
-
ranks[r].nil? ? nil : [r, ranks[r]]
|
91
|
-
end.compact
|
92
|
-
end
|
4
|
+
##
|
5
|
+
# Taxonomic classifications in MiGA.
|
6
|
+
class MiGA::Taxonomy < MiGA::MiGA
|
7
|
+
# Class-level
|
8
|
+
|
9
|
+
##
|
10
|
+
# Canonical ranks.
|
11
|
+
def self.KNOWN_RANKS() @@KNOWN_RANKS ; end
|
12
|
+
@@KNOWN_RANKS = %w{ns d k p c o f g s ssp str ds}.map{|r| r.to_sym}
|
13
|
+
|
14
|
+
##
|
15
|
+
# Synonyms for canonical ranks.
|
16
|
+
@@RANK_SYNONYMS = {
|
17
|
+
"namespace"=>"ns",
|
18
|
+
"domain"=>"d","superkingdom"=>"d",
|
19
|
+
"kingdom"=>"k",
|
20
|
+
"phylum"=>"p",
|
21
|
+
"class"=>"c",
|
22
|
+
"order"=>"o",
|
23
|
+
"family"=>"f",
|
24
|
+
"genus"=>"g",
|
25
|
+
"species"=>"s","sp"=>"s",
|
26
|
+
"subspecies"=>"ssp",
|
27
|
+
"strain"=>"str","isolate"=>"str","culture"=>"str",
|
28
|
+
"dataset"=>"ds","organism"=>"ds","genome"=>"ds","specimen"=>"ds"
|
29
|
+
}
|
93
30
|
|
94
|
-
|
31
|
+
##
|
32
|
+
# Initialize from JSON-derived Hash +o+.
|
33
|
+
def self.json_create(o) new(o["str"]) ; end
|
95
34
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
35
|
+
##
|
36
|
+
# Returns canonical rank (Symbol) for the +rank+ String.
|
37
|
+
def self.normalize_rank(rank)
|
38
|
+
rank = rank.to_s.downcase
|
39
|
+
return nil if rank=="no rank"
|
40
|
+
rank = @@RANK_SYNONYMS[rank] unless @@RANK_SYNONYMS[rank].nil?
|
41
|
+
rank = rank.to_sym
|
42
|
+
return nil unless @@KNOWN_RANKS.include? rank
|
43
|
+
rank
|
44
|
+
end
|
45
|
+
|
46
|
+
# Instance-level
|
47
|
+
|
48
|
+
##
|
49
|
+
# Taxonomic hierarchy Hash.
|
50
|
+
attr_reader :ranks
|
51
|
+
|
52
|
+
##
|
53
|
+
# Create MiGA::Taxonomy from String or Array +str+. The string is a series of
|
54
|
+
# space-delimited entries, the array is a vector of entries. Each entry can be
|
55
|
+
# either a rank:value pair (if +ranks+ is nil), or just values in the same
|
56
|
+
# order as the ranks in +ranks+. Alternatively, +str+ as a Hash with rank =>
|
57
|
+
# value pairs is also supported.
|
58
|
+
def initialize(str, ranks=nil)
|
59
|
+
@ranks = {}
|
60
|
+
if ranks.nil?
|
61
|
+
case str when Array, Hash
|
62
|
+
self << str
|
63
|
+
else
|
64
|
+
"#{str} ".scan(/([A-Za-z]+):([^:]*)( )/){ |r,n,_| self << {r=>n} }
|
100
65
|
end
|
101
|
-
|
102
|
-
|
103
|
-
|
66
|
+
else
|
67
|
+
ranks = ranks.split(/\s+/) unless ranks.is_a? Array
|
68
|
+
str = str.split(/\s/) unless str.is_a? Array
|
69
|
+
raise "Unequal number of ranks (#{ranks.size}) " +
|
70
|
+
"and names (#{str.size}):#{ranks} => #{str}" unless
|
71
|
+
ranks.size==str.size
|
72
|
+
(0 .. str.size).each{ |i| self << "#{ranks[i]}:#{str[i]}" }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
##
|
77
|
+
# Add +value+ to the hierarchy, that can be an Array, a String, or a Hash, as
|
78
|
+
# described in #initialize.
|
79
|
+
def <<(value)
|
80
|
+
if value.is_a? Array
|
81
|
+
value.each{ |v| self << v }
|
82
|
+
elsif value.is_a? String
|
83
|
+
(rank, name) = value.split(/:/)
|
84
|
+
self << { rank => name }
|
85
|
+
elsif value.is_a? Hash
|
86
|
+
value.each_pair do |rank_i, name_i|
|
87
|
+
next if name_i.nil? or name_i == ""
|
88
|
+
@ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr("_"," ")
|
104
89
|
end
|
105
|
-
|
106
|
-
|
90
|
+
else
|
91
|
+
raise "Unsupported class: #{value.class.name}."
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
##
|
96
|
+
# Get +rank+ value.
|
97
|
+
def [](rank) @ranks[ rank.to_sym ] ; end
|
98
|
+
|
99
|
+
##
|
100
|
+
# Evaluates if the loaded taxonomy includes +taxon+. It assumes that +taxon+
|
101
|
+
# only has one informative rank. The evaluation is case-insensitive.
|
102
|
+
def is_in? taxon
|
103
|
+
r = taxon.ranks.keys.first
|
104
|
+
return false if self[ r ].nil?
|
105
|
+
self[ r ].downcase == taxon[ r ].downcase
|
106
|
+
end
|
107
|
+
|
108
|
+
##
|
109
|
+
# Sorted list of ranks, as an Array of two-entry Arrays (rank and value).
|
110
|
+
def sorted_ranks
|
111
|
+
@@KNOWN_RANKS.map do |r|
|
112
|
+
ranks[r].nil? ? nil : [r, ranks[r]]
|
113
|
+
end.compact
|
114
|
+
end
|
115
|
+
|
116
|
+
##
|
117
|
+
# Get the most general rank as a two-entry Array (rank and value).
|
118
|
+
def highest; sorted_ranks.first ; end
|
107
119
|
|
120
|
+
##
|
121
|
+
# Get the most specific rank as a two-entry Array (rank and value).
|
122
|
+
def lowest; sorted_ranks.last ; end
|
123
|
+
|
124
|
+
##
|
125
|
+
# Generate canonical String for the taxonomy.
|
126
|
+
def to_s
|
127
|
+
sorted_ranks.map{ |r| "#{r[0]}:#{r[1].gsub(/\s/,"_")}" }.join(" ")
|
128
|
+
end
|
129
|
+
|
130
|
+
##
|
131
|
+
# Generate JSON-formatted String representing the taxonomy.
|
132
|
+
def to_json(*a)
|
133
|
+
{ JSON.create_id => self.class.name, "str" => self.to_s }.to_json(*a)
|
134
|
+
end
|
135
|
+
|
136
|
+
end
|
data/lib/miga/version.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
##
|
5
|
+
# High-level minimal requirements for the MiGA::MiGA class.
|
6
|
+
module MiGA
|
7
|
+
|
8
|
+
##
|
9
|
+
# Current version of MiGA. An Array with three values:
|
10
|
+
# - Float representing the major.minor version.
|
11
|
+
# - Integer representing gem releases of the current version.
|
12
|
+
# - Integer representing minor changes that require new version number.
|
13
|
+
VERSION = [0.2, 0, 7]
|
14
|
+
|
15
|
+
##
|
16
|
+
# Nickname for the current major.minor version.
|
17
|
+
VERSION_NAME = "pochoir"
|
18
|
+
|
19
|
+
##
|
20
|
+
# Date of the current gem release.
|
21
|
+
VERSION_DATE = Date.new(2016, 04, 13)
|
22
|
+
|
23
|
+
##
|
24
|
+
# Reference of MiGA.
|
25
|
+
CITATION = "Rodriguez-R et al, in preparation. Microbial Genomes Atlas: " +
|
26
|
+
"Standardizing genomic and metagenomic analyses for Archaea and Bacteria."
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
class MiGA::MiGA
|
31
|
+
|
32
|
+
include MiGA
|
33
|
+
|
34
|
+
##
|
35
|
+
# Major.minor version as Float.
|
36
|
+
def self.VERSION ; VERSION[0] ; end
|
37
|
+
|
38
|
+
##
|
39
|
+
# Complete version as string.
|
40
|
+
def self.FULL_VERSION ; VERSION.join(".") ; end
|
41
|
+
|
42
|
+
##
|
43
|
+
# Complete version with nickname and date as string.
|
44
|
+
def self.LONG_VERSION
|
45
|
+
"MiGA " + VERSION.join(".") + " - " + VERSION_NAME + " - " +
|
46
|
+
VERSION_DATE.to_s
|
47
|
+
end
|
48
|
+
|
49
|
+
##
|
50
|
+
# Date of the current gem release.
|
51
|
+
def self.VERSION_DATE ; VERSION_DATE ; end
|
52
|
+
|
53
|
+
##
|
54
|
+
# Reference of MiGA.
|
55
|
+
def self.CITATION ; CITATION ; end
|
56
|
+
|
57
|
+
end
|
data/lib/miga.rb
CHANGED
@@ -1,83 +1,8 @@
|
|
1
|
-
#
|
2
1
|
# @package MiGA
|
3
|
-
# @
|
4
|
-
# @license artistic license 2.0
|
5
|
-
#
|
2
|
+
# @license Artistic-2.0
|
6
3
|
|
7
|
-
require "date"
|
8
4
|
require "json"
|
9
5
|
require "fileutils"
|
6
|
+
require "miga/common"
|
10
7
|
require "miga/project"
|
11
8
|
require "miga/taxonomy"
|
12
|
-
|
13
|
-
module MiGA
|
14
|
-
VERSION = [0.2, 0, 6]
|
15
|
-
VERSION_NAME = "pochoir"
|
16
|
-
VERSION_DATE = Date.new(2015, 12, 07)
|
17
|
-
CITATION = "Rodriguez-R et al, in preparation. Microbial Genomes Atlas: " +
|
18
|
-
"Standardizing genomic and metagenomic analyses for Archaea and Bacteria."
|
19
|
-
class MiGA
|
20
|
-
@@DEBUG = false
|
21
|
-
@@DEBUG_TRACE = false
|
22
|
-
def self.DEBUG_ON() @@DEBUG=true end
|
23
|
-
def self.DEBUG_OFF() @@DEBUG=false end
|
24
|
-
def self.DEBUG_TRACE_ON
|
25
|
-
@@DEBUG_TRACE=true
|
26
|
-
self.DEBUG_ON
|
27
|
-
end
|
28
|
-
def self.DEBUG_TRACE_OFF
|
29
|
-
@@DEBUG_TRACE=false
|
30
|
-
self.DEBUG_OFF
|
31
|
-
end
|
32
|
-
def self.DEBUG *args
|
33
|
-
$stderr.puts(*args) if @@DEBUG
|
34
|
-
$stderr.puts caller.map{|v| v.gsub(/^/," ")}.join("\n") if
|
35
|
-
@@DEBUG_TRACE
|
36
|
-
end
|
37
|
-
def self.VERSION ; VERSION[0] ; end
|
38
|
-
def self.FULL_VERSION ; VERSION.join(".") ; end
|
39
|
-
def self.LONG_VERSION
|
40
|
-
"MiGA " + VERSION.join(".") + " - " + VERSION_NAME + " - " +
|
41
|
-
VERSION_DATE.to_s
|
42
|
-
end
|
43
|
-
def self.VERSION_DATE ; VERSION_DATE ; end
|
44
|
-
def self.CITATION ; CITATION ; end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
class File
|
49
|
-
def self.unlink_r(path)
|
50
|
-
if Dir.exists? path
|
51
|
-
unless File.symlink? path
|
52
|
-
Dir.entries(path).reject{|f| f =~ /^\.\.?$/}.each do |f|
|
53
|
-
File.unlink_r path + "/" + f
|
54
|
-
end
|
55
|
-
end
|
56
|
-
Dir.unlink path
|
57
|
-
elsif File.exists? path
|
58
|
-
File.unlink path
|
59
|
-
else
|
60
|
-
raise "Cannot find file: #{path}"
|
61
|
-
end
|
62
|
-
end
|
63
|
-
def self.generic_transfer(old_name, new_name, method)
|
64
|
-
return nil if exist? new_name
|
65
|
-
case method
|
66
|
-
when :symlink
|
67
|
-
File.symlink(old_name, new_name)
|
68
|
-
when :hardlink
|
69
|
-
File.link(old_name, new_name)
|
70
|
-
when :copy
|
71
|
-
FileUtils.cp_r(old_name, new_name)
|
72
|
-
else
|
73
|
-
raise "Unknown transfer method: #{method}."
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
class String
|
79
|
-
def miga_name ; gsub /[^A-Za-z0-9_]/, "_" ; end
|
80
|
-
def miga_name? ; not(self !~ /^[A-Za-z0-9_]+$/) ; end
|
81
|
-
def unmiga_name ; gsub /_/, " " ; end
|
82
|
-
end
|
83
|
-
|
data/scripts/aai_distances.bash
CHANGED
data/scripts/ani_distances.bash
CHANGED
data/scripts/assembly.bash
CHANGED
data/scripts/cds.bash
CHANGED
data/scripts/clade_finding.bash
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
|
+
set -e
|
3
4
|
echo "MiGA: $MIGA"
|
4
5
|
echo "Project: $PROJECT"
|
5
6
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -11,16 +12,31 @@ date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
|
|
11
12
|
# Markov-cluster genomes by ANI
|
12
13
|
gunzip -c ../../09.distances/03.ani/miga-project.txt.gz | tail -n+2 \
|
13
14
|
| awk -F"\\t" '{print $2"'"\\t"'"$3"'"\\t"'"$4}' > genome-genome.aai90.rbm
|
14
|
-
ogs.mcl.rb -d . -o miga-project.
|
15
|
+
ogs.mcl.rb -d . -o miga-project.aai90-clades -t "$CORES" -i \
|
15
16
|
-f "(\\S+)-(\\S+)\\.aai90\\.rbm"
|
17
|
+
rm genome-genome.aai90.rbm
|
16
18
|
cat genome-genome.aai90.rbm | awk -F"\\t" '$3>=95' > genome-genome.ani95.rbm
|
17
19
|
ogs.mcl.rb -d . -o miga-project.ani95-clades -t "$CORES" -b \
|
18
20
|
-f "(\\S+)-(\\S+)\\.ani95\\.rbm"
|
21
|
+
rm genome-genome.ani95.rbm
|
19
22
|
|
20
23
|
# Propose clade projects
|
21
24
|
cat miga-project.ani95-clades | tail -n +2 | tr "," "\\t" | awk 'NF >= 5' \
|
22
25
|
> miga-project.proposed-clades
|
23
26
|
|
27
|
+
# Run R code
|
28
|
+
echo "
|
29
|
+
source('$MIGA/utils/subclades.R');
|
30
|
+
subclades('../../09.distances/02.aai/miga-project.txt.gz',
|
31
|
+
'miga-project', $CORES);
|
32
|
+
" | R --vanilla
|
33
|
+
mv miga-project.ani.nwk miga-project.aai.nwk
|
34
|
+
|
35
|
+
# Compile
|
36
|
+
ruby "$MIGA/utils/subclades-compile.rb" . \
|
37
|
+
> miga-project.class.tsv \
|
38
|
+
2> miga-project.class.nwk
|
39
|
+
|
24
40
|
# Finalize
|
25
41
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
26
42
|
miga add_result -P "$PROJECT" -r clade_finding
|
data/scripts/distances.bash
CHANGED