miga-base 0.2.1.6 → 0.2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/add_result.rb +1 -7
- data/actions/result_stats.rb +75 -0
- data/bin/miga +21 -8
- data/lib/miga/dataset.rb +3 -1
- data/lib/miga/dataset_result.rb +9 -1
- data/lib/miga/project.rb +6 -4
- data/lib/miga/result.rb +4 -0
- data/lib/miga/version.rb +2 -2
- data/scripts/stats.bash +22 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd90983aa2e7a7a6653dc1f0456c6801d8cb3baf
|
4
|
+
data.tar.gz: f80a07fad789a82ee7b55ed040c050cecba5b9ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 883eaef98d137a956ecbd28bdff15a7c657d84dc1c1dc2f296bd70107924332731f23215507d8d5f60382ebf92cd0a8a6ba2ff746e9d49a1f849615772d05546
|
7
|
+
data.tar.gz: 5124f28088c29a3fc06cac810b6c4e0d2cb702fd6de109b97c35464560b6a3ada73df769a73d87c0594b0d1e8e5400735cd4d11084b8f89b5c959adc0cc3ef07
|
data/actions/add_result.rb
CHANGED
@@ -6,13 +6,7 @@
|
|
6
6
|
o = {q:true}
|
7
7
|
opts = OptionParser.new do |opt|
|
8
8
|
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt])
|
10
|
-
opt.on("-r", "--result STRING",
|
11
|
-
"(Mandatory) Name of the result to add.",
|
12
|
-
"Recognized names for dataset-specific results include:",
|
13
|
-
*MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
|
14
|
-
"Recognized names for project-wide results include:",
|
15
|
-
*MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}){ |v| o[:name]=v }
|
9
|
+
opt_object(opt, o, [:project, :dataset_opt, :result])
|
16
10
|
opt_common(opt, o)
|
17
11
|
end.parse!
|
18
12
|
|
data/actions/result_stats.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# @package MiGA
|
4
|
+
# @license Artistic-2.0
|
5
|
+
|
6
|
+
o = {q:true}
|
7
|
+
opts = OptionParser.new do |opt|
|
8
|
+
opt_banner(opt)
|
9
|
+
opt_object(opt, o, [:project, :dataset_opt, :result])
|
10
|
+
opt.on("--compute-and-save",
|
11
|
+
"Computes and saves the statistics."){ |v| o[:compute] = v }
|
12
|
+
opt_common(opt, o)
|
13
|
+
end.parse!
|
14
|
+
|
15
|
+
##=> Main <=
|
16
|
+
opts.parse!
|
17
|
+
opt_require(o, project:"-P", name:"-r")
|
18
|
+
|
19
|
+
$stderr.puts "Loading project." unless o[:q]
|
20
|
+
p = MiGA::Project.load(o[:project])
|
21
|
+
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
22
|
+
|
23
|
+
$stderr.puts "Loading result." unless o[:q]
|
24
|
+
if o[:dataset].nil?
|
25
|
+
r = p.add_result(o[:name], false)
|
26
|
+
else
|
27
|
+
d = p.dataset(o[:dataset])
|
28
|
+
r = d.add_result(o[:name], false)
|
29
|
+
end
|
30
|
+
raise "Cannot load result." if r.nil?
|
31
|
+
|
32
|
+
if o[:compute]
|
33
|
+
$stderr.puts "Computing statistics." unless o[:q]
|
34
|
+
stats = {}
|
35
|
+
case o[:name]
|
36
|
+
when :raw_reads
|
37
|
+
scr = "awk 'NR%4==2{L+=length($0)} END{print NR/4, L*4/NR}'"
|
38
|
+
if r[:files][:pair1].nil?
|
39
|
+
s = `#{scr} '#{r.file_path :single}'`.chomp.split(" ")
|
40
|
+
stats = {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
|
41
|
+
else
|
42
|
+
s1 = `#{scr} '#{r.file_path :pair1}'`.chomp.split(" ")
|
43
|
+
s2 = `#{scr} '#{r.file_path :pair2}'`.chomp.split(" ")
|
44
|
+
stats = {read_pairs: s1[0].to_i,
|
45
|
+
average_length_forward: [s1[1].to_f, "bp"],
|
46
|
+
average_length_reverse: [s2[1].to_f, "bp"]}
|
47
|
+
end
|
48
|
+
when :trimmed_fasta
|
49
|
+
scr = "awk '{L+=$2} END{print NR, L/NR}'"
|
50
|
+
f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
|
51
|
+
s = `FastA.length.pl '#{f}' | #{scr}`.chomp.split(" ")
|
52
|
+
stats = {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
|
53
|
+
when :assembly
|
54
|
+
f = r.file_path :largecontigs
|
55
|
+
s = `FastA.N50.pl '#{f}'`.chomp.split("\n").map{|i| i.gsub(/.*: /,'').to_i}
|
56
|
+
stats = {contigs: s[1], n50: [s[0], "bp"], total_length: [s[2], "bp"]}
|
57
|
+
when :cds
|
58
|
+
scr = "awk '{L+=$2} END{print NR, L/NR}'"
|
59
|
+
f = r.file_path :proteins
|
60
|
+
s = `FastA.length.pl '#{f}' | #{scr}`.chomp.split(" ")
|
61
|
+
stats = {predicted_proteins: s[0].to_i, average_length: [s[1].to_f, "aa"]}
|
62
|
+
else
|
63
|
+
stats = nil
|
64
|
+
end
|
65
|
+
unless stats.nil?
|
66
|
+
r[:stats] = stats
|
67
|
+
r.save
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
r[:stats].each do |k,v|
|
72
|
+
puts "#{k.to_s.unmiga_name.capitalize}: #{v.is_a?(Array) ? v.join(" ") : v}."
|
73
|
+
end
|
74
|
+
|
75
|
+
$stderr.puts "Done." unless o[:q]
|
data/bin/miga
CHANGED
@@ -11,23 +11,29 @@ require "miga"
|
|
11
11
|
##=> Global variables <=
|
12
12
|
|
13
13
|
$task_desc = {
|
14
|
-
|
15
|
-
add_taxonomy: "Registers taxonomic information for datasets.",
|
16
|
-
create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
|
14
|
+
# Projects
|
17
15
|
create_project: "Creates an empty MiGA project.",
|
18
|
-
|
19
|
-
|
16
|
+
project_info: "Displays information about a MiGA project.",
|
17
|
+
# Datasets
|
18
|
+
create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
|
20
19
|
download_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
|
20
|
+
unlink_dataset: "Removes a dataset from an MiGA project.",
|
21
21
|
find_datasets: "Finds unregistered datasets based on result files.",
|
22
22
|
import_datasets: "Link datasets (including results) from one project to "+
|
23
23
|
"another.",
|
24
|
-
index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
|
25
24
|
list_datasets: "Lists all registered datasets in an MiGA project.",
|
25
|
+
# Results
|
26
|
+
add_result: "Registers a result.",
|
27
|
+
result_stats: "Extracts statistics for the given result.",
|
26
28
|
list_files: "Lists all registered files from the results of a dataset or a "+
|
27
29
|
"project.",
|
28
|
-
|
30
|
+
# System
|
31
|
+
daemon: "Controls the daemon of a MiGA project.",
|
32
|
+
date: "Returns the current date in standard MiGA format.",
|
33
|
+
# Taxonomy
|
34
|
+
add_taxonomy: "Registers taxonomic information for datasets.",
|
35
|
+
index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
|
29
36
|
tax_distributions: "Estimates distributions of distance by taxonomy.",
|
30
|
-
unlink_dataset: "Removes a dataset from an MiGA project."
|
31
37
|
}
|
32
38
|
|
33
39
|
##=> Functions <=
|
@@ -58,6 +64,13 @@ def opt_object(opt, o, what=[:project, :dataset])
|
|
58
64
|
"Type of dataset. Recognized types include:",
|
59
65
|
*MiGA::Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
|
60
66
|
){ |v| o[:type]=v.to_sym } if what.include? :project_type
|
67
|
+
opt.on("-r", "--result STRING",
|
68
|
+
"(Mandatory) Name of the result to add.",
|
69
|
+
"Recognized names for dataset-specific results include:",
|
70
|
+
*MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
|
71
|
+
"Recognized names for project-wide results include:",
|
72
|
+
*MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
73
|
+
){ |v| o[:name]=v.downcase.to_sym } if what.include? :result
|
61
74
|
end
|
62
75
|
|
63
76
|
# OptParse flags common to all actions.
|
data/lib/miga/dataset.rb
CHANGED
@@ -30,7 +30,9 @@ class MiGA::Dataset < MiGA::MiGA
|
|
30
30
|
mapping_on_contigs: "08.mapping/01.read-ctg",
|
31
31
|
mapping_on_genes: "08.mapping/02.read-gene",
|
32
32
|
# Distances (for single-species datasets)
|
33
|
-
distances: "09.distances"
|
33
|
+
distances: "09.distances",
|
34
|
+
# General statistics
|
35
|
+
stats: "90.stats"
|
34
36
|
}
|
35
37
|
|
36
38
|
##
|
data/lib/miga/dataset_result.rb
CHANGED
@@ -59,8 +59,10 @@ module MiGA::DatasetResult
|
|
59
59
|
def add_result_assembly(base)
|
60
60
|
return nil unless result_files_exist?(base, ".LargeContigs.fna")
|
61
61
|
r = MiGA::Result.new(base + ".json")
|
62
|
-
add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
|
62
|
+
r = add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
|
63
63
|
:allcontigs=>".AllContigs.fna"})
|
64
|
+
add_result(:trimmed_fasta) #-> Post interposing
|
65
|
+
r
|
64
66
|
end
|
65
67
|
|
66
68
|
##
|
@@ -133,6 +135,12 @@ module MiGA::DatasetResult
|
|
133
135
|
add_result_distances_multi(base)
|
134
136
|
end
|
135
137
|
end
|
138
|
+
|
139
|
+
##
|
140
|
+
# Add result type +:stats+ at +base+.
|
141
|
+
def add_result_stats(base)
|
142
|
+
MiGA::Result.new(base + ".json")
|
143
|
+
end
|
136
144
|
|
137
145
|
##
|
138
146
|
# Add result type +:distances+ for _multi_ datasets at +base+.
|
data/lib/miga/project.rb
CHANGED
@@ -30,6 +30,7 @@ class MiGA::Project < MiGA::MiGA
|
|
30
30
|
10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
|
31
31
|
10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
|
32
32
|
10.clades/04.phylogeny/02.core 10.clades/05.metadata
|
33
|
+
90.stats
|
33
34
|
]
|
34
35
|
|
35
36
|
##
|
@@ -45,10 +46,11 @@ class MiGA::Project < MiGA::MiGA
|
|
45
46
|
clade_finding: "10.clades/01.find",
|
46
47
|
# Clade analysis
|
47
48
|
subclades: "10.clades/02.ani",
|
48
|
-
ogs: "10.clades/03.ogs"
|
49
|
-
ess_phylogeny: "10.clades/04.phylogeny/01.essential",
|
50
|
-
core_phylogeny: "10.clades/04.phylogeny/02.core",
|
51
|
-
clade_metadata: "10.clades/05.metadata"
|
49
|
+
ogs: "10.clades/03.ogs"
|
50
|
+
#ess_phylogeny: "10.clades/04.phylogeny/01.essential",
|
51
|
+
#core_phylogeny: "10.clades/04.phylogeny/02.core",
|
52
|
+
#clade_metadata: "10.clades/05.metadata"
|
53
|
+
#project_stats: "90.stats"
|
52
54
|
}
|
53
55
|
|
54
56
|
##
|
data/lib/miga/result.rb
CHANGED
@@ -62,6 +62,10 @@ class MiGA::Result < MiGA::MiGA
|
|
62
62
|
# Entry with symbol +k+.
|
63
63
|
def [](k) data[k.to_sym] ; end
|
64
64
|
|
65
|
+
##
|
66
|
+
# Adds value +v+ to entry with symbol +k+.
|
67
|
+
def []=(k,v) data[k.to_sym]=v ; end
|
68
|
+
|
65
69
|
##
|
66
70
|
# Register +file+ (path relative to #dir) with the symbol +k+. If the file
|
67
71
|
# doesn't exist but the .gz extension does, the gzipped file is registered
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2, 1, 6]
|
13
|
+
VERSION = [0.2, 2, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2016,
|
21
|
+
VERSION_DATE = Date.new(2016, 8, 31)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/scripts/stats.bash
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
|
+
set -e
|
4
|
+
echo "MiGA: $MIGA"
|
5
|
+
echo "Project: $PROJECT"
|
6
|
+
source "$MIGA/scripts/miga.bash" || exit 1
|
7
|
+
DIR="$PROJECT/data/90.stats"
|
8
|
+
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
9
|
+
cd "$DIR"
|
10
|
+
|
11
|
+
# Initialize
|
12
|
+
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
|
13
|
+
|
14
|
+
# Calculate statistics
|
15
|
+
for i in raw_reads trimmed_fasta assembly cds ; do
|
16
|
+
echo "# $i"
|
17
|
+
miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
|
18
|
+
done
|
19
|
+
|
20
|
+
# Finalize
|
21
|
+
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
22
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r stats
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1.6
|
4
|
+
version: 0.2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -130,6 +130,7 @@ files:
|
|
130
130
|
- scripts/ogs.bash
|
131
131
|
- scripts/read_quality.bash
|
132
132
|
- scripts/ssu.bash
|
133
|
+
- scripts/stats.bash
|
133
134
|
- scripts/subclades.bash
|
134
135
|
- scripts/trimmed_fasta.bash
|
135
136
|
- scripts/trimmed_reads.bash
|
@@ -154,6 +155,7 @@ files:
|
|
154
155
|
- actions/list_datasets.rb
|
155
156
|
- actions/list_files.rb
|
156
157
|
- actions/project_info.rb
|
158
|
+
- actions/result_stats.rb
|
157
159
|
- actions/tax_distributions.rb
|
158
160
|
- actions/unlink_dataset.rb
|
159
161
|
- Gemfile
|