miga-base 0.2.1.6 → 0.2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/add_result.rb +1 -7
- data/actions/result_stats.rb +75 -0
- data/bin/miga +21 -8
- data/lib/miga/dataset.rb +3 -1
- data/lib/miga/dataset_result.rb +9 -1
- data/lib/miga/project.rb +6 -4
- data/lib/miga/result.rb +4 -0
- data/lib/miga/version.rb +2 -2
- data/scripts/stats.bash +22 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd90983aa2e7a7a6653dc1f0456c6801d8cb3baf
|
4
|
+
data.tar.gz: f80a07fad789a82ee7b55ed040c050cecba5b9ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 883eaef98d137a956ecbd28bdff15a7c657d84dc1c1dc2f296bd70107924332731f23215507d8d5f60382ebf92cd0a8a6ba2ff746e9d49a1f849615772d05546
|
7
|
+
data.tar.gz: 5124f28088c29a3fc06cac810b6c4e0d2cb702fd6de109b97c35464560b6a3ada73df769a73d87c0594b0d1e8e5400735cd4d11084b8f89b5c959adc0cc3ef07
|
data/actions/add_result.rb
CHANGED
@@ -6,13 +6,7 @@
|
|
6
6
|
o = {q:true}
|
7
7
|
opts = OptionParser.new do |opt|
|
8
8
|
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :dataset_opt])
|
10
|
-
opt.on("-r", "--result STRING",
|
11
|
-
"(Mandatory) Name of the result to add.",
|
12
|
-
"Recognized names for dataset-specific results include:",
|
13
|
-
*MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
|
14
|
-
"Recognized names for project-wide results include:",
|
15
|
-
*MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}){ |v| o[:name]=v }
|
9
|
+
opt_object(opt, o, [:project, :dataset_opt, :result])
|
16
10
|
opt_common(opt, o)
|
17
11
|
end.parse!
|
18
12
|
|
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/env ruby

# @package MiGA
# @license Artistic-2.0

# Action +result_stats+: prints the statistics stored in a result of a project
# or dataset. With --compute-and-save, the statistics are first computed from
# the result files and saved in the result. Computation is implemented for the
# results +raw_reads+, +trimmed_fasta+, +assembly+, and +cds+; for any other
# result only the statistics already stored (if any) are printed.

require "shellwords"

o = {q:true}
opts = OptionParser.new do |opt|
  opt_banner(opt)
  opt_object(opt, o, [:project, :dataset_opt, :result])
  opt.on("--compute-and-save",
    "Computes and saves the statistics."){ |v| o[:compute] = v }
  opt_common(opt, o)
end

##=> Main <=
# Parse exactly once. Chaining +.parse!+ on the block above would store the
# remaining arguments (not the parser) in +opts+.
opts.parse!
opt_require(o, project:"-P", name:"-r")

$stderr.puts "Loading project." unless o[:q]
project = MiGA::Project.load(o[:project])
raise "Impossible to load project: #{o[:project]}" if project.nil?

$stderr.puts "Loading result." unless o[:q]
if o[:dataset].nil?
  r = project.add_result(o[:name], false)
else
  dataset = project.dataset(o[:dataset])
  # NOTE(review): assumes Project#dataset returns nil for unknown names.
  raise "Impossible to load dataset: #{o[:dataset]}" if dataset.nil?
  r = dataset.add_result(o[:name], false)
end
raise "Cannot load result." if r.nil?

if o[:compute]
  $stderr.puts "Computing statistics." unless o[:q]
  # Both awk programs print "<count> <average length>". The NR guards avoid a
  # division by zero (and an awk error message) when the input is empty.
  fastq_awk = "awk 'NR%4==2{L+=length($0)} " +
    "END{print NR/4, (NR ? L*4/NR : 0)}'"
  length_awk = "awk '{L+=$2} END{print NR, (NR ? L/NR : 0)}'"
  # Shell-escaped path of the result file registered as +key+, so that paths
  # with quotes or spaces cannot break the commands below.
  quoted = lambda { |key| Shellwords.escape(r.file_path(key)) }
  stats =
    case o[:name]
    when :raw_reads
      if r[:files][:pair1].nil?
        s = `#{fastq_awk} #{quoted[:single]}`.chomp.split(" ")
        {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
      else
        s1 = `#{fastq_awk} #{quoted[:pair1]}`.chomp.split(" ")
        s2 = `#{fastq_awk} #{quoted[:pair2]}`.chomp.split(" ")
        {read_pairs: s1[0].to_i,
          average_length_forward: [s1[1].to_f, "bp"],
          average_length_reverse: [s2[1].to_f, "bp"]}
      end
    when :trimmed_fasta
      key = r[:files][:coupled].nil? ? :single : :coupled
      s = `FastA.length.pl #{quoted[key]} | #{length_awk}`.chomp.split(" ")
      {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
    when :assembly
      # Each output line of FastA.N50.pl is reduced to the integer after ": ".
      s = `FastA.N50.pl #{quoted[:largecontigs]}`.chomp.split("\n").
        map{ |i| i.gsub(/.*: /,'').to_i }
      {contigs: s[1], n50: [s[0], "bp"], total_length: [s[2], "bp"]}
    when :cds
      s = `FastA.length.pl #{quoted[:proteins]} | #{length_awk}`.
        chomp.split(" ")
      {predicted_proteins: s[0].to_i, average_length: [s[1].to_f, "aa"]}
    end
  # Results without an implemented computation leave +stats+ as nil and keep
  # whatever was previously stored.
  unless stats.nil?
    r[:stats] = stats
    r.save
  end
end

# A result may have no statistics yet; print nothing instead of failing.
(r[:stats] || {}).each do |k, v|
  puts "#{k.to_s.unmiga_name.capitalize}: #{v.is_a?(Array) ? v.join(" ") : v}."
end

$stderr.puts "Done." unless o[:q]
|
data/bin/miga
CHANGED
@@ -11,23 +11,29 @@ require "miga"
|
|
11
11
|
##=> Global variables <=
|
12
12
|
|
13
13
|
# One-line description of each task of the +miga+ command, keyed by task name
# and grouped by the kind of object the task operates on.
$task_desc = {
  # Projects
  create_project: "Creates an empty MiGA project.",
  project_info: "Displays information about a MiGA project.",
  # Datasets
  create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
  # Previously repeated the description of create_dataset verbatim.
  download_dataset: "Downloads a dataset into a pre-existing MiGA project.",
  unlink_dataset: "Removes a dataset from an MiGA project.",
  find_datasets: "Finds unregistered datasets based on result files.",
  import_datasets: "Link datasets (including results) from one project to "+
    "another.",
  list_datasets: "Lists all registered datasets in an MiGA project.",
  # Results
  add_result: "Registers a result.",
  result_stats: "Extracts statistics for the given result.",
  list_files: "Lists all registered files from the results of a dataset or a "+
    "project.",
  # System
  daemon: "Controls the daemon of a MiGA project.",
  date: "Returns the current date in standard MiGA format.",
  # Taxonomy
  add_taxonomy: "Registers taxonomic information for datasets.",
  index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
  tax_distributions: "Estimates distributions of distance by taxonomy.",
}
|
32
38
|
|
33
39
|
##=> Functions <=
|
@@ -58,6 +64,13 @@ def opt_object(opt, o, what=[:project, :dataset])
|
|
58
64
|
"Type of dataset. Recognized types include:",
|
59
65
|
*MiGA::Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
|
60
66
|
){ |v| o[:type]=v.to_sym } if what.include? :project_type
|
67
|
+
opt.on("-r", "--result STRING",
|
68
|
+
"(Mandatory) Name of the result to add.",
|
69
|
+
"Recognized names for dataset-specific results include:",
|
70
|
+
*MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
|
71
|
+
"Recognized names for project-wide results include:",
|
72
|
+
*MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
|
73
|
+
){ |v| o[:name]=v.downcase.to_sym } if what.include? :result
|
61
74
|
end
|
62
75
|
|
63
76
|
# OptParse flags common to all actions.
|
data/lib/miga/dataset.rb
CHANGED
@@ -30,7 +30,9 @@ class MiGA::Dataset < MiGA::MiGA
|
|
30
30
|
mapping_on_contigs: "08.mapping/01.read-ctg",
|
31
31
|
mapping_on_genes: "08.mapping/02.read-gene",
|
32
32
|
# Distances (for single-species datasets)
|
33
|
-
distances: "09.distances"
|
33
|
+
distances: "09.distances",
|
34
|
+
# General statistics
|
35
|
+
stats: "90.stats"
|
34
36
|
}
|
35
37
|
|
36
38
|
##
|
data/lib/miga/dataset_result.rb
CHANGED
@@ -59,8 +59,10 @@ module MiGA::DatasetResult
|
|
59
59
|
def add_result_assembly(base)
  # Nothing to register unless the large-contigs file exists for +base+.
  return unless result_files_exist?(base, ".LargeContigs.fna")
  result = MiGA::Result.new(base + ".json")
  contig_files = {largecontigs: ".LargeContigs.fna",
    allcontigs: ".AllContigs.fna"}
  # The value returned by add_files_to_ds_result is what callers receive;
  # add_result(:trimmed_fasta) runs afterwards only for its side effects.
  add_files_to_ds_result(result, name, contig_files).tap do
    add_result(:trimmed_fasta) #-> Post interposing
  end
end
|
65
67
|
|
66
68
|
##
|
@@ -133,6 +135,12 @@ module MiGA::DatasetResult
|
|
133
135
|
add_result_distances_multi(base)
|
134
136
|
end
|
135
137
|
end
|
138
|
+
|
139
|
+
##
|
140
|
+
# Add result type +:stats+ at +base+.
|
141
|
+
def add_result_stats(base)
  # The result is backed by the JSON file next to +base+; no files are
  # registered here.
  json_path = base + ".json"
  MiGA::Result.new(json_path)
end
|
136
144
|
|
137
145
|
##
|
138
146
|
# Add result type +:distances+ for _multi_ datasets at +base+.
|
data/lib/miga/project.rb
CHANGED
@@ -30,6 +30,7 @@ class MiGA::Project < MiGA::MiGA
|
|
30
30
|
10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
|
31
31
|
10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
|
32
32
|
10.clades/04.phylogeny/02.core 10.clades/05.metadata
|
33
|
+
90.stats
|
33
34
|
]
|
34
35
|
|
35
36
|
##
|
@@ -45,10 +46,11 @@ class MiGA::Project < MiGA::MiGA
|
|
45
46
|
clade_finding: "10.clades/01.find",
|
46
47
|
# Clade analysis
|
47
48
|
subclades: "10.clades/02.ani",
|
48
|
-
ogs: "10.clades/03.ogs"
|
49
|
-
ess_phylogeny: "10.clades/04.phylogeny/01.essential",
|
50
|
-
core_phylogeny: "10.clades/04.phylogeny/02.core",
|
51
|
-
clade_metadata: "10.clades/05.metadata"
|
49
|
+
ogs: "10.clades/03.ogs"
|
50
|
+
#ess_phylogeny: "10.clades/04.phylogeny/01.essential",
|
51
|
+
#core_phylogeny: "10.clades/04.phylogeny/02.core",
|
52
|
+
#clade_metadata: "10.clades/05.metadata"
|
53
|
+
#project_stats: "90.stats"
|
52
54
|
}
|
53
55
|
|
54
56
|
##
|
data/lib/miga/result.rb
CHANGED
@@ -62,6 +62,10 @@ class MiGA::Result < MiGA::MiGA
|
|
62
62
|
# Entry with symbol +k+.
|
63
63
|
def [](k) data[k.to_sym] ; end
|
64
64
|
|
65
|
+
##
|
66
|
+
# Adds value +v+ to entry with symbol +k+.
|
67
|
+
def []=(k, v)
  # Keys are normalized to symbols, mirroring the reader #[].
  data[k.to_sym] = v
end
|
68
|
+
|
65
69
|
##
|
66
70
|
# Register +file+ (path relative to #dir) with the symbol +k+. If the file
|
67
71
|
# doesn't exist but the .gz extension does, the gzipped file is registered
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2,
|
13
|
+
VERSION = [0.2, 2, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2016,
|
21
|
+
VERSION_DATE = Date.new(2016, 8, 31)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/scripts/stats.bash
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
#
# Computes and saves the statistics of the raw_reads, trimmed_fasta, assembly
# and cds results of $DATASET, then registers the "stats" result itself.
# Runs with `set -e`, so any failing command aborts the script.
set -e
printf '%s\n' "MiGA: $MIGA"
printf '%s\n' "Project: $PROJECT"
. "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/90.stats"
if [[ ! -d "$DIR" ]] ; then
  mkdir -p "$DIR"
fi
cd "$DIR"

# Initialize: record the start time
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"

# Calculate statistics, one result at a time
for result in raw_reads trimmed_fasta assembly cds ; do
  printf '%s\n' "# $result"
  miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" \
    -r "$result"
done

# Finalize: record the end time and register the result
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r stats
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -130,6 +130,7 @@ files:
|
|
130
130
|
- scripts/ogs.bash
|
131
131
|
- scripts/read_quality.bash
|
132
132
|
- scripts/ssu.bash
|
133
|
+
- scripts/stats.bash
|
133
134
|
- scripts/subclades.bash
|
134
135
|
- scripts/trimmed_fasta.bash
|
135
136
|
- scripts/trimmed_reads.bash
|
@@ -154,6 +155,7 @@ files:
|
|
154
155
|
- actions/list_datasets.rb
|
155
156
|
- actions/list_files.rb
|
156
157
|
- actions/project_info.rb
|
158
|
+
- actions/result_stats.rb
|
157
159
|
- actions/tax_distributions.rb
|
158
160
|
- actions/unlink_dataset.rb
|
159
161
|
- Gemfile
|