miga-base 0.2.1.6 → 0.2.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ea45f64b95746bcaf6c00a6638edccd66e3873f0
4
- data.tar.gz: 83dbe2f728b6affd6122a760736fcd554c491cb8
3
+ metadata.gz: dd90983aa2e7a7a6653dc1f0456c6801d8cb3baf
4
+ data.tar.gz: f80a07fad789a82ee7b55ed040c050cecba5b9ca
5
5
  SHA512:
6
- metadata.gz: 3abc1e0dae50dbd035ca17bc6f3b8e40cea74bd400762f0bce282153f6a5e67f7fc855e6167766700380858d90f38726df5923f68a0fc6104358e0e22a749597
7
- data.tar.gz: cb537593f85427b412d8a112ca16748eaf09f35317ff81d5a0f7aa760365d2133c977a05579227fbf7c1f17882596806b79ae8c7df9818fe9947d061185cafaa
6
+ metadata.gz: 883eaef98d137a956ecbd28bdff15a7c657d84dc1c1dc2f296bd70107924332731f23215507d8d5f60382ebf92cd0a8a6ba2ff746e9d49a1f849615772d05546
7
+ data.tar.gz: 5124f28088c29a3fc06cac810b6c4e0d2cb702fd6de109b97c35464560b6a3ada73df769a73d87c0594b0d1e8e5400735cd4d11084b8f89b5c959adc0cc3ef07
@@ -6,13 +6,7 @@
6
6
  o = {q:true}
7
7
  opts = OptionParser.new do |opt|
8
8
  opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt.on("-r", "--result STRING",
11
- "(Mandatory) Name of the result to add.",
12
- "Recognized names for dataset-specific results include:",
13
- *MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
14
- "Recognized names for project-wide results include:",
15
- *MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}){ |v| o[:name]=v }
9
+ opt_object(opt, o, [:project, :dataset_opt, :result])
16
10
  opt_common(opt, o)
17
11
  end.parse!
18
12
 
@@ -0,0 +1,75 @@
#!/usr/bin/env ruby

# @package MiGA
# @license Artistic-2.0

# Action +result_stats+: prints the statistics stored in a result of a
# project, or of one of its datasets when a dataset is given. With
# +--compute-and-save+ the statistics are first computed from the files
# registered in the result and saved into it.

require "shellwords"

o = {q:true}
opts = OptionParser.new do |opt|
  opt_banner(opt)
  opt_object(opt, o, [:project, :dataset_opt, :result])
  opt.on("--compute-and-save",
    "Computes and saves the statistics."){ |v| o[:compute] = v }
  opt_common(opt, o)
end

##=> Main <=
# Parse exactly once. Chaining +.parse!+ onto the block above would leave
# +opts+ holding the Array of remaining arguments, which has no +parse!+.
opts.parse!
opt_require(o, project:"-P", name:"-r")

$stderr.puts "Loading project." unless o[:q]
project = MiGA::Project.load(o[:project])
raise "Impossible to load project: #{o[:project]}" if project.nil?

$stderr.puts "Loading result." unless o[:q]
if o[:dataset].nil?
  r = project.add_result(o[:name], false)
else
  d = project.dataset(o[:dataset])
  # Fail with a readable message instead of a NoMethodError on nil.
  raise "Impossible to load dataset: #{o[:dataset]}" if d.nil?
  r = d.add_result(o[:name], false)
end
raise "Cannot load result." if r.nil?

if o[:compute]
  $stderr.puts "Computing statistics." unless o[:q]
  # Shell-quoted path of the file registered in the result under key +k+, so
  # that paths containing spaces or quotes do not break the commands below.
  quoted_path = lambda { |k| Shellwords.escape(r.file_path(k)) }
  # awk program printing the number of lines and the average of column 2;
  # it is fed with the output of FastA.length.pl.
  len_scr = "awk '{L+=$2} END{print NR, L/NR}'"
  # NOTE: when a command prints nothing (e.g., awk stopping on an empty
  # input), the missing fields are read below as 0 / 0.0.
  stats =
    case o[:name]
    when :raw_reads
      # Every fourth line (offset 2) is taken as a read sequence; prints the
      # number of reads and their average length.
      scr = "awk 'NR%4==2{L+=length($0)} END{print NR/4, L*4/NR}'"
      if r[:files][:pair1].nil?
        s = `#{scr} #{quoted_path[:single]}`.chomp.split(" ")
        {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
      else
        s1 = `#{scr} #{quoted_path[:pair1]}`.chomp.split(" ")
        s2 = `#{scr} #{quoted_path[:pair2]}`.chomp.split(" ")
        {read_pairs: s1[0].to_i,
          average_length_forward: [s1[1].to_f, "bp"],
          average_length_reverse: [s2[1].to_f, "bp"]}
      end
    when :trimmed_fasta
      f = r[:files][:coupled].nil? ? quoted_path[:single] : quoted_path[:coupled]
      s = `FastA.length.pl #{f} | #{len_scr}`.chomp.split(" ")
      {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
    when :assembly
      # Each output line of FastA.N50.pl is reduced to the integer following
      # its last ": "; the lines are read as N50, contigs, total length.
      s = `FastA.N50.pl #{quoted_path[:largecontigs]}`.
        chomp.split("\n").map{ |i| i.gsub(/.*: /,'').to_i }
      {contigs: s[1], n50: [s[0], "bp"], total_length: [s[2], "bp"]}
    when :cds
      s = `FastA.length.pl #{quoted_path[:proteins]} | #{len_scr}`.
        chomp.split(" ")
      {predicted_proteins: s[0].to_i, average_length: [s[1].to_f, "aa"]}
    end
  # Result names without a recipe above leave the stored stats untouched.
  unless stats.nil?
    r[:stats] = stats
    r.save
  end
end

# A result may carry no statistics (never computed, or unsupported result
# name); in that case nothing is printed.
(r[:stats] || {}).each do |k,v|
  puts "#{k.to_s.unmiga_name.capitalize}: #{v.is_a?(Array) ? v.join(" ") : v}."
end

$stderr.puts "Done." unless o[:q]
data/bin/miga CHANGED
@@ -11,23 +11,29 @@ require "miga"
11
11
  ##=> Global variables <=
12
12
 
13
13
  $task_desc = {
14
- add_result: "Registers a result.",
15
- add_taxonomy: "Registers taxonomic information for datasets.",
16
- create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
14
+ # Projects
17
15
  create_project: "Creates an empty MiGA project.",
18
- daemon: "Controls the daemon of a MiGA project.",
19
- date: "Returns the current date in standard MiGA format.",
16
+ project_info: "Displays information about a MiGA project.",
17
+ # Datasets
18
+ create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
20
19
  download_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
20
+ unlink_dataset: "Removes a dataset from an MiGA project.",
21
21
  find_datasets: "Finds unregistered datasets based on result files.",
22
22
  import_datasets: "Link datasets (including results) from one project to "+
23
23
  "another.",
24
- index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
25
24
  list_datasets: "Lists all registered datasets in an MiGA project.",
25
+ # Results
26
+ add_result: "Registers a result.",
27
+ result_stats: "Extracts statistics for the given result.",
26
28
  list_files: "Lists all registered files from the results of a dataset or a "+
27
29
  "project.",
28
- project_info: "Displays information about a MiGA project.",
30
+ # System
31
+ daemon: "Controls the daemon of a MiGA project.",
32
+ date: "Returns the current date in standard MiGA format.",
33
+ # Taxonomy
34
+ add_taxonomy: "Registers taxonomic information for datasets.",
35
+ index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
29
36
  tax_distributions: "Estimates distributions of distance by taxonomy.",
30
- unlink_dataset: "Removes a dataset from an MiGA project."
31
37
  }
32
38
 
33
39
  ##=> Functions <=
@@ -58,6 +64,13 @@ def opt_object(opt, o, what=[:project, :dataset])
58
64
  "Type of dataset. Recognized types include:",
59
65
  *MiGA::Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
60
66
  ){ |v| o[:type]=v.to_sym } if what.include? :project_type
67
+ opt.on("-r", "--result STRING",
68
+ "(Mandatory) Name of the result to add.",
69
+ "Recognized names for dataset-specific results include:",
70
+ *MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
71
+ "Recognized names for project-wide results include:",
72
+ *MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
73
+ ){ |v| o[:name]=v.downcase.to_sym } if what.include? :result
61
74
  end
62
75
 
63
76
  # OptParse flags common to all actions.
data/lib/miga/dataset.rb CHANGED
@@ -30,7 +30,9 @@ class MiGA::Dataset < MiGA::MiGA
30
30
  mapping_on_contigs: "08.mapping/01.read-ctg",
31
31
  mapping_on_genes: "08.mapping/02.read-gene",
32
32
  # Distances (for single-species datasets)
33
- distances: "09.distances"
33
+ distances: "09.distances",
34
+ # General statistics
35
+ stats: "90.stats"
34
36
  }
35
37
 
36
38
  ##
@@ -59,8 +59,10 @@ module MiGA::DatasetResult
59
59
  def add_result_assembly(base)
60
60
  return nil unless result_files_exist?(base, ".LargeContigs.fna")
61
61
  r = MiGA::Result.new(base + ".json")
62
- add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
62
+ r = add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
63
63
  :allcontigs=>".AllContigs.fna"})
64
+ add_result(:trimmed_fasta) #-> Post interposing
65
+ r
64
66
  end
65
67
 
66
68
  ##
@@ -133,6 +135,12 @@ module MiGA::DatasetResult
133
135
  add_result_distances_multi(base)
134
136
  end
135
137
  end
138
+
##
# Add result type +:stats+ at +base+. Returns a MiGA::Result instantiated
# with the path +base+ followed by ".json"; no files are registered here.
def add_result_stats(base)
  json_path = base + ".json"
  MiGA::Result.new(json_path)
end
136
144
 
137
145
  ##
138
146
  # Add result type +:distances+ for _multi_ datasets at +base+.
data/lib/miga/project.rb CHANGED
@@ -30,6 +30,7 @@ class MiGA::Project < MiGA::MiGA
30
30
  10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
31
31
  10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
32
32
  10.clades/04.phylogeny/02.core 10.clades/05.metadata
33
+ 90.stats
33
34
  ]
34
35
 
35
36
  ##
@@ -45,10 +46,11 @@ class MiGA::Project < MiGA::MiGA
45
46
  clade_finding: "10.clades/01.find",
46
47
  # Clade analysis
47
48
  subclades: "10.clades/02.ani",
48
- ogs: "10.clades/03.ogs",
49
- ess_phylogeny: "10.clades/04.phylogeny/01.essential",
50
- core_phylogeny: "10.clades/04.phylogeny/02.core",
51
- clade_metadata: "10.clades/05.metadata"
49
+ ogs: "10.clades/03.ogs"
50
+ #ess_phylogeny: "10.clades/04.phylogeny/01.essential",
51
+ #core_phylogeny: "10.clades/04.phylogeny/02.core",
52
+ #clade_metadata: "10.clades/05.metadata"
53
+ #project_stats: "90.stats"
52
54
  }
53
55
 
54
56
  ##
data/lib/miga/result.rb CHANGED
@@ -62,6 +62,10 @@ class MiGA::Result < MiGA::MiGA
62
62
  # Entry with symbol +k+.
63
63
  def [](k) data[k.to_sym] ; end
64
64
 
##
# Stores value +v+ in the entry with symbol +k+. The key is converted with
# +to_sym+ before being written to #data.
def []=(k, v)
  data[k.to_sym] = v
end
68
+
65
69
  ##
66
70
  # Register +file+ (path relative to #dir) with the symbol +k+. If the file
67
71
  # doesn't exist but the .gz extension does, the gzipped file is registered
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 1, 6]
13
+ VERSION = [0.2, 2, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2016, 04, 20)
21
+ VERSION_DATE = Date.new(2016, 8, 31)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -0,0 +1,22 @@
#!/bin/bash
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
#
# Computes and saves the statistics of the raw_reads, trimmed_fasta,
# assembly, and cds results of $DATASET, then registers the stats result.
# Runs inside $PROJECT/data/90.stats and stops at the first failing command.
set -e
echo "MiGA: $MIGA"
echo "Project: $PROJECT"
source "$MIGA/scripts/miga.bash" || exit 1
DIR="$PROJECT/data/90.stats"
# mkdir -p succeeds when the directory is already there
mkdir -p "$DIR"
cd "$DIR"

# Timestamp format shared by the .start and .done marker files
STAMP_FORMAT="+%Y-%m-%d %H:%M:%S %z"

# Initialize
date "$STAMP_FORMAT" > "$DATASET.start"

# Calculate statistics
RESULTS=(raw_reads trimmed_fasta assembly cds)
for result in "${RESULTS[@]}" ; do
  echo "# $result"
  miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" \
    -r "$result"
done

# Finalize
date "$STAMP_FORMAT" > "$DATASET.done"
miga add_result -P "$PROJECT" -D "$DATASET" -r stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1.6
4
+ version: 0.2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-20 00:00:00.000000000 Z
11
+ date: 2016-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -130,6 +130,7 @@ files:
130
130
  - scripts/ogs.bash
131
131
  - scripts/read_quality.bash
132
132
  - scripts/ssu.bash
133
+ - scripts/stats.bash
133
134
  - scripts/subclades.bash
134
135
  - scripts/trimmed_fasta.bash
135
136
  - scripts/trimmed_reads.bash
@@ -154,6 +155,7 @@ files:
154
155
  - actions/list_datasets.rb
155
156
  - actions/list_files.rb
156
157
  - actions/project_info.rb
158
+ - actions/result_stats.rb
157
159
  - actions/tax_distributions.rb
158
160
  - actions/unlink_dataset.rb
159
161
  - Gemfile