miga-base 0.2.1.6 → 0.2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ea45f64b95746bcaf6c00a6638edccd66e3873f0
4
- data.tar.gz: 83dbe2f728b6affd6122a760736fcd554c491cb8
3
+ metadata.gz: dd90983aa2e7a7a6653dc1f0456c6801d8cb3baf
4
+ data.tar.gz: f80a07fad789a82ee7b55ed040c050cecba5b9ca
5
5
  SHA512:
6
- metadata.gz: 3abc1e0dae50dbd035ca17bc6f3b8e40cea74bd400762f0bce282153f6a5e67f7fc855e6167766700380858d90f38726df5923f68a0fc6104358e0e22a749597
7
- data.tar.gz: cb537593f85427b412d8a112ca16748eaf09f35317ff81d5a0f7aa760365d2133c977a05579227fbf7c1f17882596806b79ae8c7df9818fe9947d061185cafaa
6
+ metadata.gz: 883eaef98d137a956ecbd28bdff15a7c657d84dc1c1dc2f296bd70107924332731f23215507d8d5f60382ebf92cd0a8a6ba2ff746e9d49a1f849615772d05546
7
+ data.tar.gz: 5124f28088c29a3fc06cac810b6c4e0d2cb702fd6de109b97c35464560b6a3ada73df769a73d87c0594b0d1e8e5400735cd4d11084b8f89b5c959adc0cc3ef07
@@ -6,13 +6,7 @@
6
6
  o = {q:true}
7
7
  opts = OptionParser.new do |opt|
8
8
  opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt])
10
- opt.on("-r", "--result STRING",
11
- "(Mandatory) Name of the result to add.",
12
- "Recognized names for dataset-specific results include:",
13
- *MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
14
- "Recognized names for project-wide results include:",
15
- *MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}){ |v| o[:name]=v }
9
+ opt_object(opt, o, [:project, :dataset_opt, :result])
16
10
  opt_common(opt, o)
17
11
  end.parse!
18
12
 
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true}
7
+ opts = OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_opt, :result])
10
+ opt.on("--compute-and-save",
11
+ "Computes and saves the statistics."){ |v| o[:compute] = v }
12
+ opt_common(opt, o)
13
+ end.parse!
14
+
15
+ ##=> Main <=
16
+ opts.parse!
17
+ opt_require(o, project:"-P", name:"-r")
18
+
19
+ $stderr.puts "Loading project." unless o[:q]
20
+ p = MiGA::Project.load(o[:project])
21
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
22
+
23
+ $stderr.puts "Loading result." unless o[:q]
24
+ if o[:dataset].nil?
25
+ r = p.add_result(o[:name], false)
26
+ else
27
+ d = p.dataset(o[:dataset])
28
+ r = d.add_result(o[:name], false)
29
+ end
30
+ raise "Cannot load result." if r.nil?
31
+
32
+ if o[:compute]
33
+ $stderr.puts "Computing statistics." unless o[:q]
34
+ stats = {}
35
+ case o[:name]
36
+ when :raw_reads
37
+ scr = "awk 'NR%4==2{L+=length($0)} END{print NR/4, L*4/NR}'"
38
+ if r[:files][:pair1].nil?
39
+ s = `#{scr} '#{r.file_path :single}'`.chomp.split(" ")
40
+ stats = {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
41
+ else
42
+ s1 = `#{scr} '#{r.file_path :pair1}'`.chomp.split(" ")
43
+ s2 = `#{scr} '#{r.file_path :pair2}'`.chomp.split(" ")
44
+ stats = {read_pairs: s1[0].to_i,
45
+ average_length_forward: [s1[1].to_f, "bp"],
46
+ average_length_reverse: [s2[1].to_f, "bp"]}
47
+ end
48
+ when :trimmed_fasta
49
+ scr = "awk '{L+=$2} END{print NR, L/NR}'"
50
+ f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
51
+ s = `FastA.length.pl '#{f}' | #{scr}`.chomp.split(" ")
52
+ stats = {reads: s[0].to_i, average_length: [s[1].to_f, "bp"]}
53
+ when :assembly
54
+ f = r.file_path :largecontigs
55
+ s = `FastA.N50.pl '#{f}'`.chomp.split("\n").map{|i| i.gsub(/.*: /,'').to_i}
56
+ stats = {contigs: s[1], n50: [s[0], "bp"], total_length: [s[2], "bp"]}
57
+ when :cds
58
+ scr = "awk '{L+=$2} END{print NR, L/NR}'"
59
+ f = r.file_path :proteins
60
+ s = `FastA.length.pl '#{f}' | #{scr}`.chomp.split(" ")
61
+ stats = {predicted_proteins: s[0].to_i, average_length: [s[1].to_f, "aa"]}
62
+ else
63
+ stats = nil
64
+ end
65
+ unless stats.nil?
66
+ r[:stats] = stats
67
+ r.save
68
+ end
69
+ end
70
+
71
+ r[:stats].each do |k,v|
72
+ puts "#{k.to_s.unmiga_name.capitalize}: #{v.is_a?(Array) ? v.join(" ") : v}."
73
+ end
74
+
75
+ $stderr.puts "Done." unless o[:q]
data/bin/miga CHANGED
@@ -11,23 +11,29 @@ require "miga"
11
11
  ##=> Global variables <=
12
12
 
13
13
  $task_desc = {
14
- add_result: "Registers a result.",
15
- add_taxonomy: "Registers taxonomic information for datasets.",
16
- create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
14
+ # Projects
17
15
  create_project: "Creates an empty MiGA project.",
18
- daemon: "Controls the daemon of a MiGA project.",
19
- date: "Returns the current date in standard MiGA format.",
16
+ project_info: "Displays information about a MiGA project.",
17
+ # Datasets
18
+ create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
20
19
  download_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
20
+ unlink_dataset: "Removes a dataset from an MiGA project.",
21
21
  find_datasets: "Finds unregistered datasets based on result files.",
22
22
  import_datasets: "Link datasets (including results) from one project to "+
23
23
  "another.",
24
- index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
25
24
  list_datasets: "Lists all registered datasets in an MiGA project.",
25
+ # Results
26
+ add_result: "Registers a result.",
27
+ result_stats: "Extracts statistics for the given result.",
26
28
  list_files: "Lists all registered files from the results of a dataset or a "+
27
29
  "project.",
28
- project_info: "Displays information about a MiGA project.",
30
+ # System
31
+ daemon: "Controls the daemon of a MiGA project.",
32
+ date: "Returns the current date in standard MiGA format.",
33
+ # Taxonomy
34
+ add_taxonomy: "Registers taxonomic information for datasets.",
35
+ index_taxonomy: "Creates a taxonomy-indexed list of the datasets.",
29
36
  tax_distributions: "Estimates distributions of distance by taxonomy.",
30
- unlink_dataset: "Removes a dataset from an MiGA project."
31
37
  }
32
38
 
33
39
  ##=> Functions <=
@@ -58,6 +64,13 @@ def opt_object(opt, o, what=[:project, :dataset])
58
64
  "Type of dataset. Recognized types include:",
59
65
  *MiGA::Project.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
60
66
  ){ |v| o[:type]=v.to_sym } if what.include? :project_type
67
+ opt.on("-r", "--result STRING",
68
+ "(Mandatory) Name of the result to add.",
69
+ "Recognized names for dataset-specific results include:",
70
+ *MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
71
+ "Recognized names for project-wide results include:",
72
+ *MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}
73
+ ){ |v| o[:name]=v.downcase.to_sym } if what.include? :result
61
74
  end
62
75
 
63
76
  # OptParse flags common to all actions.
data/lib/miga/dataset.rb CHANGED
@@ -30,7 +30,9 @@ class MiGA::Dataset < MiGA::MiGA
30
30
  mapping_on_contigs: "08.mapping/01.read-ctg",
31
31
  mapping_on_genes: "08.mapping/02.read-gene",
32
32
  # Distances (for single-species datasets)
33
- distances: "09.distances"
33
+ distances: "09.distances",
34
+ # General statistics
35
+ stats: "90.stats"
34
36
  }
35
37
 
36
38
  ##
@@ -59,8 +59,10 @@ module MiGA::DatasetResult
59
59
  def add_result_assembly(base)
60
60
  return nil unless result_files_exist?(base, ".LargeContigs.fna")
61
61
  r = MiGA::Result.new(base + ".json")
62
- add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
62
+ r = add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
63
63
  :allcontigs=>".AllContigs.fna"})
64
+ add_result(:trimmed_fasta) #-> Post interposing
65
+ r
64
66
  end
65
67
 
66
68
  ##
@@ -133,6 +135,12 @@ module MiGA::DatasetResult
133
135
  add_result_distances_multi(base)
134
136
  end
135
137
  end
138
+
139
+ ##
140
+ # Add result type +:stats+ at +base+.
141
+ def add_result_stats(base)
142
+ MiGA::Result.new(base + ".json")
143
+ end
136
144
 
137
145
  ##
138
146
  # Add result type +:distances+ for _multi_ datasets at +base+.
data/lib/miga/project.rb CHANGED
@@ -30,6 +30,7 @@ class MiGA::Project < MiGA::MiGA
30
30
  10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
31
31
  10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
32
32
  10.clades/04.phylogeny/02.core 10.clades/05.metadata
33
+ 90.stats
33
34
  ]
34
35
 
35
36
  ##
@@ -45,10 +46,11 @@ class MiGA::Project < MiGA::MiGA
45
46
  clade_finding: "10.clades/01.find",
46
47
  # Clade analysis
47
48
  subclades: "10.clades/02.ani",
48
- ogs: "10.clades/03.ogs",
49
- ess_phylogeny: "10.clades/04.phylogeny/01.essential",
50
- core_phylogeny: "10.clades/04.phylogeny/02.core",
51
- clade_metadata: "10.clades/05.metadata"
49
+ ogs: "10.clades/03.ogs"
50
+ #ess_phylogeny: "10.clades/04.phylogeny/01.essential",
51
+ #core_phylogeny: "10.clades/04.phylogeny/02.core",
52
+ #clade_metadata: "10.clades/05.metadata"
53
+ #project_stats: "90.stats"
52
54
  }
53
55
 
54
56
  ##
data/lib/miga/result.rb CHANGED
@@ -62,6 +62,10 @@ class MiGA::Result < MiGA::MiGA
62
62
  # Entry with symbol +k+.
63
63
  def [](k) data[k.to_sym] ; end
64
64
 
65
+ ##
66
+ # Adds value +v+ to entry with symbol +k+.
67
+ def []=(k,v) data[k.to_sym]=v ; end
68
+
65
69
  ##
66
70
  # Register +file+ (path relative to #dir) with the symbol +k+. If the file
67
71
  # doesn't exist but the .gz extension does, the gzipped file is registered
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.2, 1, 6]
13
+ VERSION = [0.2, 2, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2016, 04, 20)
21
+ VERSION_DATE = Date.new(2016, 8, 31)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -0,0 +1,22 @@
1
+ #!/bin/bash
2
+ # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
+ set -e
4
+ echo "MiGA: $MIGA"
5
+ echo "Project: $PROJECT"
6
+ source "$MIGA/scripts/miga.bash" || exit 1
7
+ DIR="$PROJECT/data/90.stats"
8
+ [[ -d "$DIR" ]] || mkdir -p "$DIR"
9
+ cd "$DIR"
10
+
11
+ # Initialize
12
+ date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
13
+
14
+ # Calculate statistics
15
+ for i in raw_reads trimmed_fasta assembly cds ; do
16
+ echo "# $i"
17
+ miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
18
+ done
19
+
20
+ # Finalize
21
+ date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
22
+ miga add_result -P "$PROJECT" -D "$DATASET" -r stats
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1.6
4
+ version: 0.2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-20 00:00:00.000000000 Z
11
+ date: 2016-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -130,6 +130,7 @@ files:
130
130
  - scripts/ogs.bash
131
131
  - scripts/read_quality.bash
132
132
  - scripts/ssu.bash
133
+ - scripts/stats.bash
133
134
  - scripts/subclades.bash
134
135
  - scripts/trimmed_fasta.bash
135
136
  - scripts/trimmed_reads.bash
@@ -154,6 +155,7 @@ files:
154
155
  - actions/list_datasets.rb
155
156
  - actions/list_files.rb
156
157
  - actions/project_info.rb
158
+ - actions/result_stats.rb
157
159
  - actions/tax_distributions.rb
158
160
  - actions/unlink_dataset.rb
159
161
  - Gemfile