miga-base 0.3.1.7 → 0.3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/actions/ncbi_get.rb +8 -0
  3. data/lib/miga/common.rb +9 -215
  4. data/lib/miga/common/base.rb +49 -0
  5. data/lib/miga/common/format.rb +135 -0
  6. data/lib/miga/common/path.rb +49 -0
  7. data/lib/miga/daemon.rb +3 -60
  8. data/lib/miga/daemon/base.rb +69 -0
  9. data/lib/miga/dataset.rb +3 -3
  10. data/lib/miga/dataset/result.rb +5 -5
  11. data/lib/miga/result.rb +5 -0
  12. data/lib/miga/version.rb +7 -5
  13. data/scripts/distances.bash +2 -19
  14. data/scripts/taxonomy.bash +2 -21
  15. data/test/common_test.rb +9 -0
  16. data/utils/distance/base.rb +6 -0
  17. data/utils/distance/commands.rb +82 -0
  18. data/utils/distance/database.rb +86 -0
  19. data/utils/distance/pipeline.rb +98 -0
  20. data/utils/distance/runner.rb +104 -0
  21. data/utils/distance/temporal.rb +37 -0
  22. data/utils/distances.rb +9 -0
  23. data/utils/enveomics/Docs/recplot2.md +233 -0
  24. data/utils/enveomics/Makefile +1 -1
  25. data/utils/enveomics/Manifest/Tasks/blasttab.json +66 -0
  26. data/utils/enveomics/Manifest/Tasks/fasta.json +10 -3
  27. data/utils/enveomics/Manifest/Tasks/fastq.json +4 -4
  28. data/utils/enveomics/Manifest/Tasks/mapping.json +38 -1
  29. data/utils/enveomics/Manifest/categories.json +11 -1
  30. data/utils/enveomics/Manifest/examples.json +2 -2
  31. data/utils/enveomics/README.md +2 -0
  32. data/utils/enveomics/Scripts/Aln.cat.rb +1 -0
  33. data/utils/enveomics/Scripts/BedGraph.tad.rb +52 -30
  34. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  35. data/utils/enveomics/Scripts/BlastTab.recplot2.R +7 -2
  36. data/utils/enveomics/Scripts/FastA.interpose.pl +26 -20
  37. data/utils/enveomics/Scripts/FastQ.interpose.pl +20 -20
  38. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  39. data/utils/enveomics/Scripts/SRA.download.bash +28 -21
  40. data/utils/enveomics/Scripts/Table.barplot.R +1 -0
  41. data/utils/enveomics/Scripts/aai.rb +4 -2
  42. data/utils/enveomics/build_enveomics_r.bash +5 -5
  43. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
  44. data/utils/enveomics/enveomics.R/NAMESPACE +6 -2
  45. data/utils/enveomics/enveomics.R/R/recplot2.R +471 -71
  46. data/utils/enveomics/enveomics.R/README.md +26 -17
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -1
  48. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +23 -0
  49. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +6 -3
  50. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +32 -0
  51. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +24 -0
  52. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +12 -7
  53. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +8 -37
  54. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +20 -0
  55. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +20 -0
  56. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +29 -0
  57. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +42 -0
  58. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +18 -0
  59. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +33 -0
  60. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +28 -0
  61. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +56 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +3 -1
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +22 -0
  64. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +20 -14
  65. data/utils/requirements.txt +1 -1
  66. metadata +28 -4
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +0 -40
  68. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +0 -18
@@ -0,0 +1,49 @@
1
+
2
+ module MiGA::Common::Path
3
+
4
+ ##
5
+ # Root path to MiGA (as estimated from the location of the current file).
6
+ def root_path
7
+ File.expand_path('../../../..', __FILE__)
8
+ end
9
+
10
+ ##
11
+ # Path to a script to be executed for +task+. Supported +opts+ are:
12
+ # - +:miga+ Path to the MiGA home to use. If not passed, the home of the
13
+ # library is used.
14
+ # - +:project+ MiGA::Project object to check within plugins. If not passed,
15
+ # only core scripts are supported.
16
+ def script_path(task, opts={})
17
+ opts[:miga] ||= root_path
18
+ unless opts[:project].nil?
19
+ opts[:project].plugins.each do |pl|
20
+ if File.exist? File.expand_path("scripts/#{task}.bash", pl)
21
+ opts[:miga] = pl
22
+ end
23
+ end
24
+ end
25
+ File.expand_path("scripts/#{task}.bash", opts[:miga])
26
+ end
27
+
28
+ end
29
+
30
+ ##
31
+ # MiGA extensions to the File class.
32
+ class File
33
+
34
+ ##
35
+ # Method to transfer a file from +old_name+ to +new_name+, using a +method+
36
+ # that can be one of :symlink for File.symlink, :hardlink for File.link, or
37
+ # :copy for FileUtils.cp_r.
38
+ def self.generic_transfer(old_name, new_name, method)
39
+ return nil if exist? new_name
40
+ if(method==:copy)
41
+ FileUtils.cp_r(old_name, new_name)
42
+ else
43
+ method=:link if method==:hardlink
44
+ File.send(method, old_name, new_name)
45
+ end
46
+ end
47
+
48
+ end
49
+
@@ -2,13 +2,14 @@
2
2
  # @license Artistic-2.0
3
3
 
4
4
  require 'miga/project'
5
- require 'daemons'
6
- require 'date'
5
+ require 'miga/daemon/base'
7
6
 
8
7
  ##
9
8
  # MiGA Daemons handling job submissions.
10
9
  class MiGA::Daemon < MiGA::MiGA
11
10
 
11
+ include MiGA::Daemon::Base
12
+
12
13
  ##
13
14
  # When was the last time a daemon for the MiGA::Project +project+ was seen
14
15
  # active? Returns DateTime.
@@ -61,64 +62,6 @@ class MiGA::Daemon < MiGA::MiGA
61
62
  multiple: false, log_output: true }
62
63
  end
63
64
 
64
- ##
65
- # Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
66
- # (or nil to use as getter). Skips consistency tests if +force+. Returns new
67
- # value.
68
- def runopts(k, v=nil, force=false)
69
- k = k.to_sym
70
- unless v.nil?
71
- if [:latency, :maxjobs, :ppn].include?(k)
72
- v = v.to_i
73
- elsif [:shutdown_when_done].include?(k)
74
- v = !!v
75
- end
76
- raise "Daemon's #{k} cannot be set to zero." if !force and v==0
77
- @runopts[k] = v
78
- end
79
- if k==:kill and v.nil?
80
- case @runopts[:type].to_s
81
- when 'bash' then return "kill -9 '%s'"
82
- when 'qsub' then return "qdel '%s'"
83
- else return "canceljob '%s'"
84
- end
85
- end
86
- @runopts[k]
87
- end
88
-
89
- ##
90
- # Returns Integer indicating the number of seconds to sleep between checks.
91
- def latency() runopts(:latency); end
92
-
93
- ##
94
- # Returns Integer indicating the maximum number of concurrent jobs to run.
95
- def maxjobs() runopts(:maxjobs); end
96
-
97
- ##
98
- # Returns Integer indicating the number of CPUs per job.
99
- def ppn() runopts(:ppn); end
100
-
101
- ##
102
- # Returns Boolean indicating if the daemon should shutdown when processing is
103
- # complete.
104
- def shutdown_when_done?() !!runopts(:shutdown_when_done); end
105
-
106
- ##
107
- # Initializes the daemon with +opts+.
108
- def start(opts=[]) daemon('start', opts); end
109
-
110
- ##
111
- # Stops the daemon with +opts+.
112
- def stop(opts=[]) daemon('stop', opts); end
113
-
114
- ##
115
- # Restarts the daemon with +opts+.
116
- def restart(opts=[]) daemon('restart', opts); end
117
-
118
- ##
119
- # Returns the status of the daemon with +opts+.
120
- def status(opts=[]) daemon('status', opts); end
121
-
122
65
  ##
123
66
  # Launches the +task+ with options +opts+ (as command-line arguments).
124
67
  # Supported tasks include: start, stop, restart, status.
@@ -0,0 +1,69 @@
1
+
2
+ require 'daemons'
3
+ require 'date'
4
+
5
+ class MiGA::Daemon < MiGA::MiGA
6
+ end
7
+
8
+ module MiGA::Daemon::Base
9
+
10
+ ##
11
+ # Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
12
+ # (or nil to use as getter). Skips consistency tests if +force+. Returns new
13
+ # value.
14
+ def runopts(k, v=nil, force=false)
15
+ k = k.to_sym
16
+ unless v.nil?
17
+ if [:latency, :maxjobs, :ppn].include?(k)
18
+ v = v.to_i
19
+ elsif [:shutdown_when_done].include?(k)
20
+ v = !!v
21
+ end
22
+ raise "Daemon's #{k} cannot be set to zero." if !force and v==0
23
+ @runopts[k] = v
24
+ end
25
+ if k==:kill and v.nil?
26
+ case @runopts[:type].to_s
27
+ when 'bash' then return "kill -9 '%s'"
28
+ when 'qsub' then return "qdel '%s'"
29
+ else return "canceljob '%s'"
30
+ end
31
+ end
32
+ @runopts[k]
33
+ end
34
+
35
+ ##
36
+ # Returns Integer indicating the number of seconds to sleep between checks.
37
+ def latency() runopts(:latency); end
38
+
39
+ ##
40
+ # Returns Integer indicating the maximum number of concurrent jobs to run.
41
+ def maxjobs() runopts(:maxjobs); end
42
+
43
+ ##
44
+ # Returns Integer indicating the number of CPUs per job.
45
+ def ppn() runopts(:ppn); end
46
+
47
+ ##
48
+ # Returns Boolean indicating if the daemon should shutdown when processing is
49
+ # complete.
50
+ def shutdown_when_done?() !!runopts(:shutdown_when_done); end
51
+
52
+ ##
53
+ # Initializes the daemon with +opts+.
54
+ def start(opts=[]) daemon('start', opts); end
55
+
56
+ ##
57
+ # Stops the daemon with +opts+.
58
+ def stop(opts=[]) daemon('stop', opts); end
59
+
60
+ ##
61
+ # Restarts the daemon with +opts+.
62
+ def restart(opts=[]) daemon('restart', opts); end
63
+
64
+ ##
65
+ # Returns the status of the daemon with +opts+.
66
+ def status(opts=[]) daemon('status', opts); end
67
+
68
+ end
69
+
@@ -1,9 +1,9 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "miga/metadata"
5
- require "miga/dataset/result"
6
- require "sqlite3"
4
+ require 'miga/metadata'
5
+ require 'miga/dataset/result'
6
+ require 'sqlite3'
7
7
 
8
8
  ##
9
9
  # Dataset representation in MiGA.
@@ -1,7 +1,7 @@
1
1
 
2
- require "sqlite3"
3
- require "miga/result"
4
- require "miga/dataset/base"
2
+ require 'sqlite3'
3
+ require 'miga/result'
4
+ require 'miga/dataset/base'
5
5
 
6
6
  ##
7
7
  # Helper module including specific functions to add dataset results.
@@ -35,8 +35,8 @@ module MiGA::Dataset::Result
35
35
  # returns a result if the expected files are complete. The +opts+ hash
36
36
  # controls result creation (if necessary). Supported values include:
37
37
  # - +is_clean+: A Boolean indicating if the input files are clean.
38
- # - +force+: A Boolean indicating if the result must be re-indexed. If true, it
39
- # implies save=true.
38
+ # - +force+: A Boolean indicating if the result must be re-indexed. If true,
39
+ # it implies save=true.
40
40
  # Returns MiGA::Result or nil.
41
41
  def add_result(result_type, save=true, opts={})
42
42
  dir = @@RESULT_DIRS[result_type]
@@ -117,6 +117,11 @@ class MiGA::Result < MiGA::MiGA
117
117
  # Save the result persistently (in the JSON file #path).
118
118
  def save
119
119
  @data[:updated] = Time.now.to_s
120
+ s = path(:start)
121
+ if File.exist? s
122
+ @data[:started] = File.read(s).chomp
123
+ File.unlink s
124
+ end
120
125
  json = JSON.pretty_generate data
121
126
  ofh = File.open(path, "w")
122
127
  ofh.puts json
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 1, 7]
13
+ VERSION = [0.3, 2, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -22,15 +22,17 @@ module MiGA
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
25
- CITATION = "Rodriguez-R et al, in preparation. Microbial Genomes Atlas: " +
26
- "Standardizing genomic and metagenomic analyses for Archaea and Bacteria."
25
+ CITATION = "Rodriguez-R et al (2018). " +
26
+ "The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene " +
27
+ "diversity analysis of Archaea and Bacteria at the whole genome level. " +
28
+ "Nucleic Acids Research, gky467. DOI: 10.1093/nar/gky467."
27
29
 
28
30
  end
29
31
 
30
32
  class MiGA::MiGA
31
-
33
+
32
34
  include MiGA
33
-
35
+
34
36
  ##
35
37
  # Major.minor version as Float.
36
38
  def self.VERSION ; VERSION[0] ; end
@@ -10,26 +10,9 @@ cd "$PROJECT/data/09.distances"
10
10
 
11
11
  # Initialize
12
12
  miga date > "$DATASET.start"
13
- TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
14
- trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
15
13
 
16
- # Check type of dataset
17
- NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
18
- | wc -l | awk '{print $1}')
19
- REF=$(miga ls -P "$PROJECT" -D "$DATASET" --ref \
20
- | wc -l | awk '{print $1}')
21
-
22
- # Call submodules
23
- # shellcheck source=scripts/_distances_functions.bash
24
- source "$MIGA/scripts/_distances_functions.bash"
25
- if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
26
- # shellcheck source=scripts/_distances_ref_nomulti.bash
27
- source "$MIGA/scripts/_distances_ref_nomulti.bash"
28
- elif [[ "$NOMULTI" -eq "1" ]] ; then
29
- S_PROJ=$PROJECT
30
- # shellcheck source=scripts/_distances_noref_nomulti.bash
31
- source "$MIGA/scripts/_distances_noref_nomulti.bash"
32
- fi
14
+ # Run
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
33
16
 
34
17
  # Finalize
35
18
  rm -R "$TMPDIR"
@@ -13,27 +13,8 @@ cd "$DIR"
13
13
  # Initialize
14
14
  miga date > "$DATASET.start"
15
15
 
16
- # Check if there is a reference project
17
- S_PROJ=$(miga about -P "$PROJECT" -m ref_project)
18
-
19
- if [[ "$S_PROJ" != "?" ]] ; then
20
-
21
- # Check type of dataset
22
- NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
23
- | wc -l | awk '{print $1}')
24
-
25
- if [[ "$NOMULTI" -eq "1" ]] ; then
26
- # Call submodules
27
- TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
28
- trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
29
- # shellcheck source=scripts/_distances_functions.bash
30
- source "$MIGA/scripts/_distances_functions.bash"
31
- # shellcheck source=scripts/_distances_noref_nomulti.bash
32
- source "$MIGA/scripts/_distances_noref_nomulti.bash"
33
- rm -R "$TMPDIR"
34
- fi
35
-
36
- fi
16
+ # Run
17
+ ruby "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
37
18
 
38
19
  # Finalize
39
20
  miga date > "$DATASET.done"
@@ -79,4 +79,13 @@ class CommonTest < Test::Unit::TestCase
79
79
  assert_equal("678 90", tab[3])
80
80
  end
81
81
 
82
+ def test_miga_name
83
+ assert_equal('Xa sp. C', 'Xa_sp__C'.unmiga_name)
84
+ assert_equal('X_______', 'X^*.!{}!'.miga_name)
85
+ assert_equal('aB09', 'aB09'.miga_name)
86
+ assert('R2D2'.miga_name?)
87
+ assert(!'C3-PO'.miga_name?)
88
+ assert_equal("123\n1\n", '1231'.wrap_width(3))
89
+ end
90
+
82
91
  end
@@ -0,0 +1,6 @@
1
+
2
+ require 'miga'
3
+ require 'miga/tax_dist'
4
+
5
+ class MiGA::DistanceRunner
6
+ end
@@ -0,0 +1,82 @@
1
+
2
+ module MiGA::DistanceRunner::Commands
3
+ # Estimates or calculates AAI against +target+
4
+ def aai(target)
5
+ # Check if the request makes sense
6
+ return nil if target.nil? or target.result(:essential_genes).nil?
7
+ # Check if it's been calculated
8
+ y = stored_value(target, :aai)
9
+ return y unless y.nil? or y.zero?
10
+ # Try hAAI (except in clade projects)
11
+ unless @ref_project.is_clade?
12
+ y = haai(target)
13
+ return y unless y.nil? or y.zero?
14
+ end
15
+ # Full AAI
16
+ aai_cmd(
17
+ tmp_file("proteins.fa"), target.result(:cds).file_path(:proteins),
18
+ dataset.name, target.name, tmp_dbs[:aai]).tap{ checkpoint :aai }
19
+ end
20
+
21
+ # Estimates AAI against +target+ using hAAI
22
+ def haai(target)
23
+ haai = aai_cmd(tmp_file("ess_genes.fa"),
24
+ target.result(:essential_genes).file_path(:ess_genes),
25
+ dataset.name, target.name, tmp_dbs[:haai],
26
+ aai_save_rbm: "no-save-rbm", aai_p: opts[:haai_p])
27
+ checkpoint :haai
28
+ return nil if haai.nil? or haai.zero? or haai > 90.0
29
+ aai = 100.0 - Math.exp(2.435076 + 0.4275193*Math.log(100.0-haai))
30
+ SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
31
+ conn.execute "insert into aai values(?, ?, ?, 0, 0, 0)",
32
+ [dataset.name, target.name, aai]
33
+ end
34
+ aai
35
+ end
36
+
37
+ # Calculates ANI against +target+
38
+ def ani(target)
39
+ # Check if the request makes sense
40
+ t = tmp_file("largecontigs.fa")
41
+ r = target.result(:assembly)
42
+ return nil if r.nil? or !File.size?(t)
43
+ # Check if it's been calculated
44
+ y = stored_value(target, :ani)
45
+ return y unless y.nil? or y.zero?
46
+ # Run it
47
+ ani_cmd(
48
+ t, r.file_path(:largecontigs),
49
+ dataset.name, target.name, tmp_dbs[:ani]).tap{ checkpoint :ani }
50
+ end
51
+
52
+ # Execute an AAI command
53
+ def aai_cmd(f1, f2, n1, n2, db, o={})
54
+ o = opts.merge(o)
55
+ v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
56
+ --name1 "#{n1}" --name2 "#{n2}" \
57
+ -t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
58
+ -p "#{o[:aai_p] || "blast+"}"`.chomp
59
+ (v.nil? or v.empty?) ? 0 : v.to_f
60
+ end
61
+
62
+ # Execute an ANI command
63
+ def ani_cmd(f1, f2, n1, n2, db, o={})
64
+ o = opts.merge(o)
65
+ v = nil
66
+ if o[:ani_p] == 'fastani'
67
+ out = `fastANI -r "#{f1}" -q "#{f2}" \
68
+ -o /dev/stdout 2>/dev/null`.chomp.split(/\s+/)
69
+ SQLite3::Database.new(db) do |conn|
70
+ conn.execute "insert into ani values(?, ?, ?, 0, ?, ?)",
71
+ [n1, n2, out[2], out[3], out[4]]
72
+ end
73
+ v = out[2]
74
+ else
75
+ v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
76
+ --name1 "#{n1}" --name2 "#{n2}" \
77
+ -t "#{opts[:thr]}" -a --no-save-regions --no-save-rbm \
78
+ --lookup-first -p "#{o[:ani_p] || "blast+"}"`.chomp
79
+ end
80
+ v.nil? or v.empty? ? 0 : v.to_f
81
+ end
82
+ end