miga-base 0.3.1.7 → 0.3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/actions/ncbi_get.rb +8 -0
  3. data/lib/miga/common.rb +9 -215
  4. data/lib/miga/common/base.rb +49 -0
  5. data/lib/miga/common/format.rb +135 -0
  6. data/lib/miga/common/path.rb +49 -0
  7. data/lib/miga/daemon.rb +3 -60
  8. data/lib/miga/daemon/base.rb +69 -0
  9. data/lib/miga/dataset.rb +3 -3
  10. data/lib/miga/dataset/result.rb +5 -5
  11. data/lib/miga/result.rb +5 -0
  12. data/lib/miga/version.rb +7 -5
  13. data/scripts/distances.bash +2 -19
  14. data/scripts/taxonomy.bash +2 -21
  15. data/test/common_test.rb +9 -0
  16. data/utils/distance/base.rb +6 -0
  17. data/utils/distance/commands.rb +82 -0
  18. data/utils/distance/database.rb +86 -0
  19. data/utils/distance/pipeline.rb +98 -0
  20. data/utils/distance/runner.rb +104 -0
  21. data/utils/distance/temporal.rb +37 -0
  22. data/utils/distances.rb +9 -0
  23. data/utils/enveomics/Docs/recplot2.md +233 -0
  24. data/utils/enveomics/Makefile +1 -1
  25. data/utils/enveomics/Manifest/Tasks/blasttab.json +66 -0
  26. data/utils/enveomics/Manifest/Tasks/fasta.json +10 -3
  27. data/utils/enveomics/Manifest/Tasks/fastq.json +4 -4
  28. data/utils/enveomics/Manifest/Tasks/mapping.json +38 -1
  29. data/utils/enveomics/Manifest/categories.json +11 -1
  30. data/utils/enveomics/Manifest/examples.json +2 -2
  31. data/utils/enveomics/README.md +2 -0
  32. data/utils/enveomics/Scripts/Aln.cat.rb +1 -0
  33. data/utils/enveomics/Scripts/BedGraph.tad.rb +52 -30
  34. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  35. data/utils/enveomics/Scripts/BlastTab.recplot2.R +7 -2
  36. data/utils/enveomics/Scripts/FastA.interpose.pl +26 -20
  37. data/utils/enveomics/Scripts/FastQ.interpose.pl +20 -20
  38. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  39. data/utils/enveomics/Scripts/SRA.download.bash +28 -21
  40. data/utils/enveomics/Scripts/Table.barplot.R +1 -0
  41. data/utils/enveomics/Scripts/aai.rb +4 -2
  42. data/utils/enveomics/build_enveomics_r.bash +5 -5
  43. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
  44. data/utils/enveomics/enveomics.R/NAMESPACE +6 -2
  45. data/utils/enveomics/enveomics.R/R/recplot2.R +471 -71
  46. data/utils/enveomics/enveomics.R/README.md +26 -17
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -1
  48. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +23 -0
  49. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +6 -3
  50. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +32 -0
  51. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +24 -0
  52. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +12 -7
  53. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +8 -37
  54. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +20 -0
  55. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +20 -0
  56. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +29 -0
  57. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +42 -0
  58. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +18 -0
  59. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +33 -0
  60. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +28 -0
  61. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +56 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +3 -1
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +22 -0
  64. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +20 -14
  65. data/utils/requirements.txt +1 -1
  66. metadata +28 -4
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +0 -40
  68. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +0 -18
@@ -0,0 +1,49 @@
1
+
2
+ module MiGA::Common::Path
3
+
4
##
# Root path to MiGA, estimated by walking four directory levels up from the
# location of the current file.
def root_path
  levels_up = File.join('..', '..', '..', '..')
  File.expand_path(levels_up, __FILE__)
end
9
+
10
##
# Path to a script to be executed for +task+. Supported +opts+ are:
# - +:miga+ Path to the MiGA home to use. If not passed, the home of the
#   library is used.
# - +:project+ MiGA::Project object to check within plugins. If not passed,
#   only core scripts are supported.
#
# When several plugins of the project provide the script, the last one
# listed wins. The +opts+ hash is only read, never modified, so a hash reused
# across calls cannot carry a plugin home over to a task that the plugin
# does not provide.
def script_path(task, opts={})
  script = "scripts/#{task}.bash"
  home = opts[:miga] || root_path
  unless opts[:project].nil?
    opts[:project].plugins.each do |pl|
      home = pl if File.exist? File.expand_path(script, pl)
    end
  end
  File.expand_path(script, home)
end
27
+
28
+ end
29
+
30
##
# MiGA extensions to the File class.
class File

  ##
  # Transfers a file from +old_name+ to +new_name+ using +method+:
  # - +:copy+ uses FileUtils#cp_r.
  # - +:hardlink+ uses File#link.
  # - Any other value (such as +:symlink+ for File#symlink) is sent to File
  #   as the name of a class method taking +old_name+ and +new_name+.
  #
  # Does nothing and returns nil if +new_name+ already exists.
  def self.generic_transfer(old_name, new_name, method)
    return nil if exist?(new_name)
    case method
    when :copy
      FileUtils.cp_r(old_name, new_name)
    when :hardlink
      File.link(old_name, new_name)
    else
      File.send(method, old_name, new_name)
    end
  end

end
49
+
@@ -2,13 +2,14 @@
2
2
  # @license Artistic-2.0
3
3
 
4
4
  require 'miga/project'
5
- require 'daemons'
6
- require 'date'
5
+ require 'miga/daemon/base'
7
6
 
8
7
  ##
9
8
  # MiGA Daemons handling job submissions.
10
9
  class MiGA::Daemon < MiGA::MiGA
11
10
 
11
+ include MiGA::Daemon::Base
12
+
12
13
  ##
13
14
  # When was the last time a daemon for the MiGA::Project +project+ was seen
14
15
  # active? Returns DateTime.
@@ -61,64 +62,6 @@ class MiGA::Daemon < MiGA::MiGA
61
62
  multiple: false, log_output: true }
62
63
  end
63
64
 
64
- ##
65
- # Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
66
- # (or nil to use as getter). Skips consistency tests if +force+. Returns new
67
- # value.
68
- def runopts(k, v=nil, force=false)
69
- k = k.to_sym
70
- unless v.nil?
71
- if [:latency, :maxjobs, :ppn].include?(k)
72
- v = v.to_i
73
- elsif [:shutdown_when_done].include?(k)
74
- v = !!v
75
- end
76
- raise "Daemon's #{k} cannot be set to zero." if !force and v==0
77
- @runopts[k] = v
78
- end
79
- if k==:kill and v.nil?
80
- case @runopts[:type].to_s
81
- when 'bash' then return "kill -9 '%s'"
82
- when 'qsub' then return "qdel '%s'"
83
- else return "canceljob '%s'"
84
- end
85
- end
86
- @runopts[k]
87
- end
88
-
89
- ##
90
- # Returns Integer indicating the number of seconds to sleep between checks.
91
- def latency() runopts(:latency); end
92
-
93
- ##
94
- # Returns Integer indicating the maximum number of concurrent jobs to run.
95
- def maxjobs() runopts(:maxjobs); end
96
-
97
- ##
98
- # Returns Integer indicating the number of CPUs per job.
99
- def ppn() runopts(:ppn); end
100
-
101
- ##
102
- # Returns Boolean indicating if the daemon should shutdown when processing is
103
- # complete.
104
- def shutdown_when_done?() !!runopts(:shutdown_when_done); end
105
-
106
- ##
107
- # Initializes the daemon with +opts+.
108
- def start(opts=[]) daemon('start', opts); end
109
-
110
- ##
111
- # Stops the daemon with +opts+.
112
- def stop(opts=[]) daemon('stop', opts); end
113
-
114
- ##
115
- # Restarts the daemon with +opts+.
116
- def restart(opts=[]) daemon('restart', opts); end
117
-
118
- ##
119
- # Returns the status of the daemon with +opts+.
120
- def status(opts=[]) daemon('status', opts); end
121
-
122
65
  ##
123
66
  # Launches the +task+ with options +opts+ (as command-line arguments).
124
67
  # Supported tasks include: start, stop, restart, status.
@@ -0,0 +1,69 @@
1
+
2
+ require 'daemons'
3
+ require 'date'
4
+
5
+ class MiGA::Daemon < MiGA::MiGA
6
+ end
7
+
8
+ module MiGA::Daemon::Base
9
+
10
##
# Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
# (or nil to use as getter). Returns the current (possibly new) value.
#
# As a getter, +:kill+ is special: it returns a command template chosen from
# the daemon +:type+ (bash, qsub, or anything else) instead of a stored value.
#
# As a setter, +:latency+, +:maxjobs+ and +:ppn+ are coerced to Integer and
# +:shutdown_when_done+ to Boolean. Setting a value equal to zero raises a
# RuntimeError unless +force+ is true.
def runopts(k, v=nil, force=false)
  key = k.to_sym

  # Getter mode
  if v.nil?
    return @runopts[key] unless key == :kill
    return case @runopts[:type].to_s
           when 'bash' then "kill -9 '%s'"
           when 'qsub' then "qdel '%s'"
           else "canceljob '%s'"
           end
  end

  # Setter mode
  value = case key
          when :latency, :maxjobs, :ppn then v.to_i
          when :shutdown_when_done then !!v
          else v
          end
  raise "Daemon's #{key} cannot be set to zero." if value == 0 && !force
  @runopts[key] = value
  @runopts[key]
end
34
+
35
##
# Returns Integer indicating the number of seconds to sleep between checks
# (the +:latency+ run option).
def latency
  runopts(:latency)
end
38
+
39
##
# Returns Integer indicating the maximum number of concurrent jobs to run
# (the +:maxjobs+ run option).
def maxjobs
  runopts(:maxjobs)
end
42
+
43
##
# Returns Integer indicating the number of CPUs per job (the +:ppn+ run
# option).
def ppn
  runopts(:ppn)
end
46
+
47
##
# Returns Boolean indicating if the daemon should shutdown when processing is
# complete. An unset option counts as false.
def shutdown_when_done?
  runopts(:shutdown_when_done) ? true : false
end
51
+
52
##
# Initializes the daemon with +opts+ by delegating to #daemon with the
# 'start' task.
def start(opts=[])
  daemon('start', opts)
end
55
+
56
##
# Stops the daemon with +opts+ by delegating to #daemon with the 'stop' task.
def stop(opts=[])
  daemon('stop', opts)
end
59
+
60
##
# Restarts the daemon with +opts+ by delegating to #daemon with the 'restart'
# task.
def restart(opts=[])
  daemon('restart', opts)
end
63
+
64
##
# Returns the status of the daemon with +opts+ by delegating to #daemon with
# the 'status' task.
def status(opts=[])
  daemon('status', opts)
end
67
+
68
+ end
69
+
@@ -1,9 +1,9 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "miga/metadata"
5
- require "miga/dataset/result"
6
- require "sqlite3"
4
+ require 'miga/metadata'
5
+ require 'miga/dataset/result'
6
+ require 'sqlite3'
7
7
 
8
8
  ##
9
9
  # Dataset representation in MiGA.
@@ -1,7 +1,7 @@
1
1
 
2
- require "sqlite3"
3
- require "miga/result"
4
- require "miga/dataset/base"
2
+ require 'sqlite3'
3
+ require 'miga/result'
4
+ require 'miga/dataset/base'
5
5
 
6
6
  ##
7
7
  # Helper module including specific functions to add dataset results.
@@ -35,8 +35,8 @@ module MiGA::Dataset::Result
35
35
  # returns a result if the expected files are complete. The +opts+ hash
36
36
  # controls result creation (if necessary). Supported values include:
37
37
  # - +is_clean+: A Boolean indicating if the input files are clean.
38
- # - +force+: A Boolean indicating if the result must be re-indexed. If true, it
39
- # implies save=true.
38
+ # - +force+: A Boolean indicating if the result must be re-indexed. If true,
39
+ # it implies save=true.
40
40
  # Returns MiGA::Result or nil.
41
41
  def add_result(result_type, save=true, opts={})
42
42
  dir = @@RESULT_DIRS[result_type]
@@ -117,6 +117,11 @@ class MiGA::Result < MiGA::MiGA
117
117
  # Save the result persistently (in the JSON file #path).
118
118
  def save
119
119
  @data[:updated] = Time.now.to_s
120
+ s = path(:start)
121
+ if File.exist? s
122
+ @data[:started] = File.read(s).chomp
123
+ File.unlink s
124
+ end
120
125
  json = JSON.pretty_generate data
121
126
  ofh = File.open(path, "w")
122
127
  ofh.puts json
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 1, 7]
13
+ VERSION = [0.3, 2, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -22,15 +22,17 @@ module MiGA
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
25
- CITATION = "Rodriguez-R et al, in preparation. Microbial Genomes Atlas: " +
26
- "Standardizing genomic and metagenomic analyses for Archaea and Bacteria."
25
+ CITATION = "Rodriguez-R et al (2018). " +
26
+ "The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene " +
27
+ "diversity analysis of Archaea and Bacteria at the whole genome level. " +
28
+ "Nucleic Acids Research, gky467. DOI: 10.1093/nar/gky467."
27
29
 
28
30
  end
29
31
 
30
32
  class MiGA::MiGA
31
-
33
+
32
34
  include MiGA
33
-
35
+
34
36
  ##
35
37
  # Major.minor version as Float.
36
38
  def self.VERSION ; VERSION[0] ; end
@@ -10,26 +10,9 @@ cd "$PROJECT/data/09.distances"
10
10
 
11
11
  # Initialize
12
12
  miga date > "$DATASET.start"
13
- TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
14
- trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
15
13
 
16
- # Check type of dataset
17
- NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
18
- | wc -l | awk '{print $1}')
19
- REF=$(miga ls -P "$PROJECT" -D "$DATASET" --ref \
20
- | wc -l | awk '{print $1}')
21
-
22
- # Call submodules
23
- # shellcheck source=scripts/_distances_functions.bash
24
- source "$MIGA/scripts/_distances_functions.bash"
25
- if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
26
- # shellcheck source=scripts/_distances_ref_nomulti.bash
27
- source "$MIGA/scripts/_distances_ref_nomulti.bash"
28
- elif [[ "$NOMULTI" -eq "1" ]] ; then
29
- S_PROJ=$PROJECT
30
- # shellcheck source=scripts/_distances_noref_nomulti.bash
31
- source "$MIGA/scripts/_distances_noref_nomulti.bash"
32
- fi
14
+ # Run
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
33
16
 
34
17
  # Finalize
35
18
  rm -R "$TMPDIR"
@@ -13,27 +13,8 @@ cd "$DIR"
13
13
  # Initialize
14
14
  miga date > "$DATASET.start"
15
15
 
16
- # Check if there is a reference project
17
- S_PROJ=$(miga about -P "$PROJECT" -m ref_project)
18
-
19
- if [[ "$S_PROJ" != "?" ]] ; then
20
-
21
- # Check type of dataset
22
- NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
23
- | wc -l | awk '{print $1}')
24
-
25
- if [[ "$NOMULTI" -eq "1" ]] ; then
26
- # Call submodules
27
- TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
28
- trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
29
- # shellcheck source=scripts/_distances_functions.bash
30
- source "$MIGA/scripts/_distances_functions.bash"
31
- # shellcheck source=scripts/_distances_noref_nomulti.bash
32
- source "$MIGA/scripts/_distances_noref_nomulti.bash"
33
- rm -R "$TMPDIR"
34
- fi
35
-
36
- fi
16
+ # Run
17
+ ruby "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
37
18
 
38
19
  # Finalize
39
20
  miga date > "$DATASET.done"
@@ -79,4 +79,13 @@ class CommonTest < Test::Unit::TestCase
79
79
  assert_equal("678 90", tab[3])
80
80
  end
81
81
 
82
# Exercises the String helpers unmiga_name, miga_name, miga_name? and
# wrap_width against fixed examples.
def test_miga_name
  assert_equal('Xa sp. C', 'Xa_sp__C'.unmiga_name)
  { 'X^*.!{}!' => 'X_______', 'aB09' => 'aB09' }.each do |raw, expected|
    assert_equal(expected, raw.miga_name)
  end
  assert('R2D2'.miga_name?)
  assert(!'C3-PO'.miga_name?)
  assert_equal("123\n1\n", '1231'.wrap_width(3))
end
90
+
82
91
  end
@@ -0,0 +1,6 @@
1
+
2
+ require 'miga'
3
+ require 'miga/tax_dist'
4
+
5
+ class MiGA::DistanceRunner
6
+ end
@@ -0,0 +1,82 @@
1
+
2
+ module MiGA::DistanceRunner::Commands
3
# Estimates or calculates AAI against +target+.
#
# Returns nil if +target+ is nil or has no essential genes result. Otherwise
# returns, in order of preference: the stored AAI value, the hAAI-based
# estimate (skipped when the reference project is a clade), or the value of a
# full AAI run. Stored or estimated values that are nil or zero are treated as
# missing. A full run is followed by an +:aai+ checkpoint.
def aai(target)
  # Check if the request makes sense
  return nil if target.nil? || target.result(:essential_genes).nil?

  # Check if it's been calculated
  stored = stored_value(target, :aai)
  return stored unless stored.nil? || stored.zero?

  # Try hAAI (except in clade projects)
  unless @ref_project.is_clade?
    estimate = haai(target)
    return estimate unless estimate.nil? || estimate.zero?
  end

  # Full AAI
  query_proteins = tmp_file("proteins.fa")
  target_proteins = target.result(:cds).file_path(:proteins)
  full = aai_cmd(
    query_proteins, target_proteins,
    dataset.name, target.name, tmp_dbs[:aai])
  checkpoint :aai
  full
end
20
+
21
# Estimates AAI against +target+ using hAAI.
#
# Runs the AAI command on the essential genes of both datasets against the
# +:haai+ temporary database (without saving RBMs, using the +:haai_p+
# program option) and records a +:haai+ checkpoint. Returns nil when the hAAI
# is nil, zero, or above 90. Otherwise converts it to an AAI estimate, inserts
# that estimate in the +:aai+ temporary database, and returns it.
def haai(target)
  query_genes = tmp_file("ess_genes.fa")
  target_genes = target.result(:essential_genes).file_path(:ess_genes)
  observed = aai_cmd(
    query_genes, target_genes,
    dataset.name, target.name, tmp_dbs[:haai],
    aai_save_rbm: "no-save-rbm", aai_p: opts[:haai_p])
  checkpoint :haai
  return nil if observed.nil? || observed.zero? || observed > 90.0

  estimate = 100.0 - Math.exp(2.435076 + 0.4275193*Math.log(100.0-observed))
  SQLite3::Database.new(tmp_dbs[:aai]) do |conn|
    conn.execute "insert into aai values(?, ?, ?, 0, 0, 0)",
      [dataset.name, target.name, estimate]
  end
  estimate
end
36
+
37
# Calculates ANI against +target+.
#
# Returns nil if +target+ is nil, if it has no assembly result, or if the
# temporary large-contigs file of the query is missing or empty. Otherwise
# returns the stored ANI value when it is neither nil nor zero, or else the
# value of a new ANI run, which is followed by an +:ani+ checkpoint.
def ani(target)
  # Check if the request makes sense (nil targets are ignored, as in #aai)
  return nil if target.nil?
  t = tmp_file("largecontigs.fa")
  r = target.result(:assembly)
  return nil if r.nil? || !File.size?(t)
  # Check if it's been calculated
  y = stored_value(target, :ani)
  return y unless y.nil? || y.zero?
  # Run it
  ani_cmd(
    t, r.file_path(:largecontigs),
    dataset.name, target.name, tmp_dbs[:ani]).tap { checkpoint :ani }
end
51
+
52
# Execute an AAI command.
#
# Runs +aai.rb+ comparing the sequences in +f1+ and +f2+, named +n1+ and
# +n2+, using +db+ as the SQLite3 database passed with -S. Options in +o+
# override those returned by #opts; the ones read here are +:thr+,
# +:aai_save_rbm+ and +:aai_p+ (defaulting to "blast+"). Returns the command
# output as Float, or 0 if the output is empty.
def aai_cmd(f1, f2, n1, n2, db, o={})
  settings = opts.merge(o)
  stdout = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
    --name1 "#{n1}" --name2 "#{n2}" \
    -t "#{settings[:thr]}" -a --lookup-first "--#{settings[:aai_save_rbm]}" \
    -p "#{settings[:aai_p] || "blast+"}"`.chomp
  return 0 if stdout.nil? || stdout.empty?
  stdout.to_f
end
61
+
62
# Execute an ANI command.
#
# Compares the sequences in +f1+ and +f2+, named +n1+ and +n2+, using +db+ as
# the SQLite3 database. Options in +o+ override those returned by #opts; the
# ones read here are +:ani_p+ and +:thr+.
#
# When +:ani_p+ is 'fastani', runs +fastANI+ and inserts the third, fourth and
# fifth fields of its output into the +ani+ table of +db+; the third field is
# the value returned. Otherwise runs +ani.rb+ (with +:ani_p+ as program,
# defaulting to "blast+") and uses its output.
#
# Returns the value as Float, or 0 if the command produced no value.
def ani_cmd(f1, f2, n1, n2, db, o={})
  o = opts.merge(o)
  v = nil
  if o[:ani_p] == 'fastani'
    out = `fastANI -r "#{f1}" -q "#{f2}" \
      -o /dev/stdout 2>/dev/null`.chomp.split(/\s+/)
    SQLite3::Database.new(db) do |conn|
      conn.execute "insert into ani values(?, ?, ?, 0, ?, ?)",
        [n1, n2, out[2], out[3], out[4]]
    end
    # nil when fastANI printed nothing
    v = out[2]
  else
    # The threads option is read from the merged options, like all others
    v = `ani.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
      --name1 "#{n1}" --name2 "#{n2}" \
      -t "#{o[:thr]}" -a --no-save-regions --no-save-rbm \
      --lookup-first -p "#{o[:ani_p] || "blast+"}"`.chomp
  end
  # Parentheses are required: without them the ternary binds tighter than
  # `or` and a nil value would yield `true` instead of 0
  (v.nil? || v.empty?) ? 0 : v.to_f
end
82
+ end