miga-base 0.3.1.7 → 0.3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/ncbi_get.rb +8 -0
- data/lib/miga/common.rb +9 -215
- data/lib/miga/common/base.rb +49 -0
- data/lib/miga/common/format.rb +135 -0
- data/lib/miga/common/path.rb +49 -0
- data/lib/miga/daemon.rb +3 -60
- data/lib/miga/daemon/base.rb +69 -0
- data/lib/miga/dataset.rb +3 -3
- data/lib/miga/dataset/result.rb +5 -5
- data/lib/miga/result.rb +5 -0
- data/lib/miga/version.rb +7 -5
- data/scripts/distances.bash +2 -19
- data/scripts/taxonomy.bash +2 -21
- data/test/common_test.rb +9 -0
- data/utils/distance/base.rb +6 -0
- data/utils/distance/commands.rb +82 -0
- data/utils/distance/database.rb +86 -0
- data/utils/distance/pipeline.rb +98 -0
- data/utils/distance/runner.rb +104 -0
- data/utils/distance/temporal.rb +37 -0
- data/utils/distances.rb +9 -0
- data/utils/enveomics/Docs/recplot2.md +233 -0
- data/utils/enveomics/Makefile +1 -1
- data/utils/enveomics/Manifest/Tasks/blasttab.json +66 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +10 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +4 -4
- data/utils/enveomics/Manifest/Tasks/mapping.json +38 -1
- data/utils/enveomics/Manifest/categories.json +11 -1
- data/utils/enveomics/Manifest/examples.json +2 -2
- data/utils/enveomics/README.md +2 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +1 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +52 -30
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +7 -2
- data/utils/enveomics/Scripts/FastA.interpose.pl +26 -20
- data/utils/enveomics/Scripts/FastQ.interpose.pl +20 -20
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/SRA.download.bash +28 -21
- data/utils/enveomics/Scripts/Table.barplot.R +1 -0
- data/utils/enveomics/Scripts/aai.rb +4 -2
- data/utils/enveomics/build_enveomics_r.bash +5 -5
- data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
- data/utils/enveomics/enveomics.R/NAMESPACE +6 -2
- data/utils/enveomics/enveomics.R/R/recplot2.R +471 -71
- data/utils/enveomics/enveomics.R/README.md +26 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +6 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +12 -7
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +8 -37
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +20 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +20 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +42 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +33 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +56 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +3 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +22 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +20 -14
- data/utils/requirements.txt +1 -1
- metadata +28 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +0 -18
data/utils/enveomics/Makefile
CHANGED
@@ -623,6 +623,7 @@
|
|
623
623
|
{ "r_package": "optparse" },
|
624
624
|
{ "r_package": "enveomics.R" }
|
625
625
|
],
|
626
|
+
"see_also": [ "RecPlot2.compareIdentities.R" ],
|
626
627
|
"options": [
|
627
628
|
{
|
628
629
|
"opt": "--prefix",
|
@@ -644,6 +645,12 @@
|
|
644
645
|
"default": 300,
|
645
646
|
"description": ["Breaks in the identity histogram."]
|
646
647
|
},
|
648
|
+
{
|
649
|
+
"opt": "--id-free-range",
|
650
|
+
"description": ["Indicates that the range should be freely set from",
|
651
|
+
"the observed values. Otherwise, 70-100% is included in the",
|
652
|
+
"identity histogram (default)."]
|
653
|
+
},
|
647
654
|
{
|
648
655
|
"opt": "--id-metric",
|
649
656
|
"arg": "select",
|
@@ -684,6 +691,13 @@
|
|
684
691
|
"description": ["Color of peaks, mandatory for peak-finding (e.g.,",
|
685
692
|
"darkred)."]
|
686
693
|
},
|
694
|
+
{
|
695
|
+
"opt": "--peaks-method",
|
696
|
+
"arg": "select",
|
697
|
+
"values": ["emauto","em","mower"],
|
698
|
+
"default": "emauto",
|
699
|
+
"description": "Method to detect peaks."
|
700
|
+
},
|
687
701
|
{
|
688
702
|
"name": "R Object Output",
|
689
703
|
"arg": "out_file",
|
@@ -707,6 +721,58 @@
|
|
707
721
|
"description": "Height of the plot in inches (7 by default)."
|
708
722
|
}
|
709
723
|
]
|
724
|
+
},
|
725
|
+
{
|
726
|
+
"task": "RecPlot2.compareIdentities.R",
|
727
|
+
"description": ["Calculates the difference between identity",
|
728
|
+
"distributions of two recruitment plots."],
|
729
|
+
"help_arg": "--help",
|
730
|
+
"requires": [
|
731
|
+
{ "r_package": "optparse" },
|
732
|
+
{ "r_package": "enveomics.R" }
|
733
|
+
],
|
734
|
+
"see_also": [ "BlastTab.recplot2.R" ],
|
735
|
+
"options": [
|
736
|
+
{
|
737
|
+
"opt": "--method",
|
738
|
+
"arg": "string",
|
739
|
+
"default": "hellinger",
|
740
|
+
"description": ["Distance method to use. This should be (an",
|
741
|
+
"unambiguous abbreviation of) one of:",
|
742
|
+
"'hellinger' (Hellinger, 1909, doi:10.1515/crll.1909.136.210),",
|
743
|
+
"'bhattacharyya' (Bhattacharyya, 1943, Bull. Calcutta Math. Soc.",
|
744
|
+
"35), 'kl' or 'kullback–leibler' (Kullback & Leibler, 1951,",
|
745
|
+
"doi:10.1214/aoms/1177729694), or 'euclidean'."]
|
746
|
+
},
|
747
|
+
{
|
748
|
+
"opt": "--pseudocounts",
|
749
|
+
"arg": "float",
|
750
|
+
"default": 0.0,
|
751
|
+
"description": ["Smoothing parameter for Laplace smoothing. Use 0",
|
752
|
+
"for no smoothing, or 1 for add-one smoothing."]
|
753
|
+
},
|
754
|
+
{
|
755
|
+
"opt": "--max-deviation",
|
756
|
+
"arg": "float",
|
757
|
+
"description": ["Maximum mean deviation between identity breaks",
|
758
|
+
"tolerated (as percent identity). Difference in number of",
|
759
|
+
"id.breaks is never tolerated."]
|
760
|
+
},
|
761
|
+
{
|
762
|
+
"arg": "in_file",
|
763
|
+
"name": "RecPlot A",
|
764
|
+
"mandatory": true,
|
765
|
+
"description": ["File containing the R object `rp` for the first",
|
766
|
+
"recruitment plot."]
|
767
|
+
},
|
768
|
+
{
|
769
|
+
"arg": "in_file",
|
770
|
+
"name": "RecPlot B",
|
771
|
+
"mandatory": true,
|
772
|
+
"description": ["File containing the R object `rp` for the second",
|
773
|
+
"recruitment plot."]
|
774
|
+
}
|
775
|
+
]
|
710
776
|
}
|
711
777
|
]
|
712
778
|
}
|
@@ -158,12 +158,19 @@
|
|
158
158
|
"warn": ["Please note that this script will check for the consistency of",
|
159
159
|
"the names (assuming a pair of related reads contains the same name",
|
160
160
|
"varying only in a trailing slash (/) followed by a digit). If you want",
|
161
|
-
"to turn this feature off just set the
|
161
|
+
"to turn this feature off just set the checking period to zero. If",
|
162
162
|
"you want to decrease the sampling period (to speed the script up) or",
|
163
|
-
"increase it (to make it more sensitive to errors) just change
|
164
|
-
"accordingly."],
|
163
|
+
"increase it (to make it more sensitive to errors) just change the",
|
164
|
+
"checking period accordingly."],
|
165
165
|
"help_arg": "",
|
166
166
|
"options": [
|
167
|
+
{
|
168
|
+
"name": "Checking period",
|
169
|
+
"opt": "-T",
|
170
|
+
"arg": "integer",
|
171
|
+
"default": 1000,
|
172
|
+
"description": "Sampling period for names evaluation."
|
173
|
+
},
|
167
174
|
{
|
168
175
|
"arg": "out_file",
|
169
176
|
"mandatory": true,
|
@@ -45,10 +45,10 @@
|
|
45
45
|
"warn": ["Note that this script will check for the consistency of the",
|
46
46
|
"names (assuming a pair of related reads contains the same name",
|
47
47
|
"varying only in a trailing slash (/) followed by a digit). If you want",
|
48
|
-
"to turn this feature off just set the
|
49
|
-
"to decrease the sampling period (to speed the script up) or
|
50
|
-
"it (to make it more sensitive to errors) just change
|
51
|
-
"accordingly."],
|
48
|
+
"to turn this feature off just set the checking period to zero. If you",
|
49
|
+
"want to decrease the sampling period (to speed the script up) or",
|
50
|
+
"increase it (to make it more sensitive to errors) just change the",
|
51
|
+
"checking period accordingly."],
|
52
52
|
"see_also": ["FastQ.split.pl","FastA.interpose.pl"],
|
53
53
|
"help_arg": "",
|
54
54
|
"options": [
|
@@ -8,7 +8,8 @@
|
|
8
8
|
"missing from the file. If you produce your BedGraph file with",
|
9
9
|
"bedtools genomecov and want to consider zero-coverage positions, be",
|
10
10
|
"sure to use -bga (not -bg)."],
|
11
|
-
"see_also": ["
|
11
|
+
"see_also": ["BedGraph.window.rb",
|
12
|
+
"BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
|
12
13
|
"help_arg": "--help",
|
13
14
|
"options": [
|
14
15
|
{
|
@@ -23,6 +24,42 @@
|
|
23
24
|
"default": 0.5,
|
24
25
|
"description": ["Central range to consider, between 0 and 1. By",
|
25
26
|
"default: inter-quartile range (0.5)."]
|
27
|
+
},
|
28
|
+
{
|
29
|
+
"opt": "--per-seq",
|
30
|
+
"description": ["Calculate averages per reference sequence, not",
|
31
|
+
"total. Assumes a sorted BedGraph file."]
|
32
|
+
},
|
33
|
+
{
|
34
|
+
"opt": "--length",
|
35
|
+
"description": "Add sequence length to the output."
|
36
|
+
}
|
37
|
+
]
|
38
|
+
},
|
39
|
+
{
|
40
|
+
"task": "BedGraph.window.rb",
|
41
|
+
"description": ["Estimates the sequencing depth per window from a",
|
42
|
+
"BedGraph file."],
|
43
|
+
"warn": ["This script doesn't consider zero-coverage positions if",
|
44
|
+
"missing from the file. If you produce your BedGraph file with",
|
45
|
+
"bedtools genomecov and want to consider zero-coverage positions, be",
|
46
|
+
"sure to use -bga (not -bg)."],
|
47
|
+
"see_also": ["BedGraph.tad.rb",
|
48
|
+
"BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
|
49
|
+
"help_arg": "--help",
|
50
|
+
"options": [
|
51
|
+
{
|
52
|
+
"opt": "--input",
|
53
|
+
"arg": "in_file",
|
54
|
+
"mandatory": true,
|
55
|
+
"description": "Input BedGraph file."
|
56
|
+
},
|
57
|
+
{
|
58
|
+
"name": "Window size",
|
59
|
+
"opt": "--win",
|
60
|
+
"arg": "float",
|
61
|
+
"default": 1000,
|
62
|
+
"description": "Window size, in base pairs."
|
26
63
|
}
|
27
64
|
]
|
28
65
|
}
|
@@ -3,6 +3,7 @@
|
|
3
3
|
"Sequence similarity search": {
|
4
4
|
"Statistics": [
|
5
5
|
"BedGraph.tad.rb",
|
6
|
+
"BedGraph.window.rb",
|
6
7
|
"BlastPairwise.AAsubs.pl",
|
7
8
|
"BlastTab.advance.bash",
|
8
9
|
"BlastTab.recplot2.R",
|
@@ -10,7 +11,8 @@
|
|
10
11
|
"BlastTab.seqdepth_nomedian.pl",
|
11
12
|
"BlastTab.seqdepth_ZIP.pl",
|
12
13
|
"BlastTab.sumPerHit.pl",
|
13
|
-
"FastQ.test-error.rb"
|
14
|
+
"FastQ.test-error.rb",
|
15
|
+
"RecPlot2.compareIdentities.R"
|
14
16
|
],
|
15
17
|
"Manipulation": [
|
16
18
|
"BlastTab.addlen.rb",
|
@@ -134,6 +136,14 @@
|
|
134
136
|
"Clustering": [
|
135
137
|
"ogs.mcl.rb",
|
136
138
|
"clust.rand.rb"
|
139
|
+
],
|
140
|
+
"Read recruitments": [
|
141
|
+
"BedGraph.tad.rb",
|
142
|
+
"BedGraph.window.rb",
|
143
|
+
"BlastTab.catsbj.pl",
|
144
|
+
"BlastTab.pairedHits.rb",
|
145
|
+
"BlastTab.recplot2.R",
|
146
|
+
"RecPlot2.compareIdentities.R"
|
137
147
|
]
|
138
148
|
}
|
139
149
|
}
|
@@ -56,8 +56,8 @@
|
|
56
56
|
"task": "BlastTab.recplot2.R",
|
57
57
|
"description": ["Generates recruitment plots for a comparison",
|
58
58
|
"between a virome containing HIV and the HIV-1 genome."],
|
59
|
-
"values": ["hiv_mix-hiv1.blast.tsv",50,100,null,null,null,null,null,
|
60
|
-
"hiv_mix-hiv1.Rdata","hiv_mix-hiv1.pdf",null,null]
|
59
|
+
"values": ["hiv_mix-hiv1.blast.tsv",50,100,null,null,null,null,null,null,
|
60
|
+
null,null,"hiv_mix-hiv1.Rdata","hiv_mix-hiv1.pdf",null,null]
|
61
61
|
},
|
62
62
|
{
|
63
63
|
"_": "== Examples of functional annotations ==",
|
data/utils/enveomics/README.md
CHANGED
@@ -29,6 +29,8 @@ http://www.perlfoundation.org/artistic_license_2_0.
|
|
29
29
|
Most scripts in this repository are self-documented. However,
|
30
30
|
more extensive documentation (and some discussion) can be found at the
|
31
31
|
[documentation website](http://enve-omics.ce.gatech.edu/enveomics/docs).
|
32
|
+
Additional documentation for recruitment plots can be found
|
33
|
+
[here](Docs/recplot2.md).
|
32
34
|
|
33
35
|
## Citation
|
34
36
|
|
@@ -129,6 +129,7 @@ begin
|
|
129
129
|
puts ">#{key}", a[key].join("").gsub(/(.{1,60})/, "\\1\n")
|
130
130
|
a.delete(key)
|
131
131
|
end
|
132
|
+
$stderr.puts " #{lengths.inject(:+)} columns." unless o[:q]
|
132
133
|
unless o[:coords].nil?
|
133
134
|
$stderr.puts "Generating coordinates." unless o[:q]
|
134
135
|
coords = File.open(o[:coords],"w")
|
@@ -1,9 +1,9 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
3
|
+
require 'optparse'
|
4
4
|
|
5
|
-
o = {range:0.5}
|
6
|
-
ARGV <<
|
5
|
+
o = {range: 0.5, perseq: false, length: false}
|
6
|
+
ARGV << '-h' if ARGV.empty?
|
7
7
|
OptionParser.new do |opt|
|
8
8
|
opt.banner = "
|
9
9
|
Estimates the truncated average sequencing depth (TAD) from a BedGraph file.
|
@@ -13,20 +13,26 @@ OptionParser.new do |opt|
|
|
13
13
|
want to consider zero-coverage position, be sure to use -bga (not -bg).
|
14
14
|
|
15
15
|
Usage: #{$0} [options]"
|
16
|
-
opt.separator
|
17
|
-
opt.on(
|
18
|
-
|
19
|
-
opt.on(
|
20
|
-
|
16
|
+
opt.separator ''
|
17
|
+
opt.on('-i', '--input PATH',
|
18
|
+
'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
|
19
|
+
opt.on('-r', '--range FLOAT',
|
20
|
+
'Central range to consider, between 0 and 1.',
|
21
21
|
"By default: #{o[:range]} (inter-quartile range)."
|
22
22
|
){ |v| o[:range]=v.to_f }
|
23
|
-
opt.on(
|
23
|
+
opt.on('-s', '--per-seq',
|
24
|
+
'Calculate averages per reference sequence, not total.',
|
25
|
+
'Assumes a sorted BedGraph file.'
|
26
|
+
){ |v| o[:perseq] = v }
|
27
|
+
opt.on('-l', '--length',
|
28
|
+
'Add sequence length to the output.'){ |v| o[:length] = v }
|
29
|
+
opt.on('-h', '--help', 'Display this screen.') do
|
24
30
|
puts opt
|
25
31
|
exit
|
26
32
|
end
|
27
|
-
opt.separator
|
33
|
+
opt.separator ''
|
28
34
|
end.parse!
|
29
|
-
abort
|
35
|
+
abort '-i is mandatory.' if o[:i].nil?
|
30
36
|
|
31
37
|
def pad(d, idx, r)
|
32
38
|
idx.each do |i|
|
@@ -39,33 +45,49 @@ def pad(d, idx, r)
|
|
39
45
|
d
|
40
46
|
end
|
41
47
|
|
48
|
+
def report(sq, d, ln, o)
|
49
|
+
# Estimate padding ranges
|
50
|
+
pad = (1.0-o[:range])/2.0
|
51
|
+
r = (pad*ln).round
|
52
|
+
|
53
|
+
# Pad
|
54
|
+
d = pad(d, d.each_index.to_a, r+0)
|
55
|
+
d = pad(d, d.each_index.to_a.reverse, r+0)
|
56
|
+
|
57
|
+
# Average
|
58
|
+
y = [0.0]
|
59
|
+
unless d.compact.empty?
|
60
|
+
s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
|
61
|
+
y[0] = s.to_f/d.compact.inject(:+)
|
62
|
+
end
|
63
|
+
|
64
|
+
# Report
|
65
|
+
y.unshift(sq) if o[:perseq]
|
66
|
+
y << ln if o[:length]
|
67
|
+
puts y.join("\t")
|
68
|
+
end
|
69
|
+
|
42
70
|
# Read BedGraph
|
43
|
-
d
|
71
|
+
d = []
|
44
72
|
ln = 0
|
73
|
+
pre_sq = nil
|
45
74
|
File.open(o[:i], "r") do |ifh|
|
46
75
|
ifh.each_line do |i|
|
47
76
|
next if i =~ /^#/
|
48
|
-
r
|
77
|
+
r = i.chomp.split("\t")
|
78
|
+
sq = r.shift
|
79
|
+
if o[:perseq] and !pre_sq.nil? and pre_sq!=sq
|
80
|
+
report(pre_sq, d, ln, o)
|
81
|
+
d = []
|
82
|
+
ln = 0
|
83
|
+
end
|
84
|
+
r.map! { |j| j.to_i }
|
49
85
|
l = r[1]-r[0]
|
50
86
|
d[ r[2] ] ||= 0
|
51
|
-
d[ r[2] ]
|
87
|
+
d[ r[2] ] += l
|
52
88
|
ln += l
|
89
|
+
pre_sq = sq
|
53
90
|
end
|
54
91
|
end
|
55
|
-
|
56
|
-
# Estimate padding ranges
|
57
|
-
pad = (1.0-o[:range])/2.0
|
58
|
-
r = (pad*ln).round
|
59
|
-
|
60
|
-
# Pad
|
61
|
-
d = pad(d, d.each_index.to_a, r+0)
|
62
|
-
d = pad(d, d.each_index.to_a.reverse, r+0)
|
63
|
-
|
64
|
-
# Average
|
65
|
-
if d.compact.empty?
|
66
|
-
p 0.0
|
67
|
-
else
|
68
|
-
s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
|
69
|
-
p s.to_f/d.compact.inject(:+)
|
70
|
-
end
|
92
|
+
report(pre_sq, d, ln, o)
|
71
93
|
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
o = {win: 1000}
|
6
|
+
ARGV << '-h' if ARGV.empty?
|
7
|
+
OptionParser.new do |opt|
|
8
|
+
opt.banner = "
|
9
|
+
Estimates the sequencing depth per windows from a BedGraph file.
|
10
|
+
|
11
|
+
IMPORTANT: This script doesn't consider zero-coverage positions if missing
|
12
|
+
from the file. If you produce your BedGraph file with bedtools genomecov and
|
13
|
+
want to consider zero-coverage position, be sure to use -bga (not -bg).
|
14
|
+
|
15
|
+
Usage: #{$0} [options]"
|
16
|
+
opt.separator ''
|
17
|
+
opt.on('-i', '--input PATH',
|
18
|
+
'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
|
19
|
+
opt.on('-w', '--win INT',
|
20
|
+
'Window size, in base pairs.', "By default: #{o[:win]}."
|
21
|
+
){ |v| o[:win]=v.to_i }
|
22
|
+
opt.on('-h', '--help', 'Display this screen.') do
|
23
|
+
puts opt
|
24
|
+
exit
|
25
|
+
end
|
26
|
+
opt.separator ''
|
27
|
+
end.parse!
|
28
|
+
abort '-i is mandatory.' if o[:i].nil?
|
29
|
+
|
30
|
+
def report(d, a, b, seqs)
|
31
|
+
# Average
|
32
|
+
y = 0.0
|
33
|
+
unless d.compact.empty?
|
34
|
+
s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
|
35
|
+
y = s.to_f/d.compact.inject(:+)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Report
|
39
|
+
puts [a, b, y, seqs.keys.join(",")].join("\t")
|
40
|
+
end
|
41
|
+
|
42
|
+
# Read BedGraph
|
43
|
+
d = []
|
44
|
+
ln = 0
|
45
|
+
a = 1
|
46
|
+
seqs = {}
|
47
|
+
b = o[:win]
|
48
|
+
File.open(o[:i], "r") do |ifh|
|
49
|
+
ifh.each_line do |i|
|
50
|
+
next if i =~ /^#/
|
51
|
+
r = i.chomp.split("\t")
|
52
|
+
sq = r.shift
|
53
|
+
seqs[sq] = 1
|
54
|
+
r.map!{ |j| j.to_i }
|
55
|
+
l = r[1]-r[0]
|
56
|
+
d[ r[2] ] ||= 0
|
57
|
+
d[ r[2] ] += l
|
58
|
+
ln += l
|
59
|
+
while ln >= b
|
60
|
+
d[ r[2] ] -= (ln-b)
|
61
|
+
report(d, a, b, seqs)
|
62
|
+
seqs = {}
|
63
|
+
seqs[ sq ] = 1 if ln > b
|
64
|
+
d = []
|
65
|
+
d[ r[2] ] = (ln-b)
|
66
|
+
a = b + 1
|
67
|
+
b = a + o[:win] - 1
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
@@ -19,11 +19,12 @@ opt <- enve.cliopts(enve.recplot2,
|
|
19
19
|
o_desc=list(pos.breaks="Breaks in the positions histogram.",
|
20
20
|
id.breaks="Breaks in the identity histogram.",
|
21
21
|
id.summary="Function summarizing the identity bins. By default: sum.",
|
22
|
-
peaks.col="Color of peaks, mandatory for peak-finding (e.g., darkred)."
|
22
|
+
peaks.col="Color of peaks, mandatory for peak-finding (e.g., darkred).",
|
23
|
+
peaks.method="Method to detect peaks; one of emauto, em, or mower."),
|
23
24
|
p_desc=paste("","Produce recruitment plot objects provided that",
|
24
25
|
"BlastTab.catsbj.pl has been previously executed.", sep="\n\t"),
|
25
26
|
ignore=c("plot"),
|
26
|
-
defaults=c(id.metric="identity", peaks.col=NA))
|
27
|
+
defaults=c(id.metric="identity", peaks.col=NA, peaks.method="emauto"))
|
27
28
|
|
28
29
|
#= Run it!
|
29
30
|
if(length(opt$args)>1){
|
@@ -35,6 +36,10 @@ if(length(opt$args)>1){
|
|
35
36
|
}
|
36
37
|
pc <- opt$options[["peaks.col"]]
|
37
38
|
if(!is.na(pc) && pc=="NA") opt$options[["peaks.col"]] <- NA
|
39
|
+
if(!is.null(opt$options[["peaks.method"]])){
|
40
|
+
opt$options[["peaks.opts"]] <- list(method=opt$options[["peaks.method"]])
|
41
|
+
opt$options[["peaks.method"]] <- NULL
|
42
|
+
}
|
38
43
|
rp <- do.call("enve.recplot2", opt$options)
|
39
44
|
save(rp, file=opt$args[1])
|
40
45
|
if(length(opt$args)>1) dev.off()
|