miga-base 0.3.1.7 → 0.3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/ncbi_get.rb +8 -0
- data/lib/miga/common.rb +9 -215
- data/lib/miga/common/base.rb +49 -0
- data/lib/miga/common/format.rb +135 -0
- data/lib/miga/common/path.rb +49 -0
- data/lib/miga/daemon.rb +3 -60
- data/lib/miga/daemon/base.rb +69 -0
- data/lib/miga/dataset.rb +3 -3
- data/lib/miga/dataset/result.rb +5 -5
- data/lib/miga/result.rb +5 -0
- data/lib/miga/version.rb +7 -5
- data/scripts/distances.bash +2 -19
- data/scripts/taxonomy.bash +2 -21
- data/test/common_test.rb +9 -0
- data/utils/distance/base.rb +6 -0
- data/utils/distance/commands.rb +82 -0
- data/utils/distance/database.rb +86 -0
- data/utils/distance/pipeline.rb +98 -0
- data/utils/distance/runner.rb +104 -0
- data/utils/distance/temporal.rb +37 -0
- data/utils/distances.rb +9 -0
- data/utils/enveomics/Docs/recplot2.md +233 -0
- data/utils/enveomics/Makefile +1 -1
- data/utils/enveomics/Manifest/Tasks/blasttab.json +66 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +10 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +4 -4
- data/utils/enveomics/Manifest/Tasks/mapping.json +38 -1
- data/utils/enveomics/Manifest/categories.json +11 -1
- data/utils/enveomics/Manifest/examples.json +2 -2
- data/utils/enveomics/README.md +2 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +1 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +52 -30
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +7 -2
- data/utils/enveomics/Scripts/FastA.interpose.pl +26 -20
- data/utils/enveomics/Scripts/FastQ.interpose.pl +20 -20
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/SRA.download.bash +28 -21
- data/utils/enveomics/Scripts/Table.barplot.R +1 -0
- data/utils/enveomics/Scripts/aai.rb +4 -2
- data/utils/enveomics/build_enveomics_r.bash +5 -5
- data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
- data/utils/enveomics/enveomics.R/NAMESPACE +6 -2
- data/utils/enveomics/enveomics.R/R/recplot2.R +471 -71
- data/utils/enveomics/enveomics.R/README.md +26 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +6 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +12 -7
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +8 -37
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +20 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +20 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +42 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +33 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +56 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +3 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +22 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +20 -14
- data/utils/requirements.txt +1 -1
- metadata +28 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +0 -18
data/utils/enveomics/Makefile
CHANGED
@@ -623,6 +623,7 @@
|
|
623
623
|
{ "r_package": "optparse" },
|
624
624
|
{ "r_package": "enveomics.R" }
|
625
625
|
],
|
626
|
+
"see_also": [ "RecPlot2.compareIdentities.R" ],
|
626
627
|
"options": [
|
627
628
|
{
|
628
629
|
"opt": "--prefix",
|
@@ -644,6 +645,12 @@
|
|
644
645
|
"default": 300,
|
645
646
|
"description": ["Breaks in the identity histogram."]
|
646
647
|
},
|
648
|
+
{
|
649
|
+
"opt": "--id-free-range",
|
650
|
+
"description": ["Indicates that the range should be freely set from",
|
651
|
+
"the observed values. Otherwise, 70-100% is included in the",
|
652
|
+
"identity histogram (default)."]
|
653
|
+
},
|
647
654
|
{
|
648
655
|
"opt": "--id-metric",
|
649
656
|
"arg": "select",
|
@@ -684,6 +691,13 @@
|
|
684
691
|
"description": ["Color of peaks, mandatory for peak-finding (e.g.,",
|
685
692
|
"darkred)."]
|
686
693
|
},
|
694
|
+
{
|
695
|
+
"opt": "--peaks-method",
|
696
|
+
"arg": "select",
|
697
|
+
"values": ["emauto","em","mower"],
|
698
|
+
"default": "emauto",
|
699
|
+
"description": "Method to detect peaks."
|
700
|
+
},
|
687
701
|
{
|
688
702
|
"name": "R Object Output",
|
689
703
|
"arg": "out_file",
|
@@ -707,6 +721,58 @@
|
|
707
721
|
"description": "Height of the plot in inches (7 by default)."
|
708
722
|
}
|
709
723
|
]
|
724
|
+
},
|
725
|
+
{
|
726
|
+
"task": "RecPlot2.compareIdentities.R",
|
727
|
+
"description": ["Calculates the difference between identity",
|
728
|
+
"distributions of two recruitment plots."],
|
729
|
+
"help_arg": "--help",
|
730
|
+
"requires": [
|
731
|
+
{ "r_package": "optparse" },
|
732
|
+
{ "r_package": "enveomics.R" }
|
733
|
+
],
|
734
|
+
"see_also": [ "BlastTab.recplot2.R" ],
|
735
|
+
"options": [
|
736
|
+
{
|
737
|
+
"opt": "--method",
|
738
|
+
"arg": "string",
|
739
|
+
"default": "hellinger",
|
740
|
+
"description": ["Distance method to use. This should be (an",
|
741
|
+
"unambiguous abbreviation of) one of:",
|
742
|
+
"'hellinger' (Hellinger, 1909, doi:10.1515/crll.1909.136.210),",
|
743
|
+
"'bhattacharyya' (Bhattacharyya, 1943, Bull. Calcutta Math. Soc.",
|
744
|
+
"35), 'kl' or 'kullback–leibler' (Kullback & Leibler, 1951,",
|
745
|
+
"doi:10.1214/aoms/1177729694), or 'euclidean'."]
|
746
|
+
},
|
747
|
+
{
|
748
|
+
"opt": "--pseudocounts",
|
749
|
+
"arg": "float",
|
750
|
+
"default": 0.0,
|
751
|
+
"description": ["Smoothing parameter for Laplace smoothing. Use 0",
|
752
|
+
"for no smoothing, or 1 for add-one smoothing."]
|
753
|
+
},
|
754
|
+
{
|
755
|
+
"opt": "--max-deviation",
|
756
|
+
"arg": "float",
|
757
|
+
"description": ["Maximum mean deviation between identity breaks",
|
758
|
+
"tolerated (as percent identity). Difference in number of",
|
759
|
+
"id.breaks is never tolerated."]
|
760
|
+
},
|
761
|
+
{
|
762
|
+
"arg": "in_file",
|
763
|
+
"name": "RecPlot A",
|
764
|
+
"mandatory": true,
|
765
|
+
"description": ["File containing the R object `rp` for the first",
|
766
|
+
"recruitment plot."]
|
767
|
+
},
|
768
|
+
{
|
769
|
+
"arg": "in_file",
|
770
|
+
"name": "RecPlot B",
|
771
|
+
"mandatory": true,
|
772
|
+
"description": ["File containing the R object `rp` for the second",
|
773
|
+
"recruitment plot."]
|
774
|
+
}
|
775
|
+
]
|
710
776
|
}
|
711
777
|
]
|
712
778
|
}
|
@@ -158,12 +158,19 @@
|
|
158
158
|
"warn": ["Please note that this script will check for the consistency of",
|
159
159
|
"the names (assuming a pair of related reads contains the same name",
|
160
160
|
"varying only in a trailing slash (/) followed by a digit). If you want",
|
161
|
-
"to turn this feature off just set the
|
161
|
+
"to turn this feature off just set the checking period to zero. If",
|
162
162
|
"you want to decrease the sampling period (to speed the script up) or",
|
163
|
-
"increase it (to make it more sensitive to errors) just change
|
164
|
-
"accordingly."],
|
163
|
+
"increase it (to make it more sensitive to errors) just change the",
|
164
|
+
"checking period accordingly."],
|
165
165
|
"help_arg": "",
|
166
166
|
"options": [
|
167
|
+
{
|
168
|
+
"name": "Checking period",
|
169
|
+
"opt": "-T",
|
170
|
+
"arg": "integer",
|
171
|
+
"default": 1000,
|
172
|
+
"description": "Sampling period for names evaluation."
|
173
|
+
},
|
167
174
|
{
|
168
175
|
"arg": "out_file",
|
169
176
|
"mandatory": true,
|
@@ -45,10 +45,10 @@
|
|
45
45
|
"warn": ["Note that this script will check for the consistency of the",
|
46
46
|
"names (assuming a pair of related reads contains the same name",
|
47
47
|
"varying only in a trailing slash (/) followed by a digit). If you want",
|
48
|
-
"to turn this feature off just set the
|
49
|
-
"to decrease the sampling period (to speed the script up) or
|
50
|
-
"it (to make it more sensitive to errors) just change
|
51
|
-
"accordingly."],
|
48
|
+
"to turn this feature off just set the checking period to zero. If you",
|
49
|
+
"want to decrease the sampling period (to speed the script up) or",
|
50
|
+
"increase it (to make it more sensitive to errors) just change the",
|
51
|
+
"checking period accordingly."],
|
52
52
|
"see_also": ["FastQ.split.pl","FastA.interpose.pl"],
|
53
53
|
"help_arg": "",
|
54
54
|
"options": [
|
@@ -8,7 +8,8 @@
|
|
8
8
|
"missing from the file. If you produce your BedGraph file with",
|
9
9
|
"bedtools genomecov and want to consider zero-coverage positions, be",
|
10
10
|
"sure to use -bga (not -bg)."],
|
11
|
-
"see_also": ["
|
11
|
+
"see_also": ["BedGraph.window.rb",
|
12
|
+
"BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
|
12
13
|
"help_arg": "--help",
|
13
14
|
"options": [
|
14
15
|
{
|
@@ -23,6 +24,42 @@
|
|
23
24
|
"default": 0.5,
|
24
25
|
"description": ["Central range to consider, between 0 and 1. By",
|
25
26
|
"default: inter-quartile range (0.5)."]
|
27
|
+
},
|
28
|
+
{
|
29
|
+
"opt": "--per-seq",
|
30
|
+
"description": ["Calculate averages per reference sequence, not",
|
31
|
+
"total. Assumes a sorted BedGraph file."]
|
32
|
+
},
|
33
|
+
{
|
34
|
+
"opt": "--length",
|
35
|
+
"description": "Add sequence length to the output."
|
36
|
+
}
|
37
|
+
]
|
38
|
+
},
|
39
|
+
{
|
40
|
+
"task": "BedGraph.window.rb",
|
41
|
+
"description": ["Estimates the sequencing depth per window from a",
|
42
|
+
"BedGraph file."],
|
43
|
+
"warn": ["This script doesn't consider zero-coverage positions if",
|
44
|
+
"missing from the file. If you produce your BedGraph file with",
|
45
|
+
"bedtools genomecov and want to consider zero-coverage positions, be",
|
46
|
+
"sure to use -bga (not -bg)."],
|
47
|
+
"see_also": ["BedGraph.tad.rb",
|
48
|
+
"BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
|
49
|
+
"help_arg": "--help",
|
50
|
+
"options": [
|
51
|
+
{
|
52
|
+
"opt": "--input",
|
53
|
+
"arg": "in_file",
|
54
|
+
"mandatory": true,
|
55
|
+
"description": "Input BedGraph file."
|
56
|
+
},
|
57
|
+
{
|
58
|
+
"name": "Window size",
|
59
|
+
"opt": "--win",
|
60
|
+
"arg": "float",
|
61
|
+
"default": 1000,
|
62
|
+
"description": "Window size, in base pairs."
|
26
63
|
}
|
27
64
|
]
|
28
65
|
}
|
@@ -3,6 +3,7 @@
|
|
3
3
|
"Sequence similarity search": {
|
4
4
|
"Statistics": [
|
5
5
|
"BedGraph.tad.rb",
|
6
|
+
"BedGraph.window.rb",
|
6
7
|
"BlastPairwise.AAsubs.pl",
|
7
8
|
"BlastTab.advance.bash",
|
8
9
|
"BlastTab.recplot2.R",
|
@@ -10,7 +11,8 @@
|
|
10
11
|
"BlastTab.seqdepth_nomedian.pl",
|
11
12
|
"BlastTab.seqdepth_ZIP.pl",
|
12
13
|
"BlastTab.sumPerHit.pl",
|
13
|
-
"FastQ.test-error.rb"
|
14
|
+
"FastQ.test-error.rb",
|
15
|
+
"RecPlot2.compareIdentities.R"
|
14
16
|
],
|
15
17
|
"Manipulation": [
|
16
18
|
"BlastTab.addlen.rb",
|
@@ -134,6 +136,14 @@
|
|
134
136
|
"Clustering": [
|
135
137
|
"ogs.mcl.rb",
|
136
138
|
"clust.rand.rb"
|
139
|
+
],
|
140
|
+
"Read recruitments": [
|
141
|
+
"BedGraph.tad.rb",
|
142
|
+
"BedGraph.window.rb",
|
143
|
+
"BlastTab.catsbj.pl",
|
144
|
+
"BlastTab.pairedHits.rb",
|
145
|
+
"BlastTab.recplot2.R",
|
146
|
+
"RecPlot2.compareIdentities.R"
|
137
147
|
]
|
138
148
|
}
|
139
149
|
}
|
@@ -56,8 +56,8 @@
|
|
56
56
|
"task": "BlastTab.recplot2.R",
|
57
57
|
"description": ["Generates recruitment plots for a comparison",
|
58
58
|
"between a virome containing HIV and the HIV-1 genome."],
|
59
|
-
"values": ["hiv_mix-hiv1.blast.tsv",50,100,null,null,null,null,null,
|
60
|
-
"hiv_mix-hiv1.Rdata","hiv_mix-hiv1.pdf",null,null]
|
59
|
+
"values": ["hiv_mix-hiv1.blast.tsv",50,100,null,null,null,null,null,null,
|
60
|
+
null,null,"hiv_mix-hiv1.Rdata","hiv_mix-hiv1.pdf",null,null]
|
61
61
|
},
|
62
62
|
{
|
63
63
|
"_": "== Examples of functional annotations ==",
|
data/utils/enveomics/README.md
CHANGED
@@ -29,6 +29,8 @@ http://www.perlfoundation.org/artistic_license_2_0.
|
|
29
29
|
Most scripts in this repository are self-documented. However,
|
30
30
|
more extensive documentation (and some discussion) can be found at the
|
31
31
|
[documentation website](http://enve-omics.ce.gatech.edu/enveomics/docs).
|
32
|
+
Additional documentation for recruitment plots can be found
|
33
|
+
[here](Docs/recplot2.md).
|
32
34
|
|
33
35
|
## Citation
|
34
36
|
|
@@ -129,6 +129,7 @@ begin
|
|
129
129
|
puts ">#{key}", a[key].join("").gsub(/(.{1,60})/, "\\1\n")
|
130
130
|
a.delete(key)
|
131
131
|
end
|
132
|
+
$stderr.puts " #{lengths.inject(:+)} columns." unless o[:q]
|
132
133
|
unless o[:coords].nil?
|
133
134
|
$stderr.puts "Generating coordinates." unless o[:q]
|
134
135
|
coords = File.open(o[:coords],"w")
|
@@ -1,9 +1,9 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
3
|
+
require 'optparse'
|
4
4
|
|
5
|
-
o = {range:0.5}
|
6
|
-
ARGV <<
|
5
|
+
o = {range: 0.5, perseq: false, length: false}
|
6
|
+
ARGV << '-h' if ARGV.empty?
|
7
7
|
OptionParser.new do |opt|
|
8
8
|
opt.banner = "
|
9
9
|
Estimates the truncated average sequencing depth (TAD) from a BedGraph file.
|
@@ -13,20 +13,26 @@ OptionParser.new do |opt|
|
|
13
13
|
want to consider zero-coverage position, be sure to use -bga (not -bg).
|
14
14
|
|
15
15
|
Usage: #{$0} [options]"
|
16
|
-
opt.separator
|
17
|
-
opt.on(
|
18
|
-
|
19
|
-
opt.on(
|
20
|
-
|
16
|
+
opt.separator ''
|
17
|
+
opt.on('-i', '--input PATH',
|
18
|
+
'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
|
19
|
+
opt.on('-r', '--range FLOAT',
|
20
|
+
'Central range to consider, between 0 and 1.',
|
21
21
|
"By default: #{o[:range]} (inter-quartile range)."
|
22
22
|
){ |v| o[:range]=v.to_f }
|
23
|
-
opt.on(
|
23
|
+
opt.on('-s', '--per-seq',
|
24
|
+
'Calculate averages per reference sequence, not total.',
|
25
|
+
'Assumes a sorted BedGraph file.'
|
26
|
+
){ |v| o[:perseq] = v }
|
27
|
+
opt.on('-l', '--length',
|
28
|
+
'Add sequence length to the output.'){ |v| o[:length] = v }
|
29
|
+
opt.on('-h', '--help', 'Display this screen.') do
|
24
30
|
puts opt
|
25
31
|
exit
|
26
32
|
end
|
27
|
-
opt.separator
|
33
|
+
opt.separator ''
|
28
34
|
end.parse!
|
29
|
-
abort
|
35
|
+
abort '-i is mandatory.' if o[:i].nil?
|
30
36
|
|
31
37
|
def pad(d, idx, r)
|
32
38
|
idx.each do |i|
|
@@ -39,33 +45,49 @@ def pad(d, idx, r)
|
|
39
45
|
d
|
40
46
|
end
|
41
47
|
|
48
|
+
def report(sq, d, ln, o)
|
49
|
+
# Estimate padding ranges
|
50
|
+
pad = (1.0-o[:range])/2.0
|
51
|
+
r = (pad*ln).round
|
52
|
+
|
53
|
+
# Pad
|
54
|
+
d = pad(d, d.each_index.to_a, r+0)
|
55
|
+
d = pad(d, d.each_index.to_a.reverse, r+0)
|
56
|
+
|
57
|
+
# Average
|
58
|
+
y = [0.0]
|
59
|
+
unless d.compact.empty?
|
60
|
+
s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
|
61
|
+
y[0] = s.to_f/d.compact.inject(:+)
|
62
|
+
end
|
63
|
+
|
64
|
+
# Report
|
65
|
+
y.unshift(sq) if o[:perseq]
|
66
|
+
y << ln if o[:length]
|
67
|
+
puts y.join("\t")
|
68
|
+
end
|
69
|
+
|
42
70
|
# Read BedGraph
|
43
|
-
d
|
71
|
+
d = []
|
44
72
|
ln = 0
|
73
|
+
pre_sq = nil
|
45
74
|
File.open(o[:i], "r") do |ifh|
|
46
75
|
ifh.each_line do |i|
|
47
76
|
next if i =~ /^#/
|
48
|
-
r
|
77
|
+
r = i.chomp.split("\t")
|
78
|
+
sq = r.shift
|
79
|
+
if o[:perseq] and !pre_sq.nil? and pre_sq!=sq
|
80
|
+
report(pre_sq, d, ln, o)
|
81
|
+
d = []
|
82
|
+
ln = 0
|
83
|
+
end
|
84
|
+
r.map! { |j| j.to_i }
|
49
85
|
l = r[1]-r[0]
|
50
86
|
d[ r[2] ] ||= 0
|
51
|
-
d[ r[2] ]
|
87
|
+
d[ r[2] ] += l
|
52
88
|
ln += l
|
89
|
+
pre_sq = sq
|
53
90
|
end
|
54
91
|
end
|
55
|
-
|
56
|
-
# Estimate padding ranges
|
57
|
-
pad = (1.0-o[:range])/2.0
|
58
|
-
r = (pad*ln).round
|
59
|
-
|
60
|
-
# Pad
|
61
|
-
d = pad(d, d.each_index.to_a, r+0)
|
62
|
-
d = pad(d, d.each_index.to_a.reverse, r+0)
|
63
|
-
|
64
|
-
# Average
|
65
|
-
if d.compact.empty?
|
66
|
-
p 0.0
|
67
|
-
else
|
68
|
-
s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
|
69
|
-
p s.to_f/d.compact.inject(:+)
|
70
|
-
end
|
92
|
+
report(pre_sq, d, ln, o)
|
71
93
|
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
o = {win: 1000}
|
6
|
+
ARGV << '-h' if ARGV.empty?
|
7
|
+
OptionParser.new do |opt|
|
8
|
+
opt.banner = "
|
9
|
+
Estimates the sequencing depth per windows from a BedGraph file.
|
10
|
+
|
11
|
+
IMPORTANT: This script doesn't consider zero-coverage positions if missing
|
12
|
+
from the file. If you produce your BedGraph file with bedtools genomecov and
|
13
|
+
want to consider zero-coverage position, be sure to use -bga (not -bg).
|
14
|
+
|
15
|
+
Usage: #{$0} [options]"
|
16
|
+
opt.separator ''
|
17
|
+
opt.on('-i', '--input PATH',
|
18
|
+
'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
|
19
|
+
opt.on('-w', '--win INT',
|
20
|
+
'Window size, in base pairs.', "By default: #{o[:win]}."
|
21
|
+
){ |v| o[:win]=v.to_i }
|
22
|
+
opt.on('-h', '--help', 'Display this screen.') do
|
23
|
+
puts opt
|
24
|
+
exit
|
25
|
+
end
|
26
|
+
opt.separator ''
|
27
|
+
end.parse!
|
28
|
+
abort '-i is mandatory.' if o[:i].nil?
|
29
|
+
|
30
|
+
def report(d, a, b, seqs)
|
31
|
+
# Average
|
32
|
+
y = 0.0
|
33
|
+
unless d.compact.empty?
|
34
|
+
s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
|
35
|
+
y = s.to_f/d.compact.inject(:+)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Report
|
39
|
+
puts [a, b, y, seqs.keys.join(",")].join("\t")
|
40
|
+
end
|
41
|
+
|
42
|
+
# Read BedGraph
|
43
|
+
d = []
|
44
|
+
ln = 0
|
45
|
+
a = 1
|
46
|
+
seqs = {}
|
47
|
+
b = o[:win]
|
48
|
+
File.open(o[:i], "r") do |ifh|
|
49
|
+
ifh.each_line do |i|
|
50
|
+
next if i =~ /^#/
|
51
|
+
r = i.chomp.split("\t")
|
52
|
+
sq = r.shift
|
53
|
+
seqs[sq] = 1
|
54
|
+
r.map!{ |j| j.to_i }
|
55
|
+
l = r[1]-r[0]
|
56
|
+
d[ r[2] ] ||= 0
|
57
|
+
d[ r[2] ] += l
|
58
|
+
ln += l
|
59
|
+
while ln >= b
|
60
|
+
d[ r[2] ] -= (ln-b)
|
61
|
+
report(d, a, b, seqs)
|
62
|
+
seqs = {}
|
63
|
+
seqs[ sq ] = 1 if ln > b
|
64
|
+
d = []
|
65
|
+
d[ r[2] ] = (ln-b)
|
66
|
+
a = b + 1
|
67
|
+
b = a + o[:win] - 1
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
@@ -19,11 +19,12 @@ opt <- enve.cliopts(enve.recplot2,
|
|
19
19
|
o_desc=list(pos.breaks="Breaks in the positions histogram.",
|
20
20
|
id.breaks="Breaks in the identity histogram.",
|
21
21
|
id.summary="Function summarizing the identity bins. By default: sum.",
|
22
|
-
peaks.col="Color of peaks, mandatory for peak-finding (e.g., darkred)."
|
22
|
+
peaks.col="Color of peaks, mandatory for peak-finding (e.g., darkred).",
|
23
|
+
peaks.method="Method to detect peaks; one of emauto, em, or mower."),
|
23
24
|
p_desc=paste("","Produce recruitment plot objects provided that",
|
24
25
|
"BlastTab.catsbj.pl has been previously executed.", sep="\n\t"),
|
25
26
|
ignore=c("plot"),
|
26
|
-
defaults=c(id.metric="identity", peaks.col=NA))
|
27
|
+
defaults=c(id.metric="identity", peaks.col=NA, peaks.method="emauto"))
|
27
28
|
|
28
29
|
#= Run it!
|
29
30
|
if(length(opt$args)>1){
|
@@ -35,6 +36,10 @@ if(length(opt$args)>1){
|
|
35
36
|
}
|
36
37
|
pc <- opt$options[["peaks.col"]]
|
37
38
|
if(!is.na(pc) && pc=="NA") opt$options[["peaks.col"]] <- NA
|
39
|
+
if(!is.null(opt$options[["peaks.method"]])){
|
40
|
+
opt$options[["peaks.opts"]] <- list(method=opt$options[["peaks.method"]])
|
41
|
+
opt$options[["peaks.method"]] <- NULL
|
42
|
+
}
|
38
43
|
rp <- do.call("enve.recplot2", opt$options)
|
39
44
|
save(rp, file=opt$args[1])
|
40
45
|
if(length(opt$args)>1) dev.off()
|