bio-gngm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +33 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-gngm.gemspec +173 -0
- data/doc/Bio.html +129 -0
- data/doc/Bio/DB.html +128 -0
- data/doc/Bio/DB/Pileup.html +316 -0
- data/doc/Bio/DB/Vcf.html +683 -0
- data/doc/Bio/Util.html +135 -0
- data/doc/Bio/Util/Gngm.html +1655 -0
- data/doc/LICENSE_txt.html +111 -0
- data/doc/_index.html +169 -0
- data/doc/class_list.html +47 -0
- data/doc/created.rid +4 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +55 -0
- data/doc/css/style.css +322 -0
- data/doc/doc/created.rid +0 -0
- data/doc/file_list.html +52 -0
- data/doc/frames.html +13 -0
- data/doc/images/add.png +0 -0
- data/doc/images/bands.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/signal.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/threads.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +88 -0
- data/doc/js/app.js +205 -0
- data/doc/js/darkfish.js +153 -0
- data/doc/js/full_list.js +167 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/lib/bio-gngm_rb.html +103 -0
- data/doc/lib/bio/util/bio-gngm_rb.html +96 -0
- data/doc/method_list.html +382 -0
- data/doc/rdoc.css +543 -0
- data/doc/table_of_contents.html +161 -0
- data/examples/.DS_Store +0 -0
- data/examples/make_histograms.rb +40 -0
- data/examples/make_threads.rb +42 -0
- data/examples/make_threads_isize.rb +41 -0
- data/examples/use_indels.rb +36 -0
- data/lib/bio-gngm.rb +12 -0
- data/lib/bio/util/bio-gngm.rb +1029 -0
- data/scripts/get_subseq.rb +16 -0
- data/scripts/make_histograms_laerfyve.rb +83 -0
- data/scripts/make_histograms_laerfyve_stitched.rb +59 -0
- data/scripts/make_threads_isize_laerfyfe.rb +52 -0
- data/scripts/make_threads_unmapped_laerfyfe.rb +72 -0
- data/scripts/make_threads_unmapped_laerfyfe_pseudo.rb +56 -0
- data/scripts/make_threads_unmapped_simulation.rb +54 -0
- data/scripts/make_threads_unmapped_simulation_immediate_region.rb +59 -0
- data/scripts/optimise_freq_window_size.rb +82 -0
- data/stitched_contigs.zip +0 -0
- data/test/data/ids2.txt +1 -0
- data/test/data/sorted.bam +0 -0
- data/test/data/test +0 -0
- data/test/data/test.bam +0 -0
- data/test/data/test.fa +20 -0
- data/test/data/test.fai +0 -0
- data/test/data/test.sai +0 -0
- data/test/data/test.tam +10 -0
- data/test/data/test_chr.fasta +1000 -0
- data/test/data/test_chr.fasta.amb +2 -0
- data/test/data/test_chr.fasta.ann +3 -0
- data/test/data/test_chr.fasta.bwt +0 -0
- data/test/data/test_chr.fasta.fai +1 -0
- data/test/data/test_chr.fasta.pac +0 -0
- data/test/data/test_chr.fasta.rbwt +0 -0
- data/test/data/test_chr.fasta.rpac +0 -0
- data/test/data/test_chr.fasta.rsa +0 -0
- data/test/data/test_chr.fasta.sa +0 -0
- data/test/data/testu.bam +0 -0
- data/test/data/testu.bam.bai +0 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-gngm.rb +126 -0
- metadata +276 -0
data/doc/Bio/Util.html
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
|
6
|
+
|
7
|
+
<title>Class: Bio::Util</title>
|
8
|
+
|
9
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet">
|
10
|
+
|
11
|
+
<script type="text/javascript">
|
12
|
+
var rdoc_rel_prefix = "../";
|
13
|
+
</script>
|
14
|
+
|
15
|
+
<script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
|
16
|
+
<script type="text/javascript" charset="utf-8" src="../js/navigation.js"></script>
|
17
|
+
<script type="text/javascript" charset="utf-8" src="../js/search_index.js"></script>
|
18
|
+
<script type="text/javascript" charset="utf-8" src="../js/search.js"></script>
|
19
|
+
<script type="text/javascript" charset="utf-8" src="../js/searcher.js"></script>
|
20
|
+
<script type="text/javascript" charset="utf-8" src="../js/darkfish.js"></script>
|
21
|
+
|
22
|
+
|
23
|
+
<body id="top" class="class">
|
24
|
+
<nav id="metadata">
|
25
|
+
<nav id="home-section" class="section">
|
26
|
+
<h3 class="section-header">
|
27
|
+
<a href="../index.html">Home</a>
|
28
|
+
<a href="../table_of_contents.html#classes">Classes</a>
|
29
|
+
<a href="../table_of_contents.html#methods">Methods</a>
|
30
|
+
</h3>
|
31
|
+
</nav>
|
32
|
+
|
33
|
+
|
34
|
+
<nav id="search-section" class="section project-section" class="initially-hidden">
|
35
|
+
<form action="#" method="get" accept-charset="utf-8">
|
36
|
+
<h3 class="section-header">
|
37
|
+
<input type="text" name="search" placeholder="Search" id="search-field"
|
38
|
+
title="Type to search, Up and Down to navigate, Enter to load">
|
39
|
+
</h3>
|
40
|
+
</form>
|
41
|
+
|
42
|
+
<ul id="search-results" class="initially-hidden"></ul>
|
43
|
+
</nav>
|
44
|
+
|
45
|
+
|
46
|
+
<div id="file-metadata">
|
47
|
+
<nav id="file-list-section" class="section">
|
48
|
+
<h3 class="section-header">Defined In</h3>
|
49
|
+
<ul>
|
50
|
+
<li>lib/bio/util/bio-gngm.rb
|
51
|
+
</ul>
|
52
|
+
</nav>
|
53
|
+
|
54
|
+
|
55
|
+
</div>
|
56
|
+
|
57
|
+
<div id="class-metadata">
|
58
|
+
|
59
|
+
<nav id="parent-class-section" class="section">
|
60
|
+
<h3 class="section-header">Parent</h3>
|
61
|
+
|
62
|
+
<p class="link">Object
|
63
|
+
|
64
|
+
</nav>
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
</div>
|
69
|
+
|
70
|
+
<div id="project-metadata">
|
71
|
+
<nav id="fileindex-section" class="section project-section">
|
72
|
+
<h3 class="section-header">Pages</h3>
|
73
|
+
|
74
|
+
<ul>
|
75
|
+
|
76
|
+
<li class="file"><a href="../LICENSE_txt.html">LICENSE</a>
|
77
|
+
|
78
|
+
</ul>
|
79
|
+
</nav>
|
80
|
+
|
81
|
+
<nav id="classindex-section" class="section project-section">
|
82
|
+
<h3 class="section-header">Class and Module Index</h3>
|
83
|
+
|
84
|
+
<ul class="link-list">
|
85
|
+
|
86
|
+
<li><a href="../Bio.html">Bio</a>
|
87
|
+
|
88
|
+
<li><a href="../Bio/DB.html">Bio::DB</a>
|
89
|
+
|
90
|
+
<li><a href="../Bio/DB/Pileup.html">Bio::DB::Pileup</a>
|
91
|
+
|
92
|
+
<li><a href="../Bio/DB/Vcf.html">Bio::DB::Vcf</a>
|
93
|
+
|
94
|
+
<li><a href="../Bio/Util.html">Bio::Util</a>
|
95
|
+
|
96
|
+
<li><a href="../Bio/Util/Gngm.html">Bio::Util::Gngm</a>
|
97
|
+
|
98
|
+
</ul>
|
99
|
+
</nav>
|
100
|
+
|
101
|
+
</div>
|
102
|
+
</nav>
|
103
|
+
|
104
|
+
<div id="documentation">
|
105
|
+
<h1 class="class">class Bio::Util</h1>
|
106
|
+
|
107
|
+
<div id="description" class="description">
|
108
|
+
|
109
|
+
</div><!-- description -->
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
<section id="5Buntitled-5D" class="documentation-section">
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
<!-- Methods -->
|
124
|
+
|
125
|
+
</section><!-- 5Buntitled-5D -->
|
126
|
+
|
127
|
+
</div><!-- documentation -->
|
128
|
+
|
129
|
+
|
130
|
+
<footer id="validator-badges">
|
131
|
+
<p><a href="http://validator.w3.org/check/referer">[Validate]</a>
|
132
|
+
<p>Generated by <a href="https://github.com/rdoc/rdoc">RDoc</a> 3.11.
|
133
|
+
<p>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish Rdoc Generator</a> 3.
|
134
|
+
</footer>
|
135
|
+
|
@@ -0,0 +1,1655 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
|
6
|
+
|
7
|
+
<title>Class: Bio::Util::Gngm</title>
|
8
|
+
|
9
|
+
<link type="text/css" media="screen" href="../../rdoc.css" rel="stylesheet">
|
10
|
+
|
11
|
+
<script type="text/javascript">
|
12
|
+
var rdoc_rel_prefix = "../../";
|
13
|
+
</script>
|
14
|
+
|
15
|
+
<script type="text/javascript" charset="utf-8" src="../../js/jquery.js"></script>
|
16
|
+
<script type="text/javascript" charset="utf-8" src="../../js/navigation.js"></script>
|
17
|
+
<script type="text/javascript" charset="utf-8" src="../../js/search_index.js"></script>
|
18
|
+
<script type="text/javascript" charset="utf-8" src="../../js/search.js"></script>
|
19
|
+
<script type="text/javascript" charset="utf-8" src="../../js/searcher.js"></script>
|
20
|
+
<script type="text/javascript" charset="utf-8" src="../../js/darkfish.js"></script>
|
21
|
+
|
22
|
+
|
23
|
+
<body id="top" class="class">
|
24
|
+
<nav id="metadata">
|
25
|
+
<nav id="home-section" class="section">
|
26
|
+
<h3 class="section-header">
|
27
|
+
<a href="../../index.html">Home</a>
|
28
|
+
<a href="../../table_of_contents.html#classes">Classes</a>
|
29
|
+
<a href="../../table_of_contents.html#methods">Methods</a>
|
30
|
+
</h3>
|
31
|
+
</nav>
|
32
|
+
|
33
|
+
|
34
|
+
<nav id="search-section" class="section project-section" class="initially-hidden">
|
35
|
+
<form action="#" method="get" accept-charset="utf-8">
|
36
|
+
<h3 class="section-header">
|
37
|
+
<input type="text" name="search" placeholder="Search" id="search-field"
|
38
|
+
title="Type to search, Up and Down to navigate, Enter to load">
|
39
|
+
</h3>
|
40
|
+
</form>
|
41
|
+
|
42
|
+
<ul id="search-results" class="initially-hidden"></ul>
|
43
|
+
</nav>
|
44
|
+
|
45
|
+
|
46
|
+
<div id="file-metadata">
|
47
|
+
<nav id="file-list-section" class="section">
|
48
|
+
<h3 class="section-header">Defined In</h3>
|
49
|
+
<ul>
|
50
|
+
<li>lib/bio/util/bio-gngm.rb
|
51
|
+
</ul>
|
52
|
+
</nav>
|
53
|
+
|
54
|
+
|
55
|
+
</div>
|
56
|
+
|
57
|
+
<div id="class-metadata">
|
58
|
+
|
59
|
+
<nav id="parent-class-section" class="section">
|
60
|
+
<h3 class="section-header">Parent</h3>
|
61
|
+
|
62
|
+
<p class="link">Object
|
63
|
+
|
64
|
+
</nav>
|
65
|
+
|
66
|
+
|
67
|
+
<!-- Method Quickref -->
|
68
|
+
<nav id="method-list-section" class="section">
|
69
|
+
<h3 class="section-header">Methods</h3>
|
70
|
+
|
71
|
+
<ul class="link-list">
|
72
|
+
|
73
|
+
<li><a href="#method-c-new">::new</a>
|
74
|
+
|
75
|
+
<li><a href="#method-i-calculate_clusters">#calculate_clusters</a>
|
76
|
+
|
77
|
+
<li><a href="#method-i-calculate_densities">#calculate_densities</a>
|
78
|
+
|
79
|
+
<li><a href="#method-i-calculate_signal">#calculate_signal</a>
|
80
|
+
|
81
|
+
<li><a href="#method-i-close">#close</a>
|
82
|
+
|
83
|
+
<li><a href="#method-i-clusters">#clusters</a>
|
84
|
+
|
85
|
+
<li><a href="#method-i-collect_threads">#collect_threads</a>
|
86
|
+
|
87
|
+
<li><a href="#method-i-densities">#densities</a>
|
88
|
+
|
89
|
+
<li><a href="#method-i-draw_bands">#draw_bands</a>
|
90
|
+
|
91
|
+
<li><a href="#method-i-draw_hit_count">#draw_hit_count</a>
|
92
|
+
|
93
|
+
<li><a href="#method-i-draw_peaks">#draw_peaks</a>
|
94
|
+
|
95
|
+
<li><a href="#method-i-draw_signal">#draw_signal</a>
|
96
|
+
|
97
|
+
<li><a href="#method-i-draw_threads">#draw_threads</a>
|
98
|
+
|
99
|
+
<li><a href="#method-i-frequency_histogram">#frequency_histogram</a>
|
100
|
+
|
101
|
+
<li><a href="#method-i-get_band">#get_band</a>
|
102
|
+
|
103
|
+
<li><a href="#method-i-get_insert_size_frequency">#get_insert_size_frequency</a>
|
104
|
+
|
105
|
+
<li><a href="#method-i-get_unmapped_mate_frequency">#get_unmapped_mate_frequency</a>
|
106
|
+
|
107
|
+
<li><a href="#method-i-hit_count">#hit_count</a>
|
108
|
+
|
109
|
+
<li><a href="#method-i-peaks">#peaks</a>
|
110
|
+
|
111
|
+
<li><a href="#method-i-signal">#signal</a>
|
112
|
+
|
113
|
+
<li><a href="#method-i-snp_positions">#snp_positions</a>
|
114
|
+
|
115
|
+
<li><a href="#method-i-threads">#threads</a>
|
116
|
+
|
117
|
+
</ul>
|
118
|
+
</nav>
|
119
|
+
|
120
|
+
</div>
|
121
|
+
|
122
|
+
<div id="project-metadata">
|
123
|
+
<nav id="fileindex-section" class="section project-section">
|
124
|
+
<h3 class="section-header">Pages</h3>
|
125
|
+
|
126
|
+
<ul>
|
127
|
+
|
128
|
+
<li class="file"><a href="../../LICENSE_txt.html">LICENSE</a>
|
129
|
+
|
130
|
+
</ul>
|
131
|
+
</nav>
|
132
|
+
|
133
|
+
<nav id="classindex-section" class="section project-section">
|
134
|
+
<h3 class="section-header">Class and Module Index</h3>
|
135
|
+
|
136
|
+
<ul class="link-list">
|
137
|
+
|
138
|
+
<li><a href="../../Bio.html">Bio</a>
|
139
|
+
|
140
|
+
<li><a href="../../Bio/DB.html">Bio::DB</a>
|
141
|
+
|
142
|
+
<li><a href="../../Bio/DB/Pileup.html">Bio::DB::Pileup</a>
|
143
|
+
|
144
|
+
<li><a href="../../Bio/DB/Vcf.html">Bio::DB::Vcf</a>
|
145
|
+
|
146
|
+
<li><a href="../../Bio/Util.html">Bio::Util</a>
|
147
|
+
|
148
|
+
<li><a href="../../Bio/Util/Gngm.html">Bio::Util::Gngm</a>
|
149
|
+
|
150
|
+
</ul>
|
151
|
+
</nav>
|
152
|
+
|
153
|
+
</div>
|
154
|
+
</nav>
|
155
|
+
|
156
|
+
<div id="documentation">
|
157
|
+
<h1 class="class">class Bio::Util::Gngm</h1>
|
158
|
+
|
159
|
+
<div id="description" class="description">
|
160
|
+
|
161
|
+
<p>A <a href="Gngm.html">Bio::Util::Gngm</a> object represents a single region
|
162
|
+
on a reference genome that is to be examined using the NGM technique
|
163
|
+
described in Austin et al (2011) <a
|
164
|
+
href="http://bar.utoronto.ca/ngm/description.html">bar.utoronto.ca/ngm/description.html</a>
|
165
|
+
and <a
|
166
|
+
href="http://onlinelibrary.wiley.com/doi/10.1111/j.1365-313X.2011.04619.x/abstract;jsessionid=F73E2DA628523B26205297CEE95526DA.d02t04">onlinelibrary.wiley.com/doi/10.1111/j.1365-313X.2011.04619.x/abstract;jsessionid=F73E2DA628523B26205297CEE95526DA.d02t04</a>
|
167
|
+
Austin <em>et</em> <em>al</em> (2011) <strong>Next-generation</strong>
|
168
|
+
<strong>mapping</strong> <strong>of</strong> <strong>Arabidopsis</strong>
|
169
|
+
<strong>genes</strong> <em>Plant</em> <em>Journal</em>
|
170
|
+
<strong>67</strong>(4):715-725.</p>
|
171
|
+
|
172
|
+
<p><a href="Gngm.html">Bio::Util::Gngm</a> provides methods for finding SNPs,
|
173
|
+
small INDELS and larger INDELS, creating histograms of polymorphism
|
174
|
+
frequency, creating and clustering density curves, creating signal plots
|
175
|
+
and finding peaks. The ratio of reference-agreeing and reference-differing
|
176
|
+
reads can be specified.</p>
|
177
|
+
|
178
|
+
<h2 id="label-Background">Background</h2>
|
179
|
+
|
180
|
+
<p>The basic concept of the technique is that density curves of polymorphism
|
181
|
+
frequency across the region of interest are plotted and analysed. Each
|
182
|
+
curve is called a thread, as it represents a polymorphism that was called
|
183
|
+
with a statistic within a certain user-specified range, eg if a SNP was
|
184
|
+
called with 50% non-reference bases from sequence reads (say all A), and
|
185
|
+
50% reference reads (all T) then a discordant chastity statistic (ChD) of
|
186
|
+
0.5 would be calculated and assigned to that SNP. Depending on the width
|
187
|
+
and slide of the windows the user had specified, the frequency of SNPs with
|
188
|
+
ChD in the specified range would be drawn in the same density curve. In the
|
189
|
+
figure below each different coloured curve represents the frequency of SNPs
|
190
|
+
with similar ChD.</p>
|
191
|
+
|
192
|
+
<p><img src="../../images/threads.png" /></p>
|
193
|
+
|
194
|
+
<p>Each of these density curves is called a thread. Threads are clustered into
|
195
|
+
groups called bands and the bands containing the expected and control
|
196
|
+
polymorphisms extracted. In the figure below, the control band is 0.5, the
|
197
|
+
expected mutation is 1.0. Typically and in the Austin et al (2011)
|
198
|
+
description of NGM the control band is the heterophasic band that
|
199
|
+
represents natural variation, the thing taken to be the baseline. For a
|
200
|
+
simple SNP, numerically the discordant chastity is expected to be 0.5.
|
201
|
+
Conversely the expected band is the homophasic band that represents the
|
202
|
+
selected-for SNP region. Normally the discordant chastity is expected to be
|
203
|
+
1.0.</p>
|
204
|
+
|
205
|
+
<p><img src="../../images/bands.png" /></p>
|
206
|
+
|
207
|
+
<p>The point where the signal from the control and expected bands converge
|
208
|
+
most is a likely candidate region for the causative mutation, so here at
|
209
|
+
about the 1.6 millionth nucleotide.</p>
|
210
|
+
|
211
|
+
<p><img src="../../images/signal.png" /></p>
|
212
|
+
|
213
|
+
<h2 id="label-Example">Example</h2>
|
214
|
+
|
215
|
+
<pre class="ruby"><span class="ruby-identifier">require</span> <span class="ruby-string">'bio-gngm'</span>
|
216
|
+
|
217
|
+
<span class="ruby-identifier">g</span> = <span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">Util</span><span class="ruby-operator">::</span><span class="ruby-constant">Gngm</span>.<span class="ruby-identifier">new</span>(:<span class="ruby-identifier">file</span> =<span class="ruby-operator">></span> <span class="ruby-string">"aln.sorted.bam"</span>,
|
218
|
+
:<span class="ruby-identifier">format</span> =<span class="ruby-operator">></span> :<span class="ruby-identifier">bam</span>,
|
219
|
+
:<span class="ruby-identifier">fasta</span> =<span class="ruby-operator">></span> <span class="ruby-string">"reference.fasta"</span>,
|
220
|
+
:<span class="ruby-identifier">samtools</span> =<span class="ruby-operator">></span> {:<span class="ruby-identifier">r</span> =<span class="ruby-operator">></span> <span class="ruby-string">"chr1:1-100000"</span>,
|
221
|
+
:<span class="ruby-identifier">q</span> =<span class="ruby-operator">></span> <span class="ruby-value">20</span>,
|
222
|
+
:<span class="ruby-constant">Q</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>
|
223
|
+
},
|
224
|
+
:<span class="ruby-identifier">min_non_ref_freq</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>,
|
225
|
+
:<span class="ruby-identifier">min_non_ref</span> =<span class="ruby-operator">></span> <span class="ruby-value">3</span>
|
226
|
+
)
|
227
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">snp_positions</span>
|
228
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">collect_threads</span>(:<span class="ruby-identifier">start</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.2</span>, :<span class="ruby-identifier">stop</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>, :<span class="ruby-identifier">slide</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.01</span>, :<span class="ruby-identifier">size</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.1</span> )
|
229
|
+
[<span class="ruby-value">0.25</span>, <span class="ruby-value">0.5</span>, <span class="ruby-value">1.0</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">kernel_adjust</span><span class="ruby-operator">|</span> <span class="ruby-comment"># loop through different kernel values</span>
|
230
|
+
[<span class="ruby-value">4</span>, <span class="ruby-value">9</span>, <span class="ruby-value">11</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span> <span class="ruby-identifier">k</span> <span class="ruby-operator">|</span> <span class="ruby-comment"># loop through different cluster numbers </span>
|
231
|
+
|
232
|
+
<span class="ruby-comment">#cluster</span>
|
233
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">calculate_clusters</span>(:<span class="ruby-identifier">k</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">k</span>, :<span class="ruby-identifier">adjust</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">kernel_adjust</span>, :<span class="ruby-identifier">control_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.7</span>, :<span class="ruby-identifier">expected_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>)
|
234
|
+
<span class="ruby-comment">#draw thread and bands</span>
|
235
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_all_threads.png"</span>
|
236
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_threads</span>(<span class="ruby-identifier">filename</span>)
|
237
|
+
|
238
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_clustered_bands.png"</span>
|
239
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_bands</span>(<span class="ruby-identifier">filename</span>, :<span class="ruby-identifier">add_lines</span> =<span class="ruby-operator">></span> [<span class="ruby-value">100</span>,<span class="ruby-value">30000</span>,<span class="ruby-value">675432</span>])
|
240
|
+
|
241
|
+
<span class="ruby-comment">#draw signal</span>
|
242
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_signal.png"</span>
|
243
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_signal</span>(<span class="ruby-identifier">filename</span>)
|
244
|
+
|
245
|
+
<span class="ruby-comment">#auto-guess peaks</span>
|
246
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_peaks.png"</span>
|
247
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_peaks</span>(<span class="ruby-identifier">filename</span>)
|
248
|
+
<span class="ruby-keyword">end</span>
|
249
|
+
<span class="ruby-keyword">end</span>
|
250
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">close</span> <span class="ruby-comment">#close BAM file</span>
|
251
|
+
</pre>
|
252
|
+
|
253
|
+
<h2 id="label-Polymorphisms+and+statistics">Polymorphisms and statistics</h2>
|
254
|
+
|
255
|
+
<p><a href="Gngm.html">Bio::Util::Gngm</a> will allow you to look for
|
256
|
+
polymorphisms that are SNPs, INDELS (as insertions uniquely, deletions
|
257
|
+
uniquely or both) and longer insertions or deletions based on the insert
|
258
|
+
size on paired-end read alignments. Each has a different statistic attached
|
259
|
+
to it.</p>
|
260
|
+
|
261
|
+
<h3 id="label-SNPs">SNPs</h3>
|
262
|
+
|
263
|
+
<p>Simple Single Nucleotide Polymorphisms are called and their ChD statistic
|
264
|
+
calculated as described in Austin et al (2011).</p>
|
265
|
+
|
266
|
+
<h3 id="label-Short+INDELS">Short INDELS</h3>
|
267
|
+
|
268
|
+
<p>These are called via SAMtools/BCFtools so are limited to the INDELs that
|
269
|
+
can be called that way. The implementation at the moment only considers
|
270
|
+
positions with one INDEL, sites with more than one potential INDEL (ie
|
271
|
+
multiple alleles) are disregarded as a position at all. See the <a
|
272
|
+
href="../DB/Vcf.html">Bio::DB::Vcf</a> extensions in this package for a
|
273
|
+
description of what constitutes an INDEL. The Vcf attribute <a
|
274
|
+
href="../DB/Vcf.html#method-i-non_ref_allele_freq">Bio::DB::Vcf#non_ref_allele_freq</a>
|
275
|
+
is used as the statistic in this case.</p>
|
276
|
+
|
277
|
+
<h3 id="label-Insertion+Size">Insertion Size</h3>
|
278
|
+
|
279
|
+
<p>Paired-end alignments have an expected distance between the paired reads
|
280
|
+
(called insert size, or isize). Groups of reads in one position with larger
|
281
|
+
or smaller than expected isize can indicate large deletions or insertions.
|
282
|
+
Due to the details of read preparation the actual isize varies around a
|
283
|
+
mean value with an expected proportion of 50% of reads having isize above
|
284
|
+
the mean, and 50% below. To create density curves of insertion size
|
285
|
+
frequency, a window of user-specified size is moved along
|
286
|
+
the reference genome in user-specified steps and all alignments in that
|
287
|
+
window are examined. The Bio::DB::Sam#isize attribute is inspected for all
|
288
|
+
alignments passing user-specified quality and the proportion of reads in
|
289
|
+
that window that have an insert size > the expected insert size is used
|
290
|
+
as the statistic in this case. Proportions approaching 1 indicate that the
|
291
|
+
sequenced organism has a deletion in that section relative to the
|
292
|
+
reference. Proportions approaching 0 indicate an insertion in that section
|
293
|
+
relative to the reference. Proportions around 0.5 indicate random variation
|
294
|
+
of insert size, i.e. no INDEL. It seems to be a good idea to keep the window
|
295
|
+
size similar to the read + isize. Useful in conjunction with assessing
|
296
|
+
unmapped mates.</p>
|
297
|
+
|
298
|
+
<h3 id="label-Unmapped+Mate+Pairs+%2F+Paired+Ends.">Unmapped Mate Pairs / Paired Ends.</h3>
|
299
|
+
|
300
|
+
<p>Paired-end alignments where one mate finds a mapping but the other doesn't,
|
301
|
+
can indicate an insertion/deletion larger than the insert size of the reads
|
302
|
+
used (IE one read disappeared into the deleted section). This method uses a
|
303
|
+
statistic based on proportion of mapped/unmapped reads in a window.
|
304
|
+
Proportions of reads that are mapped but the mate is unmapped should be
|
305
|
+
about 0.5 in a window over an insertion/deletion (since the reads can go
|
306
|
+
in either direction). With no insertion/deletion, the proportion should
|
307
|
+
be closer to 1.</p>
|
308
|
+
|
309
|
+
<h2 id="label-Input+types">Input types</h2>
|
310
|
+
|
311
|
+
<p>A sorted BAM file is used as the source of alignments. Pileup is not used
|
312
|
+
nor likely to be as it is a deprecated function within SAMtools. With the
|
313
|
+
BAM file you will need the reference FASTA and the BAM index (.bai).</p>
|
314
|
+
|
315
|
+
<h2 id="label-Workflow">Workflow</h2>
|
316
|
+
<ol><li>
|
317
|
+
<p>Create <a href="Gngm.html">Bio::Util::Gngm</a> object for a specific region
|
318
|
+
in the reference genome</p>
|
319
|
+
</li><li>
|
320
|
+
<p>Polymorphisms are found</p>
|
321
|
+
</li><li>
|
322
|
+
<p>Density curves (threads) are calculated</p>
|
323
|
+
</li><li>
|
324
|
+
<p>Clustering density threads into bands is done</p>
|
325
|
+
</li><li>
|
326
|
+
<p>Signal is compared between band of interest and control</p>
|
327
|
+
</li><li>
|
328
|
+
<p>Figures are printed</p>
|
329
|
+
</li></ol>
|
330
|
+
|
331
|
+
<h2 id="label-Prerequisites">Prerequisites</h2>
|
332
|
+
<ul><li>
|
333
|
+
<p>Ruby 1.9.3 or greater (if you have an earlier version, try RVM for
|
334
|
+
installing different versions of Ruby alongside your system install and
|
335
|
+
switching nicely between them)</p>
|
336
|
+
</li><li>
|
337
|
+
<p>R 2.11.1 or greater</p>
|
338
|
+
</li></ul>
|
339
|
+
|
340
|
+
<p>The following ruby-gems are required</p>
|
341
|
+
<ul><li>
|
342
|
+
<p>rinruby >= 2.0.2</p>
|
343
|
+
</li><li>
|
344
|
+
<p>bio-samtools >= 0.5.0</p>
|
345
|
+
</li></ul>
|
346
|
+
|
347
|
+
<p>The following R packages are required</p>
|
348
|
+
<ul><li>
|
349
|
+
<p>ggplot2</p>
|
350
|
+
</li><li>
|
351
|
+
<p>peaks</p>
|
352
|
+
</li></ul>
|
353
|
+
|
354
|
+
<h2 id="label-Acknowledgements">Acknowledgements</h2>
|
355
|
+
|
356
|
+
<h2 id="label-Using+bio-gngm">Using bio-gngm</h2>
|
357
|
+
|
358
|
+
<p>The package is not yet released, a gem will be prepared soon. Until then
|
359
|
+
scripts run fine when saved in the scripts folder within the package
|
360
|
+
directory with the below pre-amble at the top of the script. Run scripts
|
361
|
+
from the root of the package directory.</p>
|
362
|
+
|
363
|
+
<pre class="ruby"><span class="ruby-identifier">$LOAD_PATH</span>.<span class="ruby-identifier">unshift</span>(<span class="ruby-constant">File</span>.<span class="ruby-identifier">join</span>(<span class="ruby-constant">File</span>.<span class="ruby-identifier">dirname</span>(<span class="ruby-keyword">__FILE__</span>), <span class="ruby-string">'..'</span>, <span class="ruby-string">'lib'</span>))
|
364
|
+
<span class="ruby-identifier">$LOAD_PATH</span>.<span class="ruby-identifier">unshift</span>(<span class="ruby-constant">File</span>.<span class="ruby-identifier">dirname</span>(<span class="ruby-keyword">__FILE__</span>))
|
365
|
+
<span class="ruby-identifier">require</span> <span class="ruby-string">'bio-samtools'</span>
|
366
|
+
<span class="ruby-identifier">require</span> <span class="ruby-string">'bio-gngm'</span>
|
367
|
+
</pre>
|
368
|
+
|
369
|
+
<h2 id="label-API">API</h2>
|
370
|
+
|
371
|
+
</div><!-- description -->
|
372
|
+
|
373
|
+
|
374
|
+
|
375
|
+
|
376
|
+
<section id="5Buntitled-5D" class="documentation-section">
|
377
|
+
|
378
|
+
|
379
|
+
|
380
|
+
|
381
|
+
|
382
|
+
<!-- Constants -->
|
383
|
+
<section id="constants-list" class="section">
|
384
|
+
<h3 class="section-header">Constants</h3>
|
385
|
+
<dl>
|
386
|
+
|
387
|
+
<dt id="ERROR_MARGIN">ERROR_MARGIN
|
388
|
+
|
389
|
+
<dd class="description"><p>Ruby 1.9.3 has a rounding error in the Range#step function such that some
|
390
|
+
decimal places are rounded off to 0.00000000000000…1 above their place. So
|
391
|
+
this constant is used to identify windows within a short distance and
|
392
|
+
prevent any rounding errors. Hopefully I should be able to remove this in
|
393
|
+
later versions.</p>
|
394
|
+
|
395
|
+
|
396
|
+
</dl>
|
397
|
+
</section>
|
398
|
+
|
399
|
+
|
400
|
+
|
401
|
+
<!-- Attributes -->
|
402
|
+
<section id="attribute-method-details" class="method-section section">
|
403
|
+
<h3 class="section-header">Attributes</h3>
|
404
|
+
|
405
|
+
|
406
|
+
<div id="attribute-i-file" class="method-detail">
|
407
|
+
<div class="method-heading attribute-method-heading">
|
408
|
+
<span class="method-name">file</span><span
|
409
|
+
class="attribute-access-type">[RW]</span>
|
410
|
+
</div>
|
411
|
+
|
412
|
+
<div class="method-description">
|
413
|
+
|
414
|
+
|
415
|
+
|
416
|
+
</div>
|
417
|
+
</div>
|
418
|
+
|
419
|
+
</section><!-- attribute-method-details -->
|
420
|
+
|
421
|
+
|
422
|
+
<!-- Methods -->
|
423
|
+
|
424
|
+
<section id="public-class-5Buntitled-5D-method-details" class="method-section section">
|
425
|
+
<h3 class="section-header">Public Class Methods</h3>
|
426
|
+
|
427
|
+
|
428
|
+
<div id="method-c-new" class="method-detail ">
|
429
|
+
|
430
|
+
<div class="method-heading">
|
431
|
+
<span class="method-name">new</span><span
|
432
|
+
class="method-args">(options)</span>
|
433
|
+
<span class="method-click-advice">click to toggle source</span>
|
434
|
+
</div>
|
435
|
+
|
436
|
+
|
437
|
+
<div class="method-description">
|
438
|
+
|
439
|
+
<p>Returns a new <a href="Gngm.html">Bio::Util::Gngm</a> object.</p>
|
440
|
+
|
441
|
+
<pre class="ruby"><span class="ruby-identifier">g</span> = <span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">Util</span><span class="ruby-operator">::</span><span class="ruby-constant">Gngm</span>.<span class="ruby-identifier">new</span>(:<span class="ruby-identifier">file</span> =<span class="ruby-operator">></span> <span class="ruby-string">"aln.sort.bam"</span>,
|
442
|
+
:<span class="ruby-identifier">format</span> =<span class="ruby-operator">></span> :<span class="ruby-identifier">bam</span>,
|
443
|
+
:<span class="ruby-identifier">samtools</span> =<span class="ruby-operator">></span> {:<span class="ruby-identifier">q</span> =<span class="ruby-operator">></span> <span class="ruby-value">20</span>, :<span class="ruby-constant">Q</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>, :<span class="ruby-identifier">r</span> =<span class="ruby-operator">></span> <span class="ruby-string">"Chr1:1-100000"</span>},
|
444
|
+
:<span class="ruby-identifier">fasta</span> =<span class="ruby-operator">></span> <span class="ruby-string">"reference.fa"</span>
|
445
|
+
|
446
|
+
)
|
447
|
+
</pre>
|
448
|
+
|
449
|
+
<p>Required parameters and defaults:</p>
|
450
|
+
<ul><li>
|
451
|
+
<p><code>:file => nil</code> -the path to the bam file containing the
|
452
|
+
alignments, a .bai index must be present</p>
|
453
|
+
</li><li>
|
454
|
+
<p><code>:format => :bam</code> -always bam</p>
|
455
|
+
</li><li>
|
456
|
+
<p><code>:fasta => nil</code> -the path to the FASTA formatted reference
|
457
|
+
sequence</p>
|
458
|
+
</li><li>
|
459
|
+
<p><code>:samtools => {:q => 20, :Q => 50, :r =>
|
460
|
+
"Chr1:100-1100"}</code> -options for samtools, see bio-samtools
|
461
|
+
documentation for further details. The :r option is required to specify the
|
462
|
+
region of interest</p>
|
463
|
+
</li></ul>
|
464
|
+
|
465
|
+
<p>Optional parameters and defaults: Most of these are parameters for specific
|
466
|
+
methods and can be over-ridden when particular methods are called</p>
|
467
|
+
<ul><li>
|
468
|
+
<p><code>:variant_call => {:indels => false, :deletions_only =>
|
469
|
+
false, :insertions_only => false, :min_depth => 2, :max_depth =>
|
470
|
+
10000000, :mapping_quality => 10.0, :min_non_ref_count => 2,
|
471
|
+
:ignore_reference_n => true}</code> -for SNP/Indel calling only one of
|
472
|
+
<code>:indels, :deletions_only, :insertions_only</code> should be used.</p>
|
473
|
+
</li><li>
|
474
|
+
<p><code>:threads => {:start => 0.2, :stop => 1.0, :slide => 0.01,
|
475
|
+
:size => 0.1 }</code> -options for thread windows</p>
|
476
|
+
</li><li>
|
477
|
+
<p><code>:insert_size_opts => {:ref_window_size => 200,
|
478
|
+
:ref_window_slide => 50, :isize => 150}</code> -options for insert
|
479
|
+
size calculations</p>
|
480
|
+
</li><li>
|
481
|
+
<p><code>:histo_bin_width => 250000</code> -bin width for histograms of SNP
|
482
|
+
frequency</p>
|
483
|
+
</li><li>
|
484
|
+
<p><code>:graphics => {:width => 1000, :height => 500, :draw_legend
|
485
|
+
=> false, :add_boxes => nil}</code> -graphics output options,
|
486
|
+
<code>:draw_legend</code> draws a legend plot for band figures only</p>
|
487
|
+
</li><li>
|
488
|
+
<p><code>:peaks => {:sigma => 3.0, :threshold => 10.0, :background
|
489
|
+
=> false, :iterations => 13, :markov => false, :window => 3,
|
490
|
+
:range => 10000}</code> -parameters for automated peak calling,
|
491
|
+
parameters relate to R package Peaks. <code>:range</code> is the width of
|
492
|
+
the box to draw on the peak plot</p>
|
493
|
+
</li></ul>
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
<div class="method-source-code" id="new-source">
|
498
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 342</span>
|
499
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">options</span>)
|
500
|
+
<span class="ruby-ivar">@file</span> = <span class="ruby-keyword">nil</span>
|
501
|
+
<span class="ruby-ivar">@snp_positions</span> = <span class="ruby-keyword">nil</span>
|
502
|
+
<span class="ruby-ivar">@threads</span> = <span class="ruby-keyword">nil</span>
|
503
|
+
<span class="ruby-ivar">@densities</span> = <span class="ruby-keyword">nil</span>
|
504
|
+
<span class="ruby-ivar">@clusters</span> = <span class="ruby-keyword">nil</span>
|
505
|
+
<span class="ruby-ivar">@control_band</span> = <span class="ruby-keyword">nil</span>
|
506
|
+
<span class="ruby-ivar">@expected_band</span> = <span class="ruby-keyword">nil</span>
|
507
|
+
<span class="ruby-ivar">@signal</span> = <span class="ruby-keyword">nil</span>
|
508
|
+
<span class="ruby-ivar">@peak_indices</span> = <span class="ruby-keyword">nil</span>
|
509
|
+
<span class="ruby-ivar">@peak_y_values</span> = <span class="ruby-keyword">nil</span>
|
510
|
+
<span class="ruby-ivar">@density_max_y</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#the maximum y value needed to plot the entire set of density plots of threads and maintain a consistent scale for plots</span>
|
511
|
+
<span class="ruby-ivar">@colours</span> = <span class="ruby-node">%w[#A6CEE3 #1F78B4 #B2DF8A #33A02C #FB9A99 #E31A1C #FDBF6F #FF7F00 #CAB2D6 #6A3D9A #FFFF99 #B15928]</span>
|
512
|
+
<span class="ruby-ivar">@thread_colours</span> = {}
|
513
|
+
<span class="ruby-ivar">@opts</span> = {
|
514
|
+
<span class="ruby-value">:file</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">nil</span>,
|
515
|
+
<span class="ruby-value">:format</span> =<span class="ruby-operator">></span> <span class="ruby-value">:bam</span>,
|
516
|
+
<span class="ruby-value">:fasta</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">nil</span>,
|
517
|
+
<span class="ruby-value">:samtools</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:q</span> =<span class="ruby-operator">></span> <span class="ruby-value">20</span>, <span class="ruby-value">:Q</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>},
|
518
|
+
<span class="ruby-comment">##indels = call any and only indels.. :deletions_only :insertions_only = only one type</span>
|
519
|
+
<span class="ruby-comment">## some options are designed to be equivalent to vcfutils.pl from bcftools options when using vcf</span>
|
520
|
+
<span class="ruby-comment">##:min_depth (-d)</span>
|
521
|
+
<span class="ruby-comment">##:max_depth (-D)</span>
|
522
|
+
<span class="ruby-comment">##:mapping_quality (-Q) minimum RMS mapping quality for SNPs (mq in info fields)</span>
|
523
|
+
<span class="ruby-comment">##:min_non_ref_count (-a) minimum num of alt bases ... the sum of the last two numbers in DP4 in info fields</span>
|
524
|
+
<span class="ruby-comment">##doesn't do anything with window filtering or pv values... </span>
|
525
|
+
<span class="ruby-value">:insert_size_opts</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:ref_window_size</span> =<span class="ruby-operator">></span> <span class="ruby-value">200</span>, <span class="ruby-value">:ref_window_slide</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>, <span class="ruby-value">:isize</span> =<span class="ruby-operator">></span> <span class="ruby-value">150</span>},
|
526
|
+
<span class="ruby-value">:variant_call</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:indels</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:deletions_only</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:insertions_only</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:min_depth</span> =<span class="ruby-operator">></span> <span class="ruby-value">2</span>, <span class="ruby-value">:max_depth</span> =<span class="ruby-operator">></span> <span class="ruby-value">10000000</span>, <span class="ruby-value">:mapping_quality</span> =<span class="ruby-operator">></span> <span class="ruby-value">10.0</span>, <span class="ruby-value">:min_non_ref_count</span> =<span class="ruby-operator">></span> <span class="ruby-value">2</span>, <span class="ruby-value">:ignore_reference_n</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">true</span>},
|
527
|
+
<span class="ruby-value">:histo_bin_width</span> =<span class="ruby-operator">></span> <span class="ruby-value">250000</span>,
|
528
|
+
<span class="ruby-value">:graphics</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:width</span> =<span class="ruby-operator">></span> <span class="ruby-value">1000</span>, <span class="ruby-value">:height</span> =<span class="ruby-operator">></span> <span class="ruby-value">500</span>, <span class="ruby-value">:draw_legend</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:add_boxes</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">nil</span>},
|
529
|
+
<span class="ruby-value">:adjust</span> =<span class="ruby-operator">></span> <span class="ruby-value">1</span>,
|
530
|
+
<span class="ruby-value">:control_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>,
|
531
|
+
<span class="ruby-value">:expected_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>,
|
532
|
+
<span class="ruby-value">:threads</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:start</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.2</span>, <span class="ruby-value">:stop</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>, <span class="ruby-value">:slide</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.01</span>, <span class="ruby-value">:size</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.1</span> },
|
533
|
+
<span class="ruby-value">:peaks</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:sigma</span> =<span class="ruby-operator">></span> <span class="ruby-value">3.0</span>, <span class="ruby-value">:threshold</span> =<span class="ruby-operator">></span> <span class="ruby-value">10.0</span>, <span class="ruby-value">:background</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:iterations</span> =<span class="ruby-operator">></span> <span class="ruby-value">13</span>, <span class="ruby-value">:markov</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:window</span> =<span class="ruby-operator">></span> <span class="ruby-value">3</span>, <span class="ruby-value">:range</span> =<span class="ruby-operator">></span> <span class="ruby-value">10000</span>} <span class="ruby-comment">##range is the width of the box to draw on the peak plot</span>
|
534
|
+
}
|
535
|
+
<span class="ruby-ivar">@opts</span>.<span class="ruby-identifier">merge!</span>(<span class="ruby-identifier">options</span>)
|
536
|
+
<span class="ruby-identifier">open_file</span>
|
537
|
+
<span class="ruby-keyword">end</span></pre>
|
538
|
+
</div><!-- new-source -->
|
539
|
+
|
540
|
+
</div>
|
541
|
+
|
542
|
+
|
543
|
+
|
544
|
+
|
545
|
+
</div><!-- new-method -->
|
546
|
+
|
547
|
+
|
548
|
+
</section><!-- public-class-method-details -->
|
549
|
+
|
550
|
+
<section id="public-instance-5Buntitled-5D-method-details" class="method-section section">
|
551
|
+
<h3 class="section-header">Public Instance Methods</h3>
|
552
|
+
|
553
|
+
|
554
|
+
<div id="method-i-calculate_clusters" class="method-detail ">
|
555
|
+
|
556
|
+
<div class="method-heading">
|
557
|
+
<span class="method-name">calculate_clusters</span><span
|
558
|
+
class="method-args">( opts={} )</span>
|
559
|
+
<span class="method-click-advice">click to toggle source</span>
|
560
|
+
</div>
|
561
|
+
|
562
|
+
|
563
|
+
<div class="method-description">
|
564
|
+
|
565
|
+
<p>Calculates the k-means clusters of density curves (groups threads into
|
566
|
+
bands). Calculates the clusters using
|
567
|
+
the R function <code>kmeans()</code>. Recalculates @densities as it does with <a
|
568
|
+
href="Gngm.html#method-i-calculate_densities">#calculate_densities</a>, so
|
569
|
+
clustering can be done without having to explicitly call <a
|
570
|
+
href="Gngm.html#method-i-calculate_densities">#calculate_densities</a>.
|
571
|
+
Clusters are recalculated every time regardless of whether it's been done
|
572
|
+
before or not, so is useful for trying out different
|
573
|
+
values for the parameters. When clusters are calculated the expected and
|
574
|
+
control bands are compared with the <a
|
575
|
+
href="Gngm.html#method-i-calculate_signal">#calculate_signal</a> method and
|
576
|
+
the @signal array populated. Resets the instance variables @control_band,
|
577
|
+
@expected_band, @signal, @peak_indices, @peak_y_values and @clusters</p>
|
578
|
+
|
579
|
+
<p>Options and defaults</p>
|
580
|
+
<ul><li>
|
581
|
+
<p><code>:k => 9</code>, -the number of clusters for the R
|
582
|
+
<code>kmeans</code> function</p>
|
583
|
+
</li><li>
|
584
|
+
<p><code>:seed => false</code> -set this to a number to make the randomized
|
585
|
+
clustering reproducible</p>
|
586
|
+
</li><li>
|
587
|
+
<p><code>:control_chd => 0.5</code> -the value of the control thread/window</p>
|
588
|
+
</li><li>
|
589
|
+
<p><code>:expected_chd => 1.0</code> -the value of the expected
|
590
|
+
thread/window</p>
|
591
|
+
</li><li>
|
592
|
+
<p><code>:adjust => 1.0</code> -the kernel adjustment parameter for the R
|
593
|
+
<code>density</code> function</p>
|
594
|
+
</li><li>
|
595
|
+
<p><code>:pseudo => false</code> - force the densities into a single thread
|
596
|
+
cluster when the number of distinct threads with SNPs is < the value of
|
597
|
+
k. This is only useful in a situation where the spread of the statistic is
|
598
|
+
very limited. EG for using mapped/unmapped mate pairs then almost all
|
599
|
+
windows will have proportion 1.0 but a tiny number will be close to 0.5
|
600
|
+
with few other values considered.</p>
|
601
|
+
</li></ul>
|
602
|
+
|
603
|
+
|
604
|
+
|
605
|
+
<div class="method-source-code" id="calculate_clusters-source">
|
606
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 764</span>
|
607
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">calculate_clusters</span>( <span class="ruby-identifier">opts</span>={} )
|
608
|
+
<span class="ruby-identifier">options</span> = {<span class="ruby-value">:k</span> =<span class="ruby-operator">></span> <span class="ruby-value">9</span>, <span class="ruby-value">:seed</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:adjust</span> =<span class="ruby-operator">></span> <span class="ruby-value">1</span>, <span class="ruby-value">:control_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>, <span class="ruby-value">:expected_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>, <span class="ruby-value">:pseudo</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>}
|
609
|
+
<span class="ruby-identifier">options</span> = <span class="ruby-identifier">options</span>.<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">opts</span>)
|
610
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:pseudo</span>]
|
611
|
+
<span class="ruby-identifier">put_threads_into_individual_clusters</span>(<span class="ruby-identifier">options</span>)
|
612
|
+
<span class="ruby-keyword">return</span>
|
613
|
+
<span class="ruby-keyword">end</span>
|
614
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
615
|
+
<span class="ruby-identifier">names</span> = []
|
616
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-string">"a"</span>
|
617
|
+
<span class="ruby-ivar">@control_band</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new clusters</span>
|
618
|
+
<span class="ruby-ivar">@expected_band</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new clusters</span>
|
619
|
+
<span class="ruby-ivar">@signal</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new clusters</span>
|
620
|
+
<span class="ruby-ivar">@peak_indices</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new cluster</span>
|
621
|
+
<span class="ruby-ivar">@peak_y_values</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new cluster</span>
|
622
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">calculate_densities</span>(<span class="ruby-identifier">options</span>[<span class="ruby-value">:adjust</span>]).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">d</span><span class="ruby-operator">|</span>
|
623
|
+
<span class="ruby-identifier">density_array</span> = <span class="ruby-identifier">d</span>.<span class="ruby-identifier">last</span>
|
624
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">assign</span> <span class="ruby-identifier">name</span>, <span class="ruby-identifier">density_array</span> <span class="ruby-comment">##although windows go in in numeric order, R won't allow numbers as names in data frames so we need a proxy</span>
|
625
|
+
<span class="ruby-identifier">names</span> <span class="ruby-operator"><<</span> <span class="ruby-node">"#{name}=#{name}"</span>
|
626
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">next</span>
|
627
|
+
<span class="ruby-keyword">end</span>
|
628
|
+
<span class="ruby-identifier">data_frame_command</span> = <span class="ruby-string">"data = data.frame("</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">names</span>.<span class="ruby-identifier">join</span>(<span class="ruby-string">","</span>) <span class="ruby-operator">+</span> <span class="ruby-string">")"</span>
|
629
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-identifier">data_frame_command</span>
|
630
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"set.seed(#{options[:seed]})"</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:seed</span>]
|
631
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"k = kmeans(cor(data),#{options[:k]},nstart=1000)"</span>
|
632
|
+
<span class="ruby-ivar">@clusters</span> = <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span> <span class="ruby-string">"k$cluster"</span> <span class="ruby-comment">##clusters are returned in the order in densities</span>
|
633
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
634
|
+
<span class="ruby-comment">##now set the cluster colours.. </span>
|
635
|
+
<span class="ruby-identifier">colours</span> = <span class="ruby-node">%w[#A6CEE3 #1F78B4 #B2DF8A #33A02C #FB9A99 #E31A1C #FDBF6F #FF7F00 #CAB2D6 #6A3D9A #FFFF99 #B15928]</span>
|
636
|
+
<span class="ruby-identifier">ci</span> = <span class="ruby-value">0</span>
|
637
|
+
<span class="ruby-identifier">col_nums</span> = {} <span class="ruby-comment">##hash of cluster numbers and colours</span>
|
638
|
+
<span class="ruby-ivar">@clusters</span>.<span class="ruby-identifier">each_index</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">i</span><span class="ruby-operator">|</span>
|
639
|
+
<span class="ruby-keyword">if</span> <span class="ruby-keyword">not</span> <span class="ruby-identifier">col_nums</span>[<span class="ruby-ivar">@clusters</span>[<span class="ruby-identifier">i</span>]]
|
640
|
+
<span class="ruby-identifier">col_nums</span>[<span class="ruby-ivar">@clusters</span>[<span class="ruby-identifier">i</span>]] = <span class="ruby-identifier">colours</span>[<span class="ruby-identifier">ci</span>]
|
641
|
+
<span class="ruby-identifier">ci</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
|
642
|
+
<span class="ruby-identifier">ci</span> = <span class="ruby-value">0</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">ci</span> <span class="ruby-operator">></span> <span class="ruby-value">11</span>
|
643
|
+
<span class="ruby-keyword">end</span>
|
644
|
+
<span class="ruby-ivar">@thread_colours</span>[<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">first</span>] = <span class="ruby-identifier">col_nums</span>[<span class="ruby-ivar">@clusters</span>[<span class="ruby-identifier">i</span>]]
|
645
|
+
<span class="ruby-keyword">end</span>
|
646
|
+
<span class="ruby-ivar">@control_band</span> = <span class="ruby-identifier">get_band</span>(<span class="ruby-identifier">options</span>[<span class="ruby-value">:control_chd</span>])
|
647
|
+
<span class="ruby-ivar">@expected_band</span> = <span class="ruby-identifier">get_band</span>(<span class="ruby-identifier">options</span>[<span class="ruby-value">:expected_chd</span>])
|
648
|
+
<span class="ruby-identifier">calculate_signal</span>
|
649
|
+
<span class="ruby-keyword">end</span></pre>
|
650
|
+
</div><!-- calculate_clusters-source -->
|
651
|
+
|
652
|
+
</div>
|
653
|
+
|
654
|
+
|
655
|
+
|
656
|
+
|
657
|
+
</div><!-- calculate_clusters-method -->
|
658
|
+
|
659
|
+
|
660
|
+
<div id="method-i-calculate_densities" class="method-detail ">
|
661
|
+
|
662
|
+
<div class="method-heading">
|
663
|
+
<span class="method-name">calculate_densities</span><span
|
664
|
+
class="method-args">(adjust=1)</span>
|
665
|
+
<span class="method-click-advice">click to toggle source</span>
|
666
|
+
</div>
|
667
|
+
|
668
|
+
|
669
|
+
<div class="method-description">
|
670
|
+
|
671
|
+
<p>Sets and returns the array of arrays <code>[window, [density curve x
|
672
|
+
values], [density curve y values] ]</code>. Calculates the density curve
|
673
|
+
using the R function <code>density()</code>. Always sets @densities regardless of
|
674
|
+
whether it contains anything or not so is useful for trying out adjustment
|
675
|
+
values. Ignores threads with fewer than 2 polymorphisms since density can’t
|
676
|
+
be computed with so few polymorphisms.</p>
|
677
|
+
|
678
|
+
<p>Options and defaults</p>
|
679
|
+
<ul><li>
|
680
|
+
<p><code>adjust = 1</code> -the kernel adjustment parameter for the R
|
681
|
+
<code>density</code> function</p>
|
682
|
+
</li></ul>
|
683
|
+
|
684
|
+
|
685
|
+
|
686
|
+
<div class="method-source-code" id="calculate_densities-source">
|
687
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 679</span>
|
688
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">calculate_densities</span>(<span class="ruby-identifier">adjust</span>=<span class="ruby-value">1</span>)
|
689
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
690
|
+
<span class="ruby-identifier">densities</span> = []
|
691
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">threads</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span>
|
692
|
+
<span class="ruby-keyword">next</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><</span> <span class="ruby-value">2</span> <span class="ruby-comment">##length of density array is smaller or == threads, since too small windows are ignored...</span>
|
693
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">curr_win</span> = <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>
|
694
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"d = density(curr_win,n=240,kernel=\"gaussian\", from=#{@snp_positions.first[0]}, to=#{@snp_positions.last[0]}, adjust=#{adjust})"</span>
|
695
|
+
<span class="ruby-identifier">densities</span> <span class="ruby-operator"><<</span> [<span class="ruby-identifier">t</span>.<span class="ruby-identifier">first</span>, <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span>(<span class="ruby-string">"d$x"</span>), <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span>(<span class="ruby-string">"d$y"</span>)]
|
696
|
+
<span class="ruby-keyword">end</span>
|
697
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
698
|
+
<span class="ruby-ivar">@densities</span> = <span class="ruby-identifier">densities</span>
|
699
|
+
<span class="ruby-identifier">calculate_density_max_y</span> <span class="ruby-comment">##need to re-do every time we get new densities</span>
|
700
|
+
<span class="ruby-identifier">densities</span>
|
701
|
+
<span class="ruby-keyword">end</span></pre>
|
702
|
+
</div><!-- calculate_densities-source -->
|
703
|
+
|
704
|
+
</div>
|
705
|
+
|
706
|
+
|
707
|
+
|
708
|
+
|
709
|
+
</div><!-- calculate_densities-method -->
|
710
|
+
|
711
|
+
|
712
|
+
<div id="method-i-calculate_signal" class="method-detail ">
|
713
|
+
|
714
|
+
<div class="method-heading">
|
715
|
+
<span class="method-name">calculate_signal</span><span
|
716
|
+
class="method-args">()</span>
|
717
|
+
<span class="method-click-advice">click to toggle source</span>
|
718
|
+
</div>
|
719
|
+
|
720
|
+
|
721
|
+
<div class="method-description">
|
722
|
+
|
723
|
+
<p>Returns an array of values representing the ratio of the average of the
|
724
|
+
expected threads/windows to the control threads/windows. Sets @signal, the
|
725
|
+
signal curve.</p>
|
726
|
+
|
727
|
+
|
728
|
+
|
729
|
+
<div class="method-source-code" id="calculate_signal-source">
|
730
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 969</span>
|
731
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">calculate_signal</span>
|
732
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
733
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-string">"a"</span>
|
734
|
+
<span class="ruby-identifier">control_names</span> = []
|
735
|
+
<span class="ruby-identifier">expected_names</span> = []
|
736
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">d</span><span class="ruby-operator">|</span>
|
737
|
+
<span class="ruby-keyword">if</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">d</span>.<span class="ruby-identifier">first</span>)
|
738
|
+
<span class="ruby-identifier">density_array</span> = <span class="ruby-identifier">d</span>.<span class="ruby-identifier">last</span>
|
739
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">assign</span> <span class="ruby-identifier">name</span>, <span class="ruby-identifier">density_array</span> <span class="ruby-comment">##although windows go in in numeric order, r wont allow numbers as names in data frames so we need a proxy</span>
|
740
|
+
<span class="ruby-identifier">control_names</span> <span class="ruby-operator"><<</span> <span class="ruby-node">"#{name}=#{name}"</span>
|
741
|
+
<span class="ruby-keyword">elsif</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">d</span>.<span class="ruby-identifier">first</span>)
|
742
|
+
<span class="ruby-identifier">density_array</span> = <span class="ruby-identifier">d</span>.<span class="ruby-identifier">last</span>
|
743
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">assign</span> <span class="ruby-identifier">name</span>, <span class="ruby-identifier">density_array</span>
|
744
|
+
<span class="ruby-identifier">expected_names</span> <span class="ruby-operator"><<</span> <span class="ruby-node">"#{name}=#{name}"</span>
|
745
|
+
<span class="ruby-keyword">end</span>
|
746
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">next</span>
|
747
|
+
<span class="ruby-keyword">end</span>
|
748
|
+
<span class="ruby-identifier">data_frame_command</span> = <span class="ruby-string">"control = data.frame("</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">control_names</span>.<span class="ruby-identifier">join</span>(<span class="ruby-string">","</span>) <span class="ruby-operator">+</span> <span class="ruby-string">")"</span>
|
749
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-identifier">data_frame_command</span>
|
750
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"control_mean = apply(control, 1, function(ecks) mean((as.numeric(ecks))) )"</span>
|
751
|
+
<span class="ruby-identifier">data_frame_command</span> = <span class="ruby-string">"expected = data.frame("</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">expected_names</span>.<span class="ruby-identifier">join</span>(<span class="ruby-string">","</span>) <span class="ruby-operator">+</span> <span class="ruby-string">")"</span>
|
752
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-identifier">data_frame_command</span>
|
753
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"expected_mean = apply(expected, 1, function(ecks) mean((as.numeric(ecks))) )"</span>
|
754
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"signal = expected_mean / control_mean"</span>
|
755
|
+
<span class="ruby-identifier">signal</span> = <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span> <span class="ruby-string">"signal"</span>
|
756
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
757
|
+
<span class="ruby-ivar">@signal</span> = <span class="ruby-identifier">signal</span>
|
758
|
+
<span class="ruby-keyword">end</span></pre>
|
759
|
+
</div><!-- calculate_signal-source -->
|
760
|
+
|
761
|
+
</div>
|
762
|
+
|
763
|
+
|
764
|
+
|
765
|
+
|
766
|
+
</div><!-- calculate_signal-method -->
|
767
|
+
|
768
|
+
|
769
|
+
<div id="method-i-close" class="method-detail ">
|
770
|
+
|
771
|
+
<div class="method-heading">
|
772
|
+
<span class="method-name">close</span><span
|
773
|
+
class="method-args">()</span>
|
774
|
+
<span class="method-click-advice">click to toggle source</span>
|
775
|
+
</div>
|
776
|
+
|
777
|
+
|
778
|
+
<div class="method-description">
|
779
|
+
|
780
|
+
<p>For BAM files, calls Bio::DB::Sam#close to close the connections to input
|
781
|
+
files safely</p>
|
782
|
+
|
783
|
+
|
784
|
+
|
785
|
+
<div class="method-source-code" id="close-source">
|
786
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 399</span>
|
787
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">close</span>
|
788
|
+
<span class="ruby-keyword">case</span> <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:format</span>]
|
789
|
+
<span class="ruby-keyword">when</span> <span class="ruby-value">:bam</span> <span class="ruby-keyword">then</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">close</span>
|
790
|
+
<span class="ruby-keyword">end</span>
|
791
|
+
<span class="ruby-keyword">end</span></pre>
|
792
|
+
</div><!-- close-source -->
|
793
|
+
|
794
|
+
</div>
|
795
|
+
|
796
|
+
|
797
|
+
|
798
|
+
|
799
|
+
</div><!-- close-method -->
|
800
|
+
|
801
|
+
|
802
|
+
<div id="method-i-clusters" class="method-detail ">
|
803
|
+
|
804
|
+
<div class="method-heading">
|
805
|
+
<span class="method-name">clusters</span><span
|
806
|
+
class="method-args">(opts={})</span>
|
807
|
+
<span class="method-click-advice">click to toggle source</span>
|
808
|
+
</div>
|
809
|
+
|
810
|
+
|
811
|
+
<div class="method-description">
|
812
|
+
|
813
|
+
<p>Returns the array instance variable @clusters. The R function <code>kmeans()</code> is
|
814
|
+
used to calculate the clusters based on a correlation matrix of the density
|
815
|
+
curves. If @clusters is nil when called this method will run the <a
|
816
|
+
href="Gngm.html#method-i-calculate_clusters">#calculate_clusters</a> method
|
817
|
+
and set @clusters. With this method you cannot recalculate the clusters
|
818
|
+
after they have been done once.</p>
|
819
|
+
|
820
|
+
<p>Options and defaults</p>
|
821
|
+
<ul><li>
|
822
|
+
<p><code>:k => 9</code> -the number of clusters for the R
|
823
|
+
<code>kmeans</code> function</p>
|
824
|
+
</li><li>
|
825
|
+
<p><code>:seed => false</code> -set this to a number to make the randomized
|
826
|
+
clustering reproducible</p>
|
827
|
+
</li><li>
|
828
|
+
<p><code>:control_chd => 0.5</code> -the value of the control thread/window</p>
|
829
|
+
</li><li>
|
830
|
+
<p><code>:expected_chd => 1.0</code> -the value of the expected
|
831
|
+
thread/window</p>
|
832
|
+
</li><li>
|
833
|
+
<p><code>:adjust => 1.0</code> -the kernel adjustment parameter for the R
|
834
|
+
<code>density</code> function</p>
|
835
|
+
</li></ul>
|
836
|
+
|
837
|
+
|
838
|
+
|
839
|
+
<div class="method-source-code" id="clusters-source">
|
840
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 748</span>
|
841
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">clusters</span>(<span class="ruby-identifier">opts</span>={})
|
842
|
+
<span class="ruby-ivar">@clusters</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">calculate_clusters</span>(<span class="ruby-identifier">opts</span>={})
|
843
|
+
<span class="ruby-keyword">end</span></pre>
|
844
|
+
</div><!-- clusters-source -->
|
845
|
+
|
846
|
+
</div>
|
847
|
+
|
848
|
+
|
849
|
+
|
850
|
+
|
851
|
+
</div><!-- clusters-method -->
|
852
|
+
|
853
|
+
|
854
|
+
<div id="method-i-collect_threads" class="method-detail ">
|
855
|
+
|
856
|
+
<div class="method-heading">
|
857
|
+
<span class="method-name">collect_threads</span><span
|
858
|
+
class="method-args">(opts=@opts[:threads])</span>
|
859
|
+
<span class="method-click-advice">click to toggle source</span>
|
860
|
+
</div>
|
861
|
+
|
862
|
+
|
863
|
+
<div class="method-description">
|
864
|
+
|
865
|
+
<p>Resets contents of instance variable @threads and returns an array of
|
866
|
+
arrays <code>[[window 1, [snp position 1, snp position 2 ... snp position
|
867
|
+
n]], [window 2, [snp position 1, snp position 2 ... snp position n]] ]</code>.
|
868
|
+
Always sets @threads regardless of whether it contains anything or not so
|
869
|
+
is useful for trying out different window sizes etc.</p>
|
870
|
+
|
871
|
+
<p>Options and defaults:</p>
|
872
|
+
<ul><li>
|
873
|
+
<p><code>:start => 0.2</code> -first window</p>
|
874
|
+
</li><li>
|
875
|
+
<p><code>:stop => 1.0</code> -last window</p>
|
876
|
+
</li><li>
|
877
|
+
<p><code>:slide => 0.01</code> -distance between windows</p>
|
878
|
+
</li><li>
|
879
|
+
<p><code>:size => 0.1</code> -window width</p>
|
880
|
+
</li></ul>
|
881
|
+
|
882
|
+
|
883
|
+
|
884
|
+
<div class="method-source-code" id="collect_threads-source">
|
885
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 597</span>
|
886
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">collect_threads</span>(<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:threads</span>])
|
887
|
+
<span class="ruby-identifier">opts</span>[<span class="ruby-value">:slide</span>] = <span class="ruby-value">0.000001</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:slide</span>] <span class="ruby-operator"><</span> <span class="ruby-value">0.000001</span> <span class="ruby-comment">##to allow for the rounding error in the step function... </span>
|
888
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-string">"snp positions have not been calculated yet"</span> <span class="ruby-keyword">if</span> <span class="ruby-keyword">not</span> <span class="ruby-ivar">@snp_positions</span>
|
889
|
+
<span class="ruby-identifier">start</span>,<span class="ruby-identifier">stop</span>,<span class="ruby-identifier">slide</span>,<span class="ruby-identifier">size</span> = <span class="ruby-identifier">opts</span>[<span class="ruby-value">:start</span>].<span class="ruby-identifier">to_f</span>, <span class="ruby-identifier">opts</span>[<span class="ruby-value">:stop</span>].<span class="ruby-identifier">to_f</span>, <span class="ruby-identifier">opts</span>[<span class="ruby-value">:slide</span>].<span class="ruby-identifier">to_f</span>, <span class="ruby-identifier">opts</span>[<span class="ruby-value">:size</span>].<span class="ruby-identifier">to_f</span>
|
890
|
+
<span class="ruby-identifier">arr</span> = []
|
891
|
+
(<span class="ruby-identifier">start</span><span class="ruby-operator">..</span><span class="ruby-identifier">stop</span>).<span class="ruby-identifier">step</span>(<span class="ruby-identifier">slide</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">win</span><span class="ruby-operator">|</span>
|
892
|
+
<span class="ruby-identifier">arr</span> <span class="ruby-operator"><<</span> [<span class="ruby-identifier">win</span>, <span class="ruby-ivar">@snp_positions</span>.<span class="ruby-identifier">select</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">last</span> <span class="ruby-operator">>=</span> <span class="ruby-identifier">win</span> <span class="ruby-keyword">and</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">last</span> <span class="ruby-operator"><</span> <span class="ruby-identifier">win</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">size</span> }.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">y</span>.<span class="ruby-identifier">first</span>} ]
|
893
|
+
<span class="ruby-keyword">end</span>
|
894
|
+
<span class="ruby-ivar">@threads</span> = <span class="ruby-identifier">arr</span>
|
895
|
+
<span class="ruby-keyword">end</span></pre>
|
896
|
+
</div><!-- collect_threads-source -->
|
897
|
+
|
898
|
+
</div>
|
899
|
+
|
900
|
+
|
901
|
+
|
902
|
+
|
903
|
+
</div><!-- collect_threads-method -->
|
904
|
+
|
905
|
+
|
906
|
+
<div id="method-i-densities" class="method-detail ">
|
907
|
+
|
908
|
+
<div class="method-heading">
|
909
|
+
<span class="method-name">densities</span><span
|
910
|
+
class="method-args">(adjust=1)</span>
|
911
|
+
<span class="method-click-advice">click to toggle source</span>
|
912
|
+
</div>
|
913
|
+
|
914
|
+
|
915
|
+
<div class="method-description">
|
916
|
+
|
917
|
+
<p>Returns the instance variable @densities array of arrays <code>[window,
|
918
|
+
[density curve x values], [density curve y values] ]</code>. The R function
|
919
|
+
<code>density()</code> is used to calculate the values. If @densities is nil when
|
920
|
+
called this method will run the <a
|
921
|
+
href="Gngm.html#method-i-calculate_densities">#calculate_densities</a>
|
922
|
+
method and set @densities. With this method you cannot recalculate the
|
923
|
+
densities after they have been done once.</p>
|
924
|
+
|
925
|
+
<p>Options and defaults</p>
|
926
|
+
<ul><li>
|
927
|
+
<p><code>adjust = 1</code> -the kernel adjustment parameter for the R
|
928
|
+
<code>density</code> function</p>
|
929
|
+
</li></ul>
|
930
|
+
|
931
|
+
|
932
|
+
|
933
|
+
<div class="method-source-code" id="densities-source">
|
934
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 669</span>
|
935
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">densities</span>(<span class="ruby-identifier">adjust</span>=<span class="ruby-value">1</span>)
|
936
|
+
<span class="ruby-ivar">@densities</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">calculate_densities</span>(<span class="ruby-identifier">adjust</span>)
|
937
|
+
<span class="ruby-keyword">end</span></pre>
|
938
|
+
</div><!-- densities-source -->
|
939
|
+
|
940
|
+
</div>
|
941
|
+
|
942
|
+
|
943
|
+
|
944
|
+
|
945
|
+
</div><!-- densities-method -->
|
946
|
+
|
947
|
+
|
948
|
+
<div id="method-i-draw_bands" class="method-detail ">
|
949
|
+
|
950
|
+
<div class="method-heading">
|
951
|
+
<span class="method-name">draw_bands</span><span
|
952
|
+
class="method-args">(file="myfile.png", optsa={})</span>
|
953
|
+
<span class="method-click-advice">click to toggle source</span>
|
954
|
+
</div>
|
955
|
+
|
956
|
+
|
957
|
+
<div class="method-description">
|
958
|
+
|
959
|
+
<p>Draws the clustered bands that correspond to the expected and control
|
960
|
+
window in a single PNG file <code>file</code></p>
|
961
|
+
|
962
|
+
<p>Options and defaults</p>
|
963
|
+
<ul><li>
|
964
|
+
<p><code>:add_lines => nil</code> -if an array of positions is provided, e.g.
|
965
|
+
<code>[100,345]</code>, vertical lines will be drawn at these positions. Useful for
|
966
|
+
indicating feature positions on the plot</p>
|
967
|
+
</li><li>
|
968
|
+
<p><code>:width => 1000</code> -width of the PNG in pixels</p>
|
969
|
+
</li><li>
|
970
|
+
<p><code>:height => 500</code> -height of the PNG in pixels</p>
|
971
|
+
</li></ul>
|
972
|
+
|
973
|
+
|
974
|
+
|
975
|
+
<div class="method-source-code" id="draw_bands-source">
|
976
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 702</span>
|
977
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_bands</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">optsa</span>={})
|
978
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">optsa</span>)
|
979
|
+
<span class="ruby-identifier">pp</span> <span class="ruby-identifier">optsa</span>
|
980
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-string">"Can't draw threads until clustering is done"</span> <span class="ruby-keyword">unless</span> <span class="ruby-ivar">@clusters</span>
|
981
|
+
<span class="ruby-comment">#uses R's standard plot functions.</span>
|
982
|
+
<span class="ruby-comment">##same as draw_threads, but skips threads that aren't on the bands lists</span>
|
983
|
+
<span class="ruby-comment">## </span>
|
984
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
985
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
986
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">false</span>
|
987
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span>
|
988
|
+
<span class="ruby-keyword">if</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">t</span>[<span class="ruby-value">0</span>]) <span class="ruby-keyword">or</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">t</span>[<span class="ruby-value">0</span>])
|
989
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dx</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">1</span>]
|
990
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dy</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">2</span>]
|
991
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">curr_win</span> = <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>
|
992
|
+
<span class="ruby-comment">#r.eval "d = density(curr_win,n=240,kernel=\"gaussian\", from=#{@snp_positions.first[0]}, to=#{@snp_positions.last[0]})"</span>
|
993
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">plot_open</span>
|
994
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"lines(dx, dy, col=\"#{@thread_colours[t.first]}\")"</span>
|
995
|
+
<span class="ruby-keyword">else</span>
|
996
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(dx, dy, type=\"l\", col=\"#{@thread_colours[t.first]}\",ylim=c(0,#{density_max_y}), main='#{file}',xlab='position', ylab='density')"</span>
|
997
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">true</span>
|
998
|
+
<span class="ruby-keyword">end</span>
|
999
|
+
<span class="ruby-keyword">end</span>
|
1000
|
+
<span class="ruby-keyword">end</span>
|
1001
|
+
<span class="ruby-identifier">label1</span> = <span class="ruby-string">"Control band: "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">min</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">+</span> <span class="ruby-string">" < ChD < "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">max</span>.<span class="ruby-identifier">to_s</span>
|
1002
|
+
<span class="ruby-identifier">label2</span> = <span class="ruby-string">"Expected band: "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">min</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">+</span> <span class="ruby-string">" < ChD < "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">max</span>.<span class="ruby-identifier">to_s</span>
|
1003
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"legend('top', c('#{label1}','#{label2}'), lty=c(1,1),lwd=c(2.5,2.5),col=c('#{@thread_colours[@control_band.first]}','#{@thread_colours[@expected_band.first]}'))"</span>
|
1004
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:add_lines</span>] <span class="ruby-keyword">and</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:add_lines</span>].<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Array</span>)
|
1005
|
+
<span class="ruby-identifier">opts</span>[<span class="ruby-value">:add_lines</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">pos</span><span class="ruby-operator">|</span>
|
1006
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"abline(v=#{pos})"</span>
|
1007
|
+
<span class="ruby-keyword">end</span>
|
1008
|
+
<span class="ruby-keyword">end</span>
|
1009
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1010
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1011
|
+
<span class="ruby-keyword">end</span></pre>
|
1012
|
+
</div><!-- draw_bands-source -->
|
1013
|
+
|
1014
|
+
</div>
|
1015
|
+
|
1016
|
+
|
1017
|
+
|
1018
|
+
|
1019
|
+
</div><!-- draw_bands-method -->
|
1020
|
+
|
1021
|
+
|
1022
|
+
<div id="method-i-draw_hit_count" class="method-detail ">
|
1023
|
+
|
1024
|
+
<div class="method-heading">
|
1025
|
+
<span class="method-name">draw_hit_count</span><span
|
1026
|
+
class="method-args">(file="myfile.png",opts=@opts[:graphics])</span>
|
1027
|
+
<span class="method-click-advice">click to toggle source</span>
|
1028
|
+
</div>
|
1029
|
+
|
1030
|
+
|
1031
|
+
<div class="method-description">
|
1032
|
+
|
1033
|
+
<p>Draws a barplot of the number of polymorphisms in each thread/window in a
|
1034
|
+
single PNG file <code>file</code></p>
|
1035
|
+
|
1036
|
+
|
1037
|
+
|
1038
|
+
<div class="method-source-code" id="draw_hit_count-source">
|
1039
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 948</span>
|
1040
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_hit_count</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>,<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>])
|
1041
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1042
|
+
<span class="ruby-identifier">wins</span> = []
|
1043
|
+
<span class="ruby-identifier">hits</span> = []
|
1044
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">threads</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">thread</span><span class="ruby-operator">|</span>
|
1045
|
+
<span class="ruby-identifier">wins</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">first</span>
|
1046
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">empty?</span>
|
1047
|
+
<span class="ruby-identifier">hits</span> <span class="ruby-operator"><<</span> <span class="ruby-value">0.01</span> <span class="ruby-comment">##pseudovalue gets around the case where a thread has no hits... which messes up barplot in R</span>
|
1048
|
+
<span class="ruby-keyword">else</span>
|
1049
|
+
<span class="ruby-identifier">hits</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">length</span>
|
1050
|
+
<span class="ruby-keyword">end</span>
|
1051
|
+
<span class="ruby-keyword">end</span>
|
1052
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">wins</span> = <span class="ruby-identifier">wins</span>
|
1053
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">hits</span> = <span class="ruby-identifier">hits</span>
|
1054
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1055
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"barplot(hits, names.arg=c(wins), xlab='window', log='y', ylab='number of hits', main='Number of Polymorphisms #{file}', col=rgb(r=0,g=1,b=1, alpha=0.3), na.rm = TRUE)"</span>
|
1056
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1057
|
+
<span class="ruby-keyword">end</span></pre>
|
1058
|
+
</div><!-- draw_hit_count-source -->
|
1059
|
+
|
1060
|
+
</div>
|
1061
|
+
|
1062
|
+
|
1063
|
+
|
1064
|
+
|
1065
|
+
</div><!-- draw_hit_count-method -->
|
1066
|
+
|
1067
|
+
|
1068
|
+
<div id="method-i-draw_peaks" class="method-detail ">
|
1069
|
+
|
1070
|
+
<div class="method-heading">
|
1071
|
+
<span class="method-name">draw_peaks</span><span
|
1072
|
+
class="method-args">(file="myfile.png",opts=@opts[:graphics])</span>
|
1073
|
+
<span class="method-click-advice">click to toggle source</span>
|
1074
|
+
</div>
|
1075
|
+
|
1076
|
+
|
1077
|
+
<div class="method-description">
|
1078
|
+
|
1079
|
+
<p>Draws the peaks calculated from the signal curve by the R function
|
1080
|
+
<code>Peaks</code> in Bio::Util::Gngm#calculate_peaks. Adds boxes of width
|
1081
|
+
<code>:range</code> to each peak and annotates the limits. Options are set
|
1082
|
+
in the global options hash <code>:peaks</code> and relate to the Peaks
|
1083
|
+
function in R.</p>
|
1084
|
+
|
1085
|
+
|
1086
|
+
|
1087
|
+
<div class="method-source-code" id="draw_peaks-source">
|
1088
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 891</span>
|
1089
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_peaks</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>,<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>])
|
1090
|
+
<span class="ruby-identifier">opts_a</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:peaks</span>]
|
1091
|
+
<span class="ruby-identifier">opts_a</span>.<span class="ruby-identifier">merge!</span>(<span class="ruby-identifier">opts</span>)
|
1092
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-identifier">opts_a</span> <span class="ruby-comment">##sigh ... </span>
|
1093
|
+
<span class="ruby-comment">#opts[:background] = opts[:background].to_s.upcase</span>
|
1094
|
+
<span class="ruby-comment">#opts[:markov] = opts[:markov].to_s.upcase </span>
|
1095
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get_peaks</span>(<span class="ruby-identifier">opts</span>)
|
1096
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1097
|
+
<span class="ruby-comment">#r.eval "suppressMessages ( library('Peaks') )"</span>
|
1098
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">signal</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">signal</span>
|
1099
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">x_vals</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-value">0</span>][<span class="ruby-value">1</span>]
|
1100
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1101
|
+
<span class="ruby-comment">#r.eval "spec = SpectrumSearch(signal,#{opts[:sigma]},threshold=#{opts[:threshold]},background=#{opts[:background]},iterations=#{opts[:iterations]},markov=#{opts[:markov]},window=#{opts[:window]})"</span>
|
1102
|
+
<span class="ruby-comment">#peak_positions = r.pull "spec$pos"</span>
|
1103
|
+
<span class="ruby-comment">#y = r.pull "spec$y"</span>
|
1104
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">y</span> = <span class="ruby-ivar">@peak_y_values</span>
|
1105
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">pos</span> = <span class="ruby-ivar">@peak_indices</span>
|
1106
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(x_vals,y, type=\"l\", xlab='position', ylab='Peaks', main='#{file}' )"</span>
|
1107
|
+
<span class="ruby-ivar">@peak_indices</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">peak</span><span class="ruby-operator">|</span>
|
1108
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"rect(x_vals[#{peak}]-(#{opts[:range]/2}), 0, x_vals[#{peak}]+#{opts[:range]/2}, max(y), col=rgb(r=0,g=1,b=0, alpha=0.3) )"</span>
|
1109
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"text(x_vals[#{peak}]-(#{opts[:range]/2}),max(y) + 0.05, floor(x_vals[#{peak}]-(#{opts[:range]/2})) )"</span>
|
1110
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"text(x_vals[#{peak}]+(#{opts[:range]/2}), max(y) + 0.05, floor(x_vals[#{peak}]+(#{opts[:range]/2})) )"</span>
|
1111
|
+
<span class="ruby-keyword">end</span>
|
1112
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1113
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1114
|
+
<span class="ruby-keyword">end</span></pre>
|
1115
|
+
</div><!-- draw_peaks-source -->
|
1116
|
+
|
1117
|
+
</div>
|
1118
|
+
|
1119
|
+
|
1120
|
+
|
1121
|
+
|
1122
|
+
</div><!-- draw_peaks-method -->
|
1123
|
+
|
1124
|
+
|
1125
|
+
<div id="method-i-draw_signal" class="method-detail ">
|
1126
|
+
|
1127
|
+
<div class="method-heading">
|
1128
|
+
<span class="method-name">draw_signal</span><span
|
1129
|
+
class="method-args">(file="myfile.png", opts=@opts[:graphics])</span>
|
1130
|
+
<span class="method-click-advice">click to toggle source</span>
|
1131
|
+
</div>
|
1132
|
+
|
1133
|
+
|
1134
|
+
<div class="method-description">
|
1135
|
+
|
1136
|
+
<p>Draws the contents of the @signal instance variable in a single PNG file
|
1137
|
+
<code>file</code></p>
|
1138
|
+
|
1139
|
+
|
1140
|
+
|
1141
|
+
<div class="method-source-code" id="draw_signal-source">
|
1142
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 868</span>
|
1143
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_signal</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>]) <span class="ruby-comment">#data.frame(bubs=data$bubbles_found,conf=data$bubbles_confirmed)</span>
|
1144
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1145
|
+
<span class="ruby-identifier">x_vals</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-value">0</span>][<span class="ruby-value">1</span>]
|
1146
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1147
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">x_vals</span> = <span class="ruby-identifier">x_vals</span>
|
1148
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">signal</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">signal</span>
|
1149
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(x_vals,signal, type=\"l\", xlab='position', ylab='ratio of signals (expected / control ~ homo / hetero)', main='#{file}' )"</span>
|
1150
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1151
|
+
<span class="ruby-keyword">end</span></pre>
|
1152
|
+
</div><!-- draw_signal-source -->
|
1153
|
+
|
1154
|
+
</div>
|
1155
|
+
|
1156
|
+
|
1157
|
+
|
1158
|
+
|
1159
|
+
</div><!-- draw_signal-method -->
|
1160
|
+
|
1161
|
+
|
1162
|
+
<div id="method-i-draw_threads" class="method-detail ">
|
1163
|
+
|
1164
|
+
<div class="method-heading">
|
1165
|
+
<span class="method-name">draw_threads</span><span
|
1166
|
+
class="method-args">(file="myfile.png", options={})</span>
|
1167
|
+
<span class="method-click-advice">click to toggle source</span>
|
1168
|
+
</div>
|
1169
|
+
|
1170
|
+
|
1171
|
+
<div class="method-description">
|
1172
|
+
|
1173
|
+
<p>Draws the threads in a single PNG file <code>file</code></p>
|
1174
|
+
|
1175
|
+
<p>Options and defaults</p>
|
1176
|
+
<ul><li>
|
1177
|
+
<p><code>:draw_legend => nil</code> -if a filename is provided a legend
|
1178
|
+
will be drawn in a second plot</p>
|
1179
|
+
</li><li>
|
1180
|
+
<p><code>:width => 1000</code> -width of the PNG in pixels</p>
|
1181
|
+
</li><li>
|
1182
|
+
<p><code>:height => 500</code> -height of the PNG in pixels</p>
|
1183
|
+
</li></ul>
|
1184
|
+
|
1185
|
+
|
1186
|
+
|
1187
|
+
<div class="method-source-code" id="draw_threads-source">
|
1188
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 632</span>
|
1189
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_threads</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">options</span>={})
|
1190
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">options</span>)
|
1191
|
+
<span class="ruby-comment">#uses R's standard plot functions.. needed because ggplot can die unexpectedly...</span>
|
1192
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-string">"Can't draw threads until clustering is done"</span> <span class="ruby-keyword">unless</span> <span class="ruby-ivar">@clusters</span>
|
1193
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1194
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1195
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">false</span>
|
1196
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span>
|
1197
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">curr_win</span> = <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>
|
1198
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dx</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">1</span>]
|
1199
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dy</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">2</span>]
|
1200
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">plot_open</span>
|
1201
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"lines(dx,dy, col=\"#{@thread_colours[t.first]}\", xlab='position', ylab='density')"</span>
|
1202
|
+
<span class="ruby-keyword">else</span>
|
1203
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(dx,dy, type=\"l\", col=\"#{@thread_colours[t.first]}\",ylim=c(0,#{density_max_y}), main='#{file}',xlab='position', ylab='density')"</span>
|
1204
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">true</span>
|
1205
|
+
<span class="ruby-keyword">end</span>
|
1206
|
+
<span class="ruby-keyword">end</span>
|
1207
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1208
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:draw_legend</span>]
|
1209
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{opts[:draw_legend]}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1210
|
+
<span class="ruby-identifier">colours</span> = <span class="ruby-ivar">@thread_colours</span>.<span class="ruby-identifier">each</span>.<span class="ruby-identifier">sort</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">last</span>}.<span class="ruby-identifier">join</span>(<span class="ruby-string">"','"</span>)
|
1211
|
+
<span class="ruby-identifier">names</span> = <span class="ruby-ivar">@thread_colours</span>.<span class="ruby-identifier">each</span>.<span class="ruby-identifier">sort</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">first</span>}.<span class="ruby-identifier">join</span>(<span class="ruby-string">"','"</span>)
|
1212
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"plot(1,xlab="</span><span class="ruby-string">",ylab="</span><span class="ruby-string">",axes=FALSE)"</span>
|
1213
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"legend('top', c('#{names}'), lty=c(1),lwd=c(1),col=c('#{colours}'), ncol=4)"</span>
|
1214
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1215
|
+
<span class="ruby-keyword">end</span>
|
1216
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1217
|
+
<span class="ruby-keyword">end</span></pre>
|
1218
|
+
</div><!-- draw_threads-source -->
|
1219
|
+
|
1220
|
+
</div>
|
1221
|
+
|
1222
|
+
|
1223
|
+
|
1224
|
+
|
1225
|
+
</div><!-- draw_threads-method -->
|
1226
|
+
|
1227
|
+
|
1228
|
+
<div id="method-i-frequency_histogram" class="method-detail ">
|
1229
|
+
|
1230
|
+
<div class="method-heading">
|
1231
|
+
<span class="method-name">frequency_histogram</span><span
|
1232
|
+
class="method-args">(file="myfile.png", bin_width=@opts[:histo_bin_width], opts=@opts[:graphics])</span>
|
1233
|
+
<span class="method-click-advice">click to toggle source</span>
|
1234
|
+
</div>
|
1235
|
+
|
1236
|
+
|
1237
|
+
<div class="method-description">
|
1238
|
+
|
1239
|
+
<p>Draws a histogram of polymorphism frequencies across the reference genome
|
1240
|
+
section defined in Bio::Util::Gngm#initialize with bin width
|
1241
|
+
<code>bin_width</code> and writes it to a PNG file <code>file</code></p>
|
1242
|
+
|
1243
|
+
|
1244
|
+
|
1245
|
+
<div class="method-source-code" id="frequency_histogram-source">
|
1246
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 562</span>
|
1247
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">frequency_histogram</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">bin_width</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:histo_bin_width</span>], <span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>])
|
1248
|
+
<span class="ruby-identifier">posns</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">snp_positions</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-identifier">a</span>.<span class="ruby-identifier">first</span>}
|
1249
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1250
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"suppressMessages ( library(ggplot2) )"</span> <span class="ruby-comment">#setup R environment... </span>
|
1251
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">posns</span> = <span class="ruby-identifier">posns</span>
|
1252
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"data = data.frame(position=posns)"</span>
|
1253
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1254
|
+
<span class="ruby-identifier">graph_cmd</span> = <span class="ruby-node">"qplot(position,data=data, geom='histogram', binwidth = #{bin_width}, alpha=I(1/3), main='#{file}', color='red')"</span>
|
1255
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span>(<span class="ruby-identifier">graph_cmd</span>)
|
1256
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1257
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1258
|
+
<span class="ruby-keyword">end</span></pre>
|
1259
|
+
</div><!-- frequency_histogram-source -->
|
1260
|
+
|
1261
|
+
</div>
|
1262
|
+
|
1263
|
+
|
1264
|
+
|
1265
|
+
|
1266
|
+
</div><!-- frequency_histogram-method -->
|
1267
|
+
|
1268
|
+
|
1269
|
+
<div id="method-i-get_band" class="method-detail ">
|
1270
|
+
|
1271
|
+
<div class="method-heading">
|
1272
|
+
<span class="method-name">get_band</span><span
|
1273
|
+
class="method-args">(window=1.0)</span>
|
1274
|
+
<span class="method-click-advice">click to toggle source</span>
|
1275
|
+
</div>
|
1276
|
+
|
1277
|
+
|
1278
|
+
<div class="method-description">
|
1279
|
+
|
1280
|
+
<p>gets an array of windows that cluster with a given window</p>
|
1281
|
+
|
1282
|
+
|
1283
|
+
|
1284
|
+
<div class="method-source-code" id="get_band-source">
|
1285
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 848</span>
|
1286
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_band</span>(<span class="ruby-identifier">window</span>=<span class="ruby-value">1.0</span>)
|
1287
|
+
<span class="ruby-comment">##because of the weird step rounding error we need to find the internal name of the window.. so find it from the list from the name the user</span>
|
1288
|
+
<span class="ruby-comment">##expects it to be, may give more than one passing window so keep only first one..</span>
|
1289
|
+
<span class="ruby-identifier">windows</span> = <span class="ruby-identifier">find_window</span>(<span class="ruby-identifier">window</span>)
|
1290
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-node">"Couldnt find window #{window}, or window has no data to calculate: \n windows are #{self.densities.collect {|d| d.first} }"</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">windows</span>.<span class="ruby-identifier">empty?</span> <span class="ruby-comment">##if we have a window that is close enough to the specified window</span>
|
1291
|
+
<span class="ruby-identifier">idx</span> = <span class="ruby-identifier">find_index</span>(<span class="ruby-identifier">windows</span>.<span class="ruby-identifier">first</span>)
|
1292
|
+
<span class="ruby-comment">#find out which cluster the window is in</span>
|
1293
|
+
<span class="ruby-identifier">cluster</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">clusters</span>[<span class="ruby-identifier">idx</span>]
|
1294
|
+
<span class="ruby-comment">##get the other windows in the same cluster, ie the band...</span>
|
1295
|
+
<span class="ruby-identifier">band</span> = []
|
1296
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">clusters</span>.<span class="ruby-identifier">each_index</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">i</span><span class="ruby-operator">|</span>
|
1297
|
+
<span class="ruby-keyword">if</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">clusters</span>[<span class="ruby-identifier">i</span>] <span class="ruby-operator">==</span> <span class="ruby-identifier">cluster</span>
|
1298
|
+
<span class="ruby-identifier">band</span> <span class="ruby-operator"><<</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">first</span>
|
1299
|
+
<span class="ruby-keyword">end</span>
|
1300
|
+
<span class="ruby-keyword">end</span>
|
1301
|
+
<span class="ruby-identifier">band</span>
|
1302
|
+
<span class="ruby-keyword">end</span></pre>
|
1303
|
+
</div><!-- get_band-source -->
|
1304
|
+
|
1305
|
+
</div>
|
1306
|
+
|
1307
|
+
|
1308
|
+
|
1309
|
+
|
1310
|
+
</div><!-- get_band-method -->
|
1311
|
+
|
1312
|
+
|
1313
|
+
<div id="method-i-get_insert_size_frequency" class="method-detail ">
|
1314
|
+
|
1315
|
+
<div class="method-heading">
|
1316
|
+
<span class="method-name">get_insert_size_frequency</span><span
|
1317
|
+
class="method-args">(options={})</span>
|
1318
|
+
<span class="method-click-advice">click to toggle source</span>
|
1319
|
+
</div>
|
1320
|
+
|
1321
|
+
|
1322
|
+
<div class="method-description">
|
1323
|
+
|
1324
|
+
<p>Returns array of arrays <code>[[window start position, proportion of
|
1325
|
+
alignments > insert size]]</code>. Does this by taking successive
|
1326
|
+
windows across reference and collects the proportion of the reads in that
|
1327
|
+
window that have an insert size > the expected insert size. Proportions
|
1328
|
+
approaching 1 indicate that the sequenced organism has a deletion in that
|
1329
|
+
section, proportions approaching 0 indicate an insertion in that section,
|
1330
|
+
proportions around 0.5 indicate random variation of insert size, i.e. no
|
1331
|
+
indel.</p>
|
1332
|
+
|
1333
|
+
<p>Each section should be approximately the size of the insertion you expect
|
1334
|
+
to find and should increment in as small steps as possible.</p>
|
1335
|
+
|
1336
|
+
<p>Options and defaults:</p>
|
1337
|
+
<ul><li>
|
1338
|
+
<p><code>:ref_window_size => 200</code> width of window in which to
|
1339
|
+
calculate proportions</p>
|
1340
|
+
</li><li>
|
1341
|
+
<p><code>:ref_window_slide => 50</code> number of bases to move window in
|
1342
|
+
each step</p>
|
1343
|
+
</li><li>
|
1344
|
+
<p><code>:isize => 150</code> expected insert size</p>
|
1345
|
+
</li></ul>
|
1346
|
+
|
1347
|
+
<p>Sets the instance variable @snp_positions. Only gets positions the first
|
1348
|
+
time it is called, in subsequent calls pre-computed positions and
|
1349
|
+
statistics are returned, so changing parameters has no effect</p>
|
1350
|
+
|
1351
|
+
|
1352
|
+
|
1353
|
+
<div class="method-source-code" id="get_insert_size_frequency-source">
|
1354
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 531</span>
|
1355
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_insert_size_frequency</span>(<span class="ruby-identifier">options</span>={})
|
1356
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:insert_size_opts</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">options</span>)
|
1357
|
+
<span class="ruby-keyword">return</span> <span class="ruby-ivar">@snp_positions</span> <span class="ruby-keyword">if</span> <span class="ruby-ivar">@snp_positions</span>
|
1358
|
+
<span class="ruby-keyword">case</span>
|
1359
|
+
<span class="ruby-keyword">when</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">DB</span><span class="ruby-operator">::</span><span class="ruby-constant">Sam</span>) <span class="ruby-keyword">then</span> <span class="ruby-identifier">get_insert_size_frequency_from_bam</span>(<span class="ruby-identifier">opts</span>)
|
1360
|
+
<span class="ruby-keyword">end</span>
|
1361
|
+
<span class="ruby-keyword">end</span></pre>
|
1362
|
+
</div><!-- get_insert_size_frequency-source -->
|
1363
|
+
|
1364
|
+
</div>
|
1365
|
+
|
1366
|
+
|
1367
|
+
|
1368
|
+
|
1369
|
+
</div><!-- get_insert_size_frequency-method -->
|
1370
|
+
|
1371
|
+
|
1372
|
+
<div id="method-i-get_unmapped_mate_frequency" class="method-detail ">
|
1373
|
+
|
1374
|
+
<div class="method-heading">
|
1375
|
+
<span class="method-name">get_unmapped_mate_frequency</span><span
|
1376
|
+
class="method-args">(options={})</span>
|
1377
|
+
<span class="method-click-advice">click to toggle source</span>
|
1378
|
+
</div>
|
1379
|
+
|
1380
|
+
|
1381
|
+
<div class="method-description">
|
1382
|
+
|
1383
|
+
<p>Returns array of arrays <code>[[window start position, proportion of reads
|
1384
|
+
with unmapped mates]]</code>. Does this by taking successive windows across
|
1385
|
+
reference and counting the reads with unmapped mates. Proportions
|
1386
|
+
approaching 0.5 indicate that the sequenced organism has an insertion in
|
1387
|
+
that section, proportions approaching 0 indicate nothing different in that
|
1388
|
+
section.</p>
|
1389
|
+
|
1390
|
+
<p>Each section should be approximately the size of the insertion you expect
|
1391
|
+
to find and should increment in as small steps as possible.</p>
|
1392
|
+
|
1393
|
+
<p>Options and defaults:</p>
|
1394
|
+
<ul><li>
|
1395
|
+
<p><code>:ref_window_size => 200</code> width of window in which to
|
1396
|
+
calculate proportions</p>
|
1397
|
+
</li><li>
|
1398
|
+
<p><code>:ref_window_slide => 50</code> number of bases to move window in
|
1399
|
+
each step</p>
|
1400
|
+
</li></ul>
|
1401
|
+
|
1402
|
+
<p>Sets the instance variable @snp_positions. Only gets positions the first
|
1403
|
+
time it is called, in subsequent calls pre-computed positions and
|
1404
|
+
statistics are returned, so changing parameters has no effect</p>
|
1405
|
+
|
1406
|
+
|
1407
|
+
|
1408
|
+
<div class="method-source-code" id="get_unmapped_mate_frequency-source">
|
1409
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 550</span>
|
1410
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_unmapped_mate_frequency</span>(<span class="ruby-identifier">options</span>={})
|
1411
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:insert_size_opts</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">options</span>)
|
1412
|
+
<span class="ruby-keyword">return</span> <span class="ruby-ivar">@snp_positions</span> <span class="ruby-keyword">if</span> <span class="ruby-ivar">@snp_positions</span>
|
1413
|
+
<span class="ruby-keyword">case</span>
|
1414
|
+
<span class="ruby-keyword">when</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">DB</span><span class="ruby-operator">::</span><span class="ruby-constant">Sam</span>) <span class="ruby-keyword">then</span> <span class="ruby-identifier">get_unmapped_mate_frequency_from_bam</span>(<span class="ruby-identifier">opts</span>)
|
1415
|
+
<span class="ruby-keyword">end</span>
|
1416
|
+
<span class="ruby-keyword">end</span></pre>
|
1417
|
+
</div><!-- get_unmapped_mate_frequency-source -->
|
1418
|
+
|
1419
|
+
</div>
|
1420
|
+
|
1421
|
+
|
1422
|
+
|
1423
|
+
|
1424
|
+
</div><!-- get_unmapped_mate_frequency-method -->
|
1425
|
+
|
1426
|
+
|
1427
|
+
<div id="method-i-hit_count" class="method-detail ">
|
1428
|
+
|
1429
|
+
<div class="method-heading">
|
1430
|
+
<span class="method-name">hit_count</span><span
|
1431
|
+
class="method-args">()</span>
|
1432
|
+
<span class="method-click-advice">click to toggle source</span>
|
1433
|
+
</div>
|
1434
|
+
|
1435
|
+
|
1436
|
+
<div class="method-description">
|
1437
|
+
|
1438
|
+
<p>Returns an array of polymorphisms in each thread/window <tt>[[window,
|
1439
|
+
polymorphism count]]</tt>. Useful for sparse polymorphism counts or over small
|
1440
|
+
regions where small polymorphism counts can cause artificially large peaks
|
1441
|
+
in density curves.</p>
|
1442
|
+
|
1443
|
+
|
1444
|
+
|
1445
|
+
<div class="method-source-code" id="hit_count-source">
|
1446
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 938</span>
|
1447
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">hit_count</span>
|
1448
|
+
<span class="ruby-identifier">arr</span> = []
|
1449
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">threads</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">thread</span><span class="ruby-operator">|</span>
|
1450
|
+
<span class="ruby-identifier">arr</span> <span class="ruby-operator"><<</span> [<span class="ruby-identifier">thread</span>.<span class="ruby-identifier">first</span>, <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">length</span>]
|
1451
|
+
<span class="ruby-keyword">end</span>
|
1452
|
+
<span class="ruby-identifier">arr</span>
|
1453
|
+
<span class="ruby-keyword">end</span></pre>
|
1454
|
+
</div><!-- hit_count-source -->
|
1455
|
+
|
1456
|
+
</div>
|
1457
|
+
|
1458
|
+
|
1459
|
+
|
1460
|
+
|
1461
|
+
</div><!-- hit_count-method -->
|
1462
|
+
|
1463
|
+
|
1464
|
+
<div id="method-i-peaks" class="method-detail ">
|
1465
|
+
|
1466
|
+
<div class="method-heading">
|
1467
|
+
<span class="method-name">peaks</span><span
|
1468
|
+
class="method-args">()</span>
|
1469
|
+
<span class="method-click-advice">click to toggle source</span>
|
1470
|
+
</div>
|
1471
|
+
|
1472
|
+
|
1473
|
+
<div class="method-description">
|
1474
|
+
|
1475
|
+
<p>Returns the positions of the peaks in the signal curve calculated by
|
1476
|
+
Bio::Util::Gngm#get_peaks as an array</p>
|
1477
|
+
|
1478
|
+
|
1479
|
+
|
1480
|
+
<div class="method-source-code" id="peaks-source">
|
1481
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 884</span>
|
1482
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">peaks</span>
|
1483
|
+
<span class="ruby-ivar">@peak_indices</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-value">0</span>][<span class="ruby-value">1</span>][<span class="ruby-identifier">x</span>].<span class="ruby-identifier">to_f</span>.<span class="ruby-identifier">floor</span>}
|
1484
|
+
<span class="ruby-keyword">end</span></pre>
|
1485
|
+
</div><!-- peaks-source -->
|
1486
|
+
|
1487
|
+
</div>
|
1488
|
+
|
1489
|
+
|
1490
|
+
|
1491
|
+
|
1492
|
+
</div><!-- peaks-method -->
|
1493
|
+
|
1494
|
+
|
1495
|
+
<div id="method-i-signal" class="method-detail ">
|
1496
|
+
|
1497
|
+
<div class="method-heading">
|
1498
|
+
<span class="method-name">signal</span><span
|
1499
|
+
class="method-args">()</span>
|
1500
|
+
<span class="method-click-advice">click to toggle source</span>
|
1501
|
+
</div>
|
1502
|
+
|
1503
|
+
|
1504
|
+
<div class="method-description">
|
1505
|
+
|
1506
|
+
|
1507
|
+
|
1508
|
+
|
1509
|
+
|
1510
|
+
<div class="method-source-code" id="signal-source">
|
1511
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 999</span>
|
1512
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">signal</span>
|
1513
|
+
<span class="ruby-ivar">@signal</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">calculate_signal</span>
|
1514
|
+
<span class="ruby-keyword">end</span></pre>
|
1515
|
+
</div><!-- signal-source -->
|
1516
|
+
|
1517
|
+
</div>
|
1518
|
+
|
1519
|
+
|
1520
|
+
|
1521
|
+
|
1522
|
+
</div><!-- signal-method -->
|
1523
|
+
|
1524
|
+
|
1525
|
+
<div id="method-i-snp_positions" class="method-detail ">
|
1526
|
+
|
1527
|
+
<div class="method-heading">
|
1528
|
+
<span class="method-name">snp_positions</span><span
|
1529
|
+
class="method-args">(optsa={})</span>
|
1530
|
+
<span class="method-click-advice">click to toggle source</span>
|
1531
|
+
</div>
|
1532
|
+
|
1533
|
+
|
1534
|
+
<div class="method-description">
|
1535
|
+
|
1536
|
+
<p>Returns array of arrays <code>[[position, statistic]]</code> for
|
1537
|
+
polymorphisms passing filters in <code>optsa</code>. Default options are
|
1538
|
+
those in the <code>:variant_call</code> global options hash which can be
|
1539
|
+
overridden in the method call.</p>
|
1540
|
+
|
1541
|
+
<p>Options and defaults:</p>
|
1542
|
+
<ul><li>
|
1543
|
+
<p><code>:indels => false</code> -call small insertions AND deletions
|
1544
|
+
instead of simple SNPs</p>
|
1545
|
+
</li><li>
|
1546
|
+
<p><code>:deletions_only => false</code> -call just deletions instead of
|
1547
|
+
simple SNPs</p>
|
1548
|
+
</li><li>
|
1549
|
+
<p><code>:insertions_only => false</code> -call small insertions instead of
|
1550
|
+
simple SNPs</p>
|
1551
|
+
</li><li>
|
1552
|
+
<p><code>:min_depth => 2</code> -minimum quality passing depth of coverage
|
1553
|
+
at a position for a SNP call</p>
|
1554
|
+
</li><li>
|
1555
|
+
<p><code>:max_depth => 10000000</code> -maximum quality passing depth of
|
1556
|
+
coverage at a position for a SNP call</p>
|
1557
|
+
</li><li>
|
1558
|
+
<p><code>:mapping_quality => 10.0</code> -minimum mapping quality required
|
1559
|
+
for a read to be used in depth calculation</p>
|
1560
|
+
</li><li>
|
1561
|
+
<p><code>:min_non_ref_count => 2</code> -minimum number of reads not
|
1562
|
+
matching the reference for SNP to be called</p>
|
1563
|
+
</li><li>
|
1564
|
+
<p><code>:ignore_reference_n => true</code> -ignore positions where the
|
1565
|
+
reference is N or n</p>
|
1566
|
+
</li></ul>
|
1567
|
+
|
1568
|
+
<p>When calling INDELs, only one of <code>:indels, :deletions_only,
|
1569
|
+
:insertions_only</code> should be used. If all are <code>false</code>, SNPs
|
1570
|
+
are called.</p>
|
1571
|
+
|
1572
|
+
<p>Sets the instance variable @snp_positions. Only gets positions the first
|
1573
|
+
time it is called; in subsequent calls pre-computed positions and
|
1574
|
+
statistics are returned, so changing parameters has no effect.</p>
|
1575
|
+
|
1576
|
+
|
1577
|
+
|
1578
|
+
<div class="method-source-code" id="snp_positions-source">
|
1579
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 422</span>
|
1580
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">snp_positions</span>(<span class="ruby-identifier">optsa</span>={})
|
1581
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:variant_call</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">optsa</span>)
|
1582
|
+
<span class="ruby-keyword">return</span> <span class="ruby-ivar">@snp_positions</span> <span class="ruby-keyword">if</span> <span class="ruby-ivar">@snp_positions</span>
|
1583
|
+
<span class="ruby-keyword">case</span>
|
1584
|
+
<span class="ruby-keyword">when</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">DB</span><span class="ruby-operator">::</span><span class="ruby-constant">Sam</span>) <span class="ruby-keyword">then</span> <span class="ruby-identifier">get_snp_positions_from_bam</span>(<span class="ruby-identifier">opts</span>)
|
1585
|
+
<span class="ruby-keyword">end</span>
|
1586
|
+
<span class="ruby-keyword">end</span></pre>
|
1587
|
+
</div><!-- snp_positions-source -->
|
1588
|
+
|
1589
|
+
</div>
|
1590
|
+
|
1591
|
+
|
1592
|
+
|
1593
|
+
|
1594
|
+
</div><!-- snp_positions-method -->
|
1595
|
+
|
1596
|
+
|
1597
|
+
<div id="method-i-threads" class="method-detail ">
|
1598
|
+
|
1599
|
+
<div class="method-heading">
|
1600
|
+
<span class="method-name">threads</span><span
|
1601
|
+
class="method-args">(opts=@opts[:threads])</span>
|
1602
|
+
<span class="method-click-advice">click to toggle source</span>
|
1603
|
+
</div>
|
1604
|
+
|
1605
|
+
|
1606
|
+
<div class="method-description">
|
1607
|
+
|
1608
|
+
<p>Returns contents of @threads, an array of arrays <code>[[window 1, snp
|
1609
|
+
position 1, snp position 2 ... snp position n],[window 2, snp position 1,
|
1610
|
+
snp position 2 ... snp position n] ]</code>. If @threads is nil (because
|
1611
|
+
SNPs have not yet been gathered into threads) the <a
|
1612
|
+
href="Gngm.html#method-i-collect_threads">#collect_threads</a> method is
|
1613
|
+
called and @threads is set before returning.</p>
|
1614
|
+
|
1615
|
+
<p>Options and defaults:</p>
|
1616
|
+
<ul><li>
|
1617
|
+
<p><code>:start => 0.2</code> -first window</p>
|
1618
|
+
</li><li>
|
1619
|
+
<p><code>:stop => 1.0</code> -last window</p>
|
1620
|
+
</li><li>
|
1621
|
+
<p><code>:slide => 0.01</code> -distance between windows</p>
|
1622
|
+
</li><li>
|
1623
|
+
<p><code>:size => 0.1</code> -window width</p>
|
1624
|
+
</li></ul>
|
1625
|
+
|
1626
|
+
|
1627
|
+
|
1628
|
+
<div class="method-source-code" id="threads-source">
|
1629
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 584</span>
|
1630
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">threads</span>(<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:threads</span>])
|
1631
|
+
<span class="ruby-ivar">@threads</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">collect_threads</span>(<span class="ruby-identifier">opts</span>)
|
1632
|
+
<span class="ruby-keyword">end</span></pre>
|
1633
|
+
</div><!-- threads-source -->
|
1634
|
+
|
1635
|
+
</div>
|
1636
|
+
|
1637
|
+
|
1638
|
+
|
1639
|
+
|
1640
|
+
</div><!-- threads-method -->
|
1641
|
+
|
1642
|
+
|
1643
|
+
</section><!-- public-instance-method-details -->
|
1644
|
+
|
1645
|
+
</section><!-- 5Buntitled-5D -->
|
1646
|
+
|
1647
|
+
</div><!-- documentation -->
|
1648
|
+
|
1649
|
+
|
1650
|
+
<footer id="validator-badges">
|
1651
|
+
<p><a href="http://validator.w3.org/check/referer">[Validate]</a>
|
1652
|
+
<p>Generated by <a href="https://github.com/rdoc/rdoc">RDoc</a> 3.11.
|
1653
|
+
<p>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish Rdoc Generator</a> 3.
|
1654
|
+
</footer>
|
1655
|
+
|